Source code for trojai.modelgen.data_descriptions


"""
This file defines data description classes, which hold dataset-specific information that may be
used to instantiate a model architecture.
"""


class DataDescription:
    """
    Generic base class from which the data-type-specific data description classes derive.

    It declares no attributes or methods of its own.
    """
class CSVTextDatasetDesc(DataDescription):
    """
    Description of a text dataset: information potentially relevant to instantiating models
    that process text data.
    """

    def __init__(self, vocab_size, unk_idx, pad_idx):
        """
        Store the supplied values, unchanged, as attributes of the same names.

        :param vocab_size: the vocabulary size used to process this text dataset. A value of -1
            means that no vocabulary is built by torchtext and that some user-defined
            pre-processing is used instead.
        :param unk_idx: the index in the embedding which corresponds to an unknown word in the
            vocabulary
        :param pad_idx: the index in the embedding which corresponds to the padding token
        """
        self.vocab_size = vocab_size
        self.unk_idx = unk_idx
        self.pad_idx = pad_idx
class CSVImageDatasetDesc(DataDescription):
    """
    Description of an image dataset: information potentially relevant to instantiating models
    that process image data.
    """

    def __init__(self, num_samples, shuffled, num_classes):
        """
        Store the supplied values, unchanged, as attributes of the same names.

        NOTE(review): the meanings below are inferred from the parameter names only; confirm
        them against the code that constructs this description.

        :param num_samples: presumably the number of samples in the dataset
        :param shuffled: presumably a flag indicating whether the dataset is shuffled
        :param num_classes: presumably the number of distinct classes in the dataset
        """
        self.num_samples = num_samples
        self.shuffled = shuffled
        self.num_classes = num_classes