cerebras.modelzoo.data.nlp.t5.config.T5DynamicDataProcessorConfig#

class cerebras.modelzoo.data.nlp.t5.config.T5DynamicDataProcessorConfig(batch_size: int = <object object>, shuffle: bool = True, shuffle_seed: int = 0, num_workers: int = 0, prefetch_factor: int = 10, persistent_workers: bool = True, src_data_dir: str = <object object>, src_vocab_file: str = <object object>, src_max_sequence_length: int = <object object>, tgt_max_sequence_length: int = <object object>, shuffle_buffer: Optional[int] = None, do_lower: bool = False, buckets: Optional[List[int]] = None, dynamic_loss_weight: Optional[bool] = None, pack_sequences: Optional[bool] = False, num_documents_to_concatenate: int = 128, drop_last: bool = True, oov_token: str = '<unk>', sos_token: str = '<s>', eos_token: str = '</s>', pad_token: str = '<pad>', extra_ids: Union[int, List[int]] = 0, labels_pad_id: int = 0, input_pad_id: int = 0)[source]#
src_data_dir: str = <object object>#

Path to the directory containing the tokenized input data files

src_vocab_file: str = <object object>#

Path to the vocabulary file, with one token per line

src_max_sequence_length: int = <object object>#

Maximum sequence length for the encoder input; longer sequences are truncated and shorter ones padded to this length

tgt_max_sequence_length: int = <object object>#

Maximum sequence length for the decoder labels; longer sequences are truncated and shorter ones padded to this length

shuffle_buffer: Optional[int] = None#

Size of the buffer used to store samples before shuffling

do_lower: bool = False#

Whether to lowercase all tokens in the vocabulary

buckets: Optional[List[int]] = None#

Optional list of sequence-length boundaries used to group samples of similar length into the same batch

dynamic_loss_weight: Optional[bool] = None#

If set, the loss for a token is divided by the length of the sequence that the token comes from

pack_sequences: Optional[bool] = False#

Whether to concatenate multiple documents into a single sequence so that compute is spent on real tokens rather than padding

num_documents_to_concatenate: int = 128#

The number of documents to pack together when pack_sequences is enabled

num_workers: int = 0#

The number of PyTorch worker processes used by the dataloader

drop_last: bool = True#

Whether to drop the last batch when it contains fewer samples than the configured batch size

prefetch_factor: int = 10#

The number of batches prefetched in advance by each dataloader worker

persistent_workers: bool = True#

Whether or not to keep workers persistent between epochs

oov_token: str = '<unk>'#

Token used for out-of-vocabulary words

sos_token: str = '<s>'#

Token marking the start of a sequence

eos_token: str = '</s>'#

Token marking the end of a sequence

pad_token: str = '<pad>'#

Token used for padding

extra_ids: Union[int, List[int]] = 0#

Number of sentinel tokens added to the vocabulary for the T5 span-corruption objective

labels_pad_id: int = 0#

Token id used to pad the labels

input_pad_id: int = 0#

Token id used to pad the encoder input

batch_size: int = <object object>#

The number of sequences in each batch

shuffle: bool = True#

Whether or not to shuffle the dataset

shuffle_seed: int = 0#

Seed used for deterministic shuffling
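
A minimal sketch of constructing this config directly with keyword arguments, as given by the signature above. All values below are hypothetical placeholders; the fields shown with <object object> defaults have no real default and must always be supplied.

from cerebras.modelzoo.data.nlp.t5.config import T5DynamicDataProcessorConfig

# Illustrative placeholders only: batch_size, src_data_dir, src_vocab_file,
# src_max_sequence_length, and tgt_max_sequence_length are required fields.
config = T5DynamicDataProcessorConfig(
    batch_size=256,
    src_data_dir="/path/to/tokenized/data",
    src_vocab_file="/path/to/vocab.txt",
    src_max_sequence_length=512,
    tgt_max_sequence_length=114,
    shuffle=True,
    shuffle_seed=0,
    num_workers=8,                     # PyTorch dataloader worker processes
    pack_sequences=True,               # concatenate documents to reduce padding
    num_documents_to_concatenate=128,
    extra_ids=100,                     # sentinel tokens for the T5 objective
)

The remaining fields keep the defaults listed above, so a typical config only overrides the required fields plus whichever dataloader and packing options differ from the defaults.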