cerebras.modelzoo.data_preparation.nlp.slimpajama.dedup.dedup_train.deduplicate_train_holdout_sets

cerebras.modelzoo.data_preparation.nlp.slimpajama.dedup.dedup_train.deduplicate_train_holdout_sets(train_path, holdout_path, deduped_train_path, chunk_id)