cerebras.modelzoo.data_preparation.nlp.slimpajama.preprocessing.filter.get_short_documents

cerebras.modelzoo.data_preparation.nlp.slimpajama.preprocessing.filter.get_short_documents(input_dir, threshold, n_proc, proc_idx, docs_queue, dataset_name)