Wikipedia Training Data for Megatron-LM

wikipedia_bin (2 files)
wiki_text_sentence.bin 6.29GB
wiki_text_sentence.idx 1.55GB
Type: Dataset
Tags: BERT; NLP;

title= {Wikipedia Training Data for Megatron-LM},
journal= {},
author= {},
year= {},
url= {},
abstract= {A preprocessed Wikipedia dataset for training Megatron-LM. Please see the instructions for how to use it.

Note: the author does not own the copyright to any of the data.},
keywords= {BERT; NLP;},
terms= {},
license= {},
superseded= {}