OpenWebText (Gokaslan's distribution, 2019), GPT-2 Tokenized
eukaryote31 and Joshua Peterson and Aaron Gokaslan and Vanya Cohen

folder data-owt (395 files)
Type: Dataset

title= {OpenWebText (Gokaslan's distribution, 2019), GPT-2 Tokenized},
journal= {},
author= {eukaryote31 and Joshua Peterson and Aaron Gokaslan and Vanya Cohen},
year= {},
url= {},
abstract= {Code by eukaryote31 and Joshua Peterson.

Scraped by Aaron Gokaslan and Vanya Cohen.

Tokenized by eukaryote31},
keywords= {},
terms= {},
license= {},
superseded= {}

