data-owt (395 files)
owt262.npz |
40.76MB |
owt1.npz |
40.44MB |
owt2.npz |
40.58MB |
owt3.npz |
40.61MB |
owt4.npz |
40.61MB |
owt5.npz |
40.61MB |
owt6.npz |
40.62MB |
owt7.npz |
40.40MB |
owt8.npz |
40.56MB |
owt9.npz |
40.58MB |
owt10.npz |
40.57MB |
owt11.npz |
40.62MB |
owt12.npz |
40.58MB |
owt13.npz |
40.49MB |
owt14.npz |
40.56MB |
owt15.npz |
40.56MB |
owt16.npz |
40.53MB |
owt17.npz |
40.58MB |
owt18.npz |
40.57MB |
owt19.npz |
40.56MB |
owt20.npz |
40.55MB |
owt21.npz |
40.50MB |
owt22.npz |
40.64MB |
owt23.npz |
40.53MB |
owt24.npz |
40.59MB |
owt25.npz |
40.55MB |
owt26.npz |
40.66MB |
owt27.npz |
40.54MB |
owt28.npz |
40.54MB |
owt29.npz |
40.51MB |
owt30.npz |
40.57MB |
owt31.npz |
40.60MB |
owt32.npz |
40.54MB |
owt33.npz |
40.42MB |
owt34.npz |
40.70MB |
owt35.npz |
40.65MB |
owt36.npz |
40.67MB |
owt37.npz |
40.41MB |
owt38.npz |
40.55MB |
owt39.npz |
40.56MB |
owt40.npz |
40.56MB |
owt41.npz |
40.58MB |
owt42.npz |
40.60MB |
owt43.npz |
40.51MB |
owt44.npz |
40.51MB |
owt45.npz |
40.28MB |
owt46.npz |
40.60MB |
owt47.npz |
40.52MB |
owt48.npz |
40.50MB |
|
|
|
Type: Dataset
Bibtex:
Tags:
Bibtex:
@article{,
title= {OpenWebText (Gokaslan's distribution, 2019), GPT-2 Tokenized},
journal= {},
author= {eukaryote31 and Joshua Peterson and Aaron Gokaslan and Vanya Cohen},
year= {},
url= {},
abstract= {Scraping code by eukaryote31 (https://github.com/eukaryote31/openwebtext) and Joshua Peterson (https://github.com/jcpeterson/openwebtext).
Corpus scraped by Aaron Gokaslan and Vanya Cohen: https://skylion007.github.io/OpenWebTextCorpus/
Tokenized with the GPT-2 tokenizer by eukaryote31.},
keywords= {},
terms= {},
license= {},
superseded= {}
}
owt262.npz