WebText dataset urls.txt.tar.gz

Type: Dataset
Tags: WebText, Reddit

title= {WebText dataset urls.txt.tar.gz},
journal= {},
author= {},
year= {},
url= {https://github.com/eukaryote31/openwebtext},
abstract= {Collection of URLs pointing to the web pages whose content was used in the WebText dataset, as described by OpenAI here: https://d4mucfpksywv.cloudfront.net/better-language-models/language-models.pdf

The URLs were obtained using the openwebtext scripts by eukaryote31.},
keywords= {WebText, Reddit},
terms= {},
license= {},
superseded= {}

