ClueWeb09_Anchors (132 files)
part-00131.gz |
185.91MB |
part-00130.gz |
184.87MB |
part-00129.gz |
185.22MB |
part-00128.gz |
185.23MB |
part-00127.gz |
185.27MB |
part-00126.gz |
184.97MB |
part-00125.gz |
185.40MB |
part-00124.gz |
184.85MB |
part-00123.gz |
187.35MB |
part-00122.gz |
184.74MB |
part-00121.gz |
185.53MB |
part-00120.gz |
184.24MB |
part-00119.gz |
185.58MB |
part-00118.gz |
185.17MB |
part-00117.gz |
185.33MB |
part-00116.gz |
184.43MB |
part-00115.gz |
185.54MB |
part-00114.gz |
184.62MB |
part-00113.gz |
184.99MB |
part-00112.gz |
184.95MB |
part-00111.gz |
184.78MB |
part-00110.gz |
185.16MB |
part-00109.gz |
184.87MB |
part-00108.gz |
185.00MB |
part-00107.gz |
184.96MB |
part-00106.gz |
185.94MB |
part-00105.gz |
185.14MB |
part-00104.gz |
185.67MB |
part-00103.gz |
185.38MB |
part-00102.gz |
185.18MB |
part-00101.gz |
185.84MB |
part-00100.gz |
184.54MB |
part-00099.gz |
185.70MB |
part-00098.gz |
185.13MB |
part-00097.gz |
185.61MB |
part-00096.gz |
185.00MB |
part-00095.gz |
184.77MB |
part-00094.gz |
187.29MB |
part-00093.gz |
185.42MB |
part-00092.gz |
185.67MB |
part-00091.gz |
185.35MB |
part-00090.gz |
184.63MB |
part-00089.gz |
184.56MB |
part-00088.gz |
184.78MB |
part-00087.gz |
184.69MB |
part-00086.gz |
186.81MB |
part-00085.gz |
184.20MB |
part-00084.gz |
185.11MB |
part-00083.gz |
185.43MB |
|
|
|
Type: Dataset
Bibtex:
Tags:
Bibtex:
@article{hiemstra2010clueweb09anchors,
title= {ClueWeb09_Anchors (anchor text derived from CMU's ClueWeb09 web crawl)},
journal= {Technical Report TR-CTIT-10-15, Centre for Telematics and Information Technology, University of Twente, Enschede. ISSN 1381-3625},
author= {Djoerd Hiemstra},
year= {2010},
url= {http://mirex.sf.net},
license= {http://creativecommons.org/licenses/by/4.0/},
abstract= {Anchor texts extracted from ClueWeb09.
See https://djoerdhiemstra.com/2010/anchor-text-for-clueweb09-category-a/},
keywords= {web, ClueWeb, HTML, CMU, Twente, anchors, TREC},
terms= {},
superseded= {}
}
part-00131.gz