% Data set accompanying the source-side context modelling experiments for
% phrase-based Statistical Machine Translation.
% NOTE(review): year, url and license were empty in the exported record and
% are kept as placeholders; fill them in from the data set's landing page.
% The misc entry type is used because the record describes a data set, not a
% journal article (no journal was given).
@misc{vangompel-context-smt-dataset,
  author   = {van Gompel, Maarten and van den Bosch, Antal},
  title    = {Language-independent classifier-based modelling of source-side context information in {Statistical} {Machine} {Translation} (Data set)},
  year     = {},
  url      = {},
  license  = {},
  abstract = {We present a series of experiments focusing on the modelling of
source-side context to improve Phrase-based Statistical Machine
Translation. Statistical Machine Translation systems typically
consist of a translation model and a language model. The former maps
phrases in the source language to the target language, without
regard for the context in which the source phrases occur. The latter
models just the target language, and acts as a target-side model of
context information after translation. We attempt to independently
reproduce a line of existing research and test whether considering
context information directly in the translation model has a positive
effect on translation quality. We furthermore investigate various
ways discriminative classifier-based models can be integrated into
Statistical Machine Translation. We will use proven techniques from
Word Sense Disambiguation, effectively integrating these techniques in
Statistical Machine Translation. Our approach is
language-independent and knowledge-poor: we do not employ any
explicit linguistic features computed by part-of-speech taggers,
word sense disambiguation systems, supertaggers, or parsers, as used
by previous work. We find only limited improvement of translation quality for
certain formulaic corpora and conclude that explicit modelling of source-side
context information does not add much to the data already implicitly
available in the decode process.},
}