In [26]:
from nltk.corpus.reader import TaggedCorpusReader
from nltk.tag import BigramTagger
from nltk.tag import TrigramTagger
from nltk.tag import UnigramTagger
from nltk.tokenize import wordpunct_tokenize
In [28]:
# Load the POS-annotated Latin training corpus from the current working
# directory and expose its sentences as sequences of (word, tag) pairs.
training_fileid = 'latin_training_set.pos'
reader = TaggedCorpusReader(root='.', fileids=training_fileid)
train_sents = reader.tagged_sents()
In [31]:
# Base of the backoff chain: a unigram tagger trained on the tagged sentences.
# It is built without a backoff of its own.
tagger1 = UnigramTagger(train=train_sents)
In [32]:
# Second link of the chain: a bigram tagger trained on the same sentences,
# with the unigram tagger registered as its backoff.
tagger2 = BigramTagger(
    train=train_sents,
    backoff=tagger1,
)
In [33]:
# Top of the chain: a trigram tagger trained on the same sentences, with the
# bigram tagger (and, through it, the unigram tagger) as its backoff.
tagger3 = TrigramTagger(
    train=train_sents,
    backoff=tagger2,
)
In [35]:
# Score the full trigram -> bigram -> unigram chain.
#
# NOTE: the gold data passed here is `train_sents`, i.e. the very sentences the
# taggers were trained on, so the result is training-set accuracy. It is
# typically optimistic and should not be read as accuracy on unseen text.
# TODO: also score on sentences held out from training.
#
# `TaggerI.evaluate` is deprecated in recent NLTK releases in favour of
# `accuracy` (same computation, no deprecation warning). Prefer the new name
# when the installed NLTK provides it and fall back to `evaluate` otherwise.
score_tagger = tagger3.accuracy if hasattr(tagger3, "accuracy") else tagger3.evaluate
score_tagger(train_sents)
Out[35]:
0.9796586568315676