In [1]:
from nltk.corpus.reader import TaggedCorpusReader
from nltk.tag import BigramTagger
from nltk.tag import TrigramTagger
from nltk.tag import UnigramTagger
from nltk.tokenize import wordpunct_tokenize
In [2]:
# Training data: read the tagged corpus file from the current directory
# and expose it as a sequence of tagged sentences for the taggers below.
reader = TaggedCorpusReader(root='.', fileids='greek_training_set.pos')
train_sents = reader.tagged_sents()
In [3]:
# First link of the backoff chain: a unigram tagger trained on the tagged
# sentences. It has no backoff of its own.
tagger1 = UnigramTagger(train=train_sents)
In [4]:
# Second link: a bigram tagger trained on the same sentences, deferring to
# the unigram tagger (tagger1) as its backoff.
tagger2 = BigramTagger(train=train_sents, backoff=tagger1)
In [5]:
# Final link: a trigram tagger trained on the same sentences, deferring to
# the bigram tagger (tagger2), which in turn defers to tagger1.
tagger3 = TrigramTagger(train=train_sents, backoff=tagger2)
In [6]:
# Score the full backoff chain against train_sents. NOTE: these are the same
# sentences every tagger above was trained on, so this figure is measured on
# already-seen data and is not a held-out estimate.
# NOTE(review): score a separate test split before quoting an accuracy.
tagger3.evaluate(gold=train_sents)
Out[6]:
0.972572292486997