This notebook runs 10-fold cross-validation of the CLTK's part-of-speech taggers for Ancient Greek. The final results (mean accuracy and standard deviation for each tagger) are found at the very bottom.

In :
from nltk.corpus.reader import TaggedCorpusReader
from nltk.tag import AffixTagger
from nltk.tag import BigramTagger
from nltk.tag import tnt
from nltk.tag import TrigramTagger
from nltk.tag import UnigramTagger
from nltk.tokenize import wordpunct_tokenize
import math
import os
import pandas as pd
import random
from statistics import mean
from statistics import stdev

In :
# Location of the Perseus Greek treebank POS training set.
# expanduser() resolves '~' so the path works for any user's home directory.
full_training_set_rel = '~/greek_treebank_perseus/pos_training_set.pos'
full_training_set = os.path.expanduser(full_training_set_rel)

In []:
# This section's code is good, but it times out in IPython.
# Consider using it in separate scripts.

# Per-fold accuracy scores for each tagger, filled in by the loop below.
unigram_accuracies = []
bigram_accuracies = []
trigram_accuracies = []
backoff_accuracies = []
two_prefix_accuracies = []
three_prefix_accuracies = []
four_prefix_accuracies = []
two_suffix_accuracies = []
three_suffix_accuracies = []
four_suffix_accuracies = []
five_suffix_accuracies = []
six_suffix_accuracies = []
tnt_accuracies = []

# Read the whole training file into a string; the original split an
# undefined `training_set_string` without ever reading the opened file.
with open(full_training_set) as f:
    training_set_string = f.read()

pos_set = training_set_string.split('\n\n')  # mk into a list, one item per sentence

sentence_count = len(pos_set)
tenth = math.ceil(sentence_count / 10)  # fold size for 10-fold cross-validation

# shuffle so folds are not biased by the corpus's original ordering
random.shuffle(pos_set)

def chunks(l, n):
    """Yield successive n-sized chunks from l.

    The final chunk is shorter when len(l) is not a multiple of n.
    http://stackoverflow.com/a/312464
    """
    start = 0
    while start < len(l):
        yield l[start:start + n]
        start += n

# a list of 10 lists of sentences, one per cross-validation fold
ten_parts = list(chunks(pos_set, tenth))

for counter, part in enumerate(ten_parts):
    # this fold is the held-out test set
    test_set = part

    # the remaining nine folds, concatenated, are the training set
    # ( http://stackoverflow.com/a/952952 )
    training_set_lists = [x for x in ten_parts if x is not part]
    training_set = [item for sublist in training_set_lists for item in sublist]

    # save the shuffled splits to disk so the NLTK corpus reader can load them
    local_dir_rel = '~/cltk_data/user_data'
    local_dir = os.path.expanduser(local_dir_rel)
    if not os.path.isdir(local_dir):
        os.makedirs(local_dir)

    test_path = os.path.join(local_dir, 'test_greek.pos')
    with open(test_path, 'w') as f:
        f.write('\n\n'.join(test_set))

    train_path = os.path.join(local_dir, 'train_greek.pos')
    with open(train_path, 'w') as f:
        f.write('\n\n'.join(training_set))

    # Read the splits back as lists of tagged sentences. The original
    # referenced train_sents/test_sents without ever defining them.
    train_reader = TaggedCorpusReader(local_dir, 'train_greek.pos')
    train_sents = train_reader.tagged_sents()
    test_reader = TaggedCorpusReader(local_dir, 'test_greek.pos')
    test_sents = test_reader.tagged_sents()

    print('Loop #' + str(counter))

    # make and evaluate unigram tagger
    unigram_tagger = UnigramTagger(train_sents)
    unigram_accuracy = unigram_tagger.evaluate(test_sents)
    unigram_accuracies.append(unigram_accuracy)
    print('Unigram:', unigram_accuracy)

    # make and evaluate bigram tagger
    bigram_tagger = BigramTagger(train_sents)
    bigram_accuracy = bigram_tagger.evaluate(test_sents)
    bigram_accuracies.append(bigram_accuracy)
    print('Bigram:', bigram_accuracy)

    # make and evaluate trigram tagger
    trigram_tagger = TrigramTagger(train_sents)
    trigram_accuracy = trigram_tagger.evaluate(test_sents)
    trigram_accuracies.append(trigram_accuracy)
    print('Trigram:', trigram_accuracy)

    # make and evaluate 1, 2, 3-gram backoff tagger
    tagger1 = UnigramTagger(train_sents)
    tagger2 = BigramTagger(train_sents, backoff=tagger1)
    tagger3 = TrigramTagger(train_sents, backoff=tagger2)
    backoff_accuracy = tagger3.evaluate(test_sents)
    backoff_accuracies.append(backoff_accuracy)
    print('1, 2, 3-gram backoff:', backoff_accuracy)

    # Prefix taggers: a positive affix_length tells AffixTagger to use
    # the first n characters of each word.
    two_prefix_tagger = AffixTagger(train_sents, affix_length=2)
    two_prefix_accuracy = two_prefix_tagger.evaluate(test_sents)
    two_prefix_accuracies.append(two_prefix_accuracy)
    print('2-char prefix:', two_prefix_accuracy)

    three_prefix_tagger = AffixTagger(train_sents, affix_length=3)
    three_prefix_accuracy = three_prefix_tagger.evaluate(test_sents)
    three_prefix_accuracies.append(three_prefix_accuracy)
    print('3-char prefix:', three_prefix_accuracy)

    four_prefix_tagger = AffixTagger(train_sents, affix_length=4)
    four_prefix_accuracy = four_prefix_tagger.evaluate(test_sents)
    four_prefix_accuracies.append(four_prefix_accuracy)
    print('4-char prefix:', four_prefix_accuracy)

    # Suffix taggers: AffixTagger needs a NEGATIVE affix_length for
    # suffixes. The original passed positive lengths here, so its
    # "suffix" taggers were really duplicate prefix taggers.
    two_suffix_tagger = AffixTagger(train_sents, affix_length=-2)
    two_suffix_accuracy = two_suffix_tagger.evaluate(test_sents)
    two_suffix_accuracies.append(two_suffix_accuracy)
    print('2-char suffix:', two_suffix_accuracy)

    three_suffix_tagger = AffixTagger(train_sents, affix_length=-3)
    three_suffix_accuracy = three_suffix_tagger.evaluate(test_sents)
    three_suffix_accuracies.append(three_suffix_accuracy)
    print('3-char suffix:', three_suffix_accuracy)

    four_suffix_tagger = AffixTagger(train_sents, affix_length=-4)
    four_suffix_accuracy = four_suffix_tagger.evaluate(test_sents)
    four_suffix_accuracies.append(four_suffix_accuracy)
    print('4-char suffix:', four_suffix_accuracy)

    five_suffix_tagger = AffixTagger(train_sents, affix_length=-5)
    five_suffix_accuracy = five_suffix_tagger.evaluate(test_sents)
    five_suffix_accuracies.append(five_suffix_accuracy)
    print('5-char suffix:', five_suffix_accuracy)

    six_suffix_tagger = AffixTagger(train_sents, affix_length=-6)
    six_suffix_accuracy = six_suffix_tagger.evaluate(test_sents)
    six_suffix_accuracies.append(six_suffix_accuracy)
    print('6-char suffix:', six_suffix_accuracy)

    # make and evaluate TnT tagger
    tnt_tagger = tnt.TnT(N=100)  # N=1000 is the default, but it won't finish with Greek
    tnt_tagger.train(train_sents)
    tnt_accuracy = tnt_tagger.evaluate(test_sents)
    tnt_accuracies.append(tnt_accuracy)
    print('TnT:', tnt_accuracy)

In []:
# This code was not run in IPython, but necessary for assembling statistics.

def _summarize(label, accuracies):
    """Return {label: {'mean': ..., 'sd': ...}} for one tagger's fold scores."""
    return {label: {'mean': mean(accuracies), 'sd': stdev(accuracies)}}

# tnt values observed for 8/10 folds:
# [0.9553020305648972, 0.9542076240709662, 0.9564495709663806, 0.9546379227530868,
#  0.9518869884357039, 0.95546875, 0.9559369923006287, 0.9545558706424909]
# mean of those: 0.9548057187167692

# One mean/sd summary per tagger, in the same order as before.
# NB: the TnT entry is built inline rather than via a variable named `tnt`,
# which previously shadowed the nltk.tag.tnt module imported at the top.
final_accuracies_list = [
    _summarize('unigram', unigram_accuracies),
    _summarize('bigram', bigram_accuracies),
    _summarize('trigram', trigram_accuracies),
    _summarize('1, 2, 3-gram backoff', backoff_accuracies),
    _summarize('2 prefix', two_prefix_accuracies),
    _summarize('3 prefix', three_prefix_accuracies),
    _summarize('4 prefix', four_prefix_accuracies),
    _summarize('2 suffix', two_suffix_accuracies),
    _summarize('3 suffix', three_suffix_accuracies),
    _summarize('4 suffix', four_suffix_accuracies),
    _summarize('5 suffix', five_suffix_accuracies),
    _summarize('6 suffix', six_suffix_accuracies),
    _summarize('tnt', tnt_accuracies),
]

# added to capture output when running as a script; note that write()
# needs a string, so the dict must be passed through str() first:
'''
with open('final_accuracies.py', 'w') as f:
    f.write(str(final_dict))
'''

In :
# Note: these values were generated by the cross-validation code above,
# but run as a standalone script, since IPython was timing out.
# Key order is kept exactly as originally written, because the column
# order of the DataFrame built from this dict can depend on it.
final_dict = {
    'trigram': {'mean': 0.7259902681754967, 'sd': 0.008456454235275485},
    'unigram': {'mean': 0.9051637750829489, 'sd': 0.0018505984564185247},
    '1, 2, 3-gram backoff': {'mean': 0.952937576511621, 'sd': 0.0023986408561536845},
    'bigram': {'mean': 0.7366758737449184, 'sd': 0.007628422048101589},
    'tnt': {'mean': 0.9549570274227535, 'sd': 0.001320804203349713},
}

In :
'''
final_dict = {}
for x in final_accuracies_list:
final_dict.update(x)
'''

# Render the per-tagger summaries as a table: one column per tagger,
# rows for the mean accuracy and its standard deviation.
df = pd.DataFrame.from_dict(final_dict)
df

Out:
1, 2, 3-gram backoff bigram tnt trigram unigram
mean 0.952938 0.736676 0.954957 0.725990 0.905164
sd 0.002399 0.007628 0.001321 0.008456 0.001851
In []: