readability.py

#!/usr/bin/python
# Calculate the readability index of a paragraph.
# https://en.wikipedia.org/wiki/Flesch-Kincaid
# From the CG questions - http://codegolf.stackexchange.com/questions/10533/build-a-readability-index
# Will use a simple but not accurate method to count syllables, found here -
# http://allenporter.tumblr.com/post/9776954743/syllables
# Should give a decent approximation without needing a lot of code.
# FRE = 206.835 - 1.015 * (words per sentence) - 84.6 * (syllables per word)

import re

# Compiled once at import time and reused by the functions below.
_WORD_SEPARATORS = re.compile(r'[ ,\n\t]')
_SENTENCE_TERMINATORS = re.compile(r'[.?!]')
_VOWEL_RUN = re.compile(r'[aeiouyAEIOUY]+')

# 'y' is deliberately absent here: it counts as a vowel when grouping vowel
# runs, but not when deciding whether a trailing 'e' is silent.
_VOWELS = 'aeiouAEIOU'


def tokenize(paragraph):
    """Split a paragraph into words and sentences.

    Words are the non-empty pieces between spaces, commas, newlines and tabs,
    so sentence punctuation stays attached to the word it follows.
    Sentences are the pieces between '.', '?' and '!'; empty and
    whitespace-only pieces (such as the remainder after a final terminator
    followed by a newline) are not counted.

    Returns a tuple ``(words, sentences, words_sentence, num_w)`` where
    ``words`` and ``sentences`` are lists of strings, ``num_w`` is the word
    count as a float and ``words_sentence`` is ``num_w / len(sentences)``.

    Raises ZeroDivisionError when the paragraph contains no sentence.
    """
    # Lists, not filter(): filter() is a lazy iterator on Python 3 and has
    # no len().
    words = [w for w in _WORD_SEPARATORS.split(paragraph) if w != '']
    sentences = [s for s in _SENTENCE_TERMINATORS.split(paragraph)
                 if s.strip()]
    num_w = float(len(words))
    words_sentence = num_w / len(sentences)
    return words, sentences, words_sentence, num_w


def syllables(word):
    """Approximate the number of syllables in ``word``.

    Every run of consecutive vowels (including 'y') counts as one syllable.
    One syllable is removed for a silent final 'e', recognised as
    vowel + non-vowel + 'e' at the very end of the word (as in "like").
    Trailing punctuation is part of ``word`` and defeats that adjustment.
    """
    count = len(_VOWEL_RUN.findall(word))
    silent_final_e = (
        len(word) > 2
        and word[-1] == 'e'
        and word[-2] not in _VOWELS
        and word[-3] in _VOWELS
    )
    if silent_final_e:
        count -= 1
    return count


def calc_fre(paragraph):
    """Return the Flesch Reading Ease score of ``paragraph`` as a float.

    FRE = 206.835 - 1.015 * (words per sentence) - 84.6 * (syllables per word)

    Raises ZeroDivisionError when the paragraph has no sentences or no words.
    """
    words, sentences, words_sentence, word_c = tokenize(paragraph)
    syllable_c = sum(syllables(word) for word in words)
    # word_c is a float, so this is true division on Python 2 as well.
    syllables_word = syllable_c / word_c
    return 206.835 - 1.015 * words_sentence - 84.6 * syllables_word


def test():
    """Print the score of each sample text beside its reference value.

    Each output line is ``<computed> <reference> <within tolerance>``.
    """
    # A computed score matches when it lies within this many points of the
    # reference value.
    tolerance = 8.0
    test_cases = [
        ('I would not, could not, in the rain. '
         'Not in the dark, not on a train. '
         'Not in a car, not in a tree. '
         'I do not like them, Sam, you see. '
         'Not in a house, not in a box. '
         'Not with a mouse, not with a fox. '
         'I will not eat them here or there. '
         'I do not like them anywhere!',
         111.38),
        ('It was a bright cold day in April, and the clocks were striking '
         'thirteen. Winston Smith, his chin nuzzled into his breast in an '
         'effort to escape the vile wind, slipped quickly through the glass '
         'doors of Victory Mansions, though not quickly enough to prevent a '
         'swirl of gritty dust from entering along with him.',
         65.09),
        ("When in the Course of human events, it becomes necessary for one "
         "people to dissolve the political bands which have connected them "
         "with another, and to assume among the powers of the earth, the "
         "separate and equal station to which the Laws of Nature and of "
         "Nature's God entitle them, a decent respect to the opinions of "
         "mankind requires that they should declare the causes which impel "
         "them to the separation.",
         3.70),
    ]
    for text, expected in test_cases:
        fre = calc_fre(text)
        within = abs(fre - expected) <= tolerance
        # One pre-formatted string prints identically on Python 2 and 3.
        print('%s %s %s' % (fre, expected, within))


if __name__ == '__main__':
    test()
Calculate the Flesch-Kincaid readability index of a text.
https://en.wikipedia.org/wiki/Flesch-Kincaid

Be the first to comment

You can use [html][/html], [css][/css], [php][/php] and more to embed the code. URLs are automatically hyperlinked. Line breaks and paragraphs are automatically generated.