#!/usr/bin/python
# Calculate the readability index of a paragraph.
# https://en.wikipedia.org/wiki/Flesch-Kincaid
# From the CG questions - http://codegolf.stackexchange.com/questions/10533/build-a-readability-index
# Will use a simple but not accurate method to count syllables, found here -
# http://allenporter.tumblr.com/post/9776954743/syllables
# Should give a decent approximation without needing a lot of code.
# FRE = 206.835 - 1.015 * (words per sentence) - 84.6 * (syllables per word)
import re
def tokenize(paragraph):
    """Split a paragraph into words and sentences.

    Words are the non-empty pieces obtained by splitting on spaces, commas,
    newlines and tabs; sentence punctuation stays attached to the word it
    follows.  Sentences are the pieces obtained by splitting on ``.``, ``?``
    and ``!``; pieces that are empty or contain only whitespace are dropped.

    Returns a 4-tuple ``(words, sentences, words_sentence, num_w)``:
    the list of words, the list of sentence fragments, the average number
    of words per sentence (float) and the word count (float).  When the
    paragraph contains no sentence text, ``words_sentence`` is ``0.0``
    instead of raising ``ZeroDivisionError``.
    """
    # Build real lists: ``filter`` is lazy on Python 3 and has no ``len``.
    words = [w for w in re.split(r'[ ,\n\t]', paragraph) if w != '']
    # ``strip`` so that whitespace trailing the final terminator
    # ("Hello. ") is not counted as an additional sentence.
    sentences = [s for s in re.split(r'[.?!]', paragraph) if s.strip() != '']
    num_w = float(len(words))
    if sentences:
        words_sentence = num_w / len(sentences)
    else:
        words_sentence = 0.0
    return words, sentences, words_sentence, num_w
def syllables(word):
    """Return an approximate syllable count for ``word``.

    Heuristic: every run of consecutive vowels (``a e i o u y``, either
    case) counts as one syllable.  One is subtracted for a silent final
    "e", i.e. when the word ends in vowel + consonant + "e" ("like",
    "here").  The result is an estimate only and can be 0 for tokens
    that contain no vowels.
    """
    count = len(re.findall(r'[aeiouyAEIOUY]+', word))
    # Tokens may still carry sentence punctuation ("there.", "anywhere!");
    # drop trailing non-letters so the silent-e rule sees the real ending.
    core = re.sub(r'[^A-Za-z]+$', '', word)
    # "y" is excluded as the middle consonant: in a word such as "eye" the
    # final "e" belongs to the same vowel run that was already counted
    # once, so subtracting would drive the count down to zero.
    if (len(core) > 2
            and core[-1] in 'eE'
            and core[-2] not in 'aeiouyAEIOUY'
            and core[-3] in 'aeiouAEIOU'):
        count -= 1
    return count
def calc_fre(paragraph):
    """Compute the readability score of ``paragraph``.

    Applies the formula from the file header:
    206.835 - 1.015 * (words per sentence) - 84.6 * (syllables per word),
    using ``tokenize`` for the word/sentence statistics and ``syllables``
    for the per-word syllable estimate.

    Raises ``ZeroDivisionError`` if ``tokenize`` reports zero words.
    """
    tokens, _fragments, per_sentence, total_words = tokenize(paragraph)
    total_syllables = sum(syllables(token) for token in tokens)
    # total_words is a float, so this is true division on Python 2 as well.
    per_word = total_syllables / total_words
    return 206.835 - 1.015 * per_sentence - 84.6 * per_word
def test():
    """Print the computed score for three sample passages.

    Each case is ``[text, expected score, lower bound, upper bound]``.
    One line is printed per case: the computed score, the expected score
    and whether the computed score lies within the inclusive bounds.
    """
    # The passages are written as adjacent string literals with explicit
    # separating spaces.  A backslash continuation inside a single literal
    # glues the last word of one source line onto the first word of the
    # next ("rain.Not", "escapethe") and thereby corrupts the word counts.
    test_cases = [
        ['I would not, could not, in the rain. '
         'Not in the dark, not on a train. '
         'Not in a car, not in a tree. '
         'I do not like them, Sam, you see. '
         'Not in a house, not in a box. '
         'Not with a mouse, not with a fox. '
         'I will not eat them here or there. '
         'I do not like them anywhere!', 111.38, 103.38, 119.38],
        ['It was a bright cold day in April, and the clocks were striking thirteen. '
         'Winston Smith, his chin nuzzled into his breast in an effort to escape '
         'the vile wind, slipped quickly through the glass doors of Victory Mansions, '
         'though not quickly enough to prevent a swirl of gritty dust from entering '
         'along with him.', 65.09, 57.09, 73.09],
        ["When in the Course of human events, it becomes necessary for one people to "
         "dissolve the political bands which have connected them with another, and to "
         "assume among the powers of the earth, the separate and equal station to "
         "which the Laws of Nature and of Nature's God entitle them, a decent respect "
         "to the opinions of mankind requires that they should declare the causes "
         "which impel them to the separation.", 3.70, -4.70, 11.70],
    ]
    for case in test_cases:
        fre = calc_fre(case[0])
        in_range = case[2] <= fre <= case[3]
        # A single %-formatted string prints identically under the Python 2
        # print statement and the Python 3 print function.
        print("%s %s %s" % (fre, case[1], in_range))
# Run the sample cases only when executed as a script, not when imported.
if __name__ == "__main__":
    test()
# Calculate the Flesch-Kincaid readability index of a text.
# https://en.wikipedia.org/wiki/Flesch-Kincaid
# https://en.wikipedia.org/wiki/Flesch-Kincaid
# Be the first to comment
# You can use [html][/html], [css][/css], [php][/php] and more to embed the code. URLs are automatically hyperlinked. Line breaks and paragraphs are automatically generated.