diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1ff9e80..7c69947 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,6 +7,14 @@ Need help? Reach out on [gitter](https://gitter.im/cdimascio-oss/community) +### Prerequisites + +1. Install dependencies +``` +pip install . +python -m nltk.downloader punkt +``` + ### Code 1. Fork the repo diff --git a/README.md b/README.md index adaacca..64e8939 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ r.dale_chall() r.ari() r.linsear_write() r.smog() +r.spache() ``` **\*Note:** `text` must contain >= 100 words\* @@ -47,6 +48,7 @@ r.smog() - [Coleman Liau Index](#coleman-liau-index) - [Gunning Fog](#gunning-fog) - [SMOG](#smog) +- [Spache](#spache) - [Linsear Write](#linsear-write) ## Readability Metric Details and Properties @@ -187,6 +189,24 @@ print(s.score) print(s.grade_level) ``` +### Spache + +The Spache Readability Formula, published in 1953 in The Elementary School Journal, is designed for primary-grade reading materials. It is best used to calculate the difficulty of text at or below the 3rd-grade level. + +**_call:_** + +```python +r.spache() +``` + +**_example:_** + +```python +s = r.spache() +print(s.score) +print(s.grade_level) +``` + ### Linsear Write Linsear Write is a readability metric for English text, purportedly developed for the United States Air Force to help them calculate the readability of their technical manuals. 
diff --git a/readability/data/spache_easy.txt b/readability/data/spache_easy.txt new file mode 100644 index 0000000..5d264d6 --- /dev/null +++ b/readability/data/spache_easy.txt @@ -0,0 +1,1064 @@ +a +able +about +above +across +act +add +afraid +after +afternoon +again +against +ago +air +airplane +alarm +all +almost +alone +along +already +also +always +am +among +an +and +angry +animal +another +answer +any +anyone +appear +apple +are +arm +around +arrow +as +ask +asleep +at +ate +attention +aunt +awake +away +b +baby +back +bad +bag +ball +balloon +bang +bank +bark +barn +basket +be +bean +bear +beat +beautiful +became +because +become +bed +bee +been +before +began +begin +behind +believe +bell +belong +bend +bent +beside +best +better +between +big +bird +birthday +bit +bite +black +blanket +blew +block +blow +blue +board +boat +book +boot +born +borrow +both +bother +bottle +bottom +bought +bow +box +boy +branch +brave +bread +break +breakfast +breath +brick +bridge +bright +bring +broke +broken +brother +brought +brown +brush +build +bump +burn +bus +busy +but +butter +button +buy +by +c +cabin +cage +cake +call +came +camp +can +candle +candy +can't +cap +captain +car +card +care +careful +carrot +carry +case +castle +cat +catch +cattle +caught +cause +cent +certain +chair +chance +change +chase +chicken +chief +child +children +church +circle +circus +city +clap +clean +clever +cliff +climb +clock +close +cloth +clothes +clown +coat +cold +color +come +comfortable +company +contest +continue +cook +cool +corner +could +count +country +course +cover +cow +crawl +cream +cry +cup +curtain +cut +d +Dad +dance +danger +dangerous +dark +dash +daughter +day +dear +decide +deep +desk +did +didn't +die +different +dig +dinner +direction +disappear +disappoint +discover +distance +do +doctor +does +dog +dollar +done +don't +door +down +dragon +dream +dress +drink +drive +drop +drove +dry +duck +during +dust +e +each +eager +ear +early +earn +earth +easy +eat +edge 
+egg +eight +eighteen +either +elephant +else +empty +end +enemy +enough +enter +even +ever +every +everything +exact +except +excite +exclaim +explain +eye +face +fact +fair +fall +family +far +farm +farmer +farther +fast +fat +father +feather +feed +feel +feet +fell +fellow +felt +fence +few +field +fierce +fight +figure +fill +final +find +fine +finger +finish +fire +first +fish +five +flag +flash +flat +flew +floor +flower +fly +follow +food +for +forest +forget +forth +found +four +fourth +fox +fresh +friend +frighten +frog +from +front +fruit +full +fun +funny +fur +g +game +garden +gasp +gate +gave +get +giant +gift +girl +give +glad +glass +go +goat +gone +good +got +grandfather +grandmother +grass +gray +great +green +grew +grin +ground +group +grow +growl +guess +gun +h +had +hair +half +hall +hand +handle +hang +happen +happiness +happy +hard +harm +has +hat +hate +have +he +head +hear +heard +heavy +held +hello +help +hen +her +here +herself +he's +hid +hide +high +hill +him +himself +his +hit +hold +hole +holiday +home +honey +hop +horn +horse +hot +hour +house +how +howl +hum +hundred +hung +hungry +hunt +hurry +hurt +husband +i +I +ice +idea +if +I'll +I'm +imagine +important +in +inch +indeed +inside +instead +into +invite +is +it +it's +its +j +jacket +jar +jet +job +join +joke +joy +jump +just +k +keep +kept +key +kick +kill +kind +king +kitchen +kitten +knee +knew +knock +know +l +ladder +lady +laid +lake +land +large +last +late +laugh +lay +lazy +lead +leap +learn +least +leave +left +leg +less +let +let's +letter +lick +lift +light +like +line +lion +list +listen +little +live +load +long +look +lost +lot +loud +love +low +luck +lump +lunch +m +machine +made +magic +mail +make +man +many +march +mark +market +master +matter +may +maybe +me +mean +meant +meat +meet +melt +men +merry +met +middle +might +mile +milk +milkman +mind +mine +minute +miss +mistake +moment +money +monkey +month +more +morning +most +mother +mountain +mouse +mouth +move 
+much +mud +music +must +my +n +name +near +neck +need +needle +neighbor +neighborhood +nest +never +new +next +nibble +nice +night +nine +no +nod +noise +none +north +nose +not +note +nothing +notice +now +number +o +ocean +of +off +offer +often +oh +old +on +once +one +only +open +or +orange +order +other +our +out +outside +over +owl +own +p +pack +paid +pail +paint +pair +palace +pan +paper +parade +parent +park +part +party +pass +past +pasture +path +paw +pay +peanut +peek +pen +penny +people +perfect +perhaps +person +pet +pick +picnic +picture +pie +piece +pig +pile +pin +place +plan +plant +play +pleasant +please +plenty +plow +pocket +point +poke +pole +policeman +pond +poor +pop +postman +pot +potato +pound +pour +practice +prepare +present +pretend +pretty +princess +prize +probably +problem +promise +protect +proud +puff +pull +puppy +push +put +q +queen +queer +quick +quiet +quite +r +rabbit +raccoon +race +radio +rag +rain +raise +ran +ranch +rang +reach +read +ready +real +red +refuse +remember +reply +rest +return +reward +rich +ride +right +ring +river +road +roar +rock +rode +roll +roof +room +rope +round +row +rub +rule +run +rush +s +sad +safe +said +sail +sale +salt +same +sand +sang +sat +save +saw +say +scare +school +scold +scratch +scream +sea +seat +second +secret +see +seed +seem +seen +sell +send +sent +seven +several +sew +shadow +shake +shall +shape +she +sheep +shell +shine +ship +shoe +shone +shook +shoot +shop +shore +short +shot +should +show +sick +side +sight +sign +signal +silent +silly +silver +since +sing +sister +sit +six +size +skip +sky +sled +sleep +slid +slide +slow +small +smart +smell +smile +smoke +snap +sniff +snow +so +soft +sold +some +something +sometimes +son +song +soon +sorry +sound +speak +special +spend +spill +splash +spoke +spot +spread +spring +squirrel +stand +star +start +station +stay +step +stick +still +stone +stood +stop +store +story +straight +strange +street +stretch +strike +strong +such +sudden 
+sugar +suit +summer +sun +supper +suppose +sure +surprise +swallow +sweet +swim +swing +t +table +tail +take +talk +tall +tap +taste +teach +teacher +team +tear +teeth +telephone +tell +ten +tent +than +thank +that +that's +the +their +them +then +there +these +they +thick +thin +thing +think +third +this +those +though +thought +three +threw +through +throw +tie +tiger +tight +time +tiny +tip +tire +to +today +toe +together +told +tomorrow +too +took +tooth +top +touch +toward +tower +town +toy +track +traffic +train +trap +tree +trick +trip +trot +truck +true +trunk +try +turkey +turn +turtle +twelve +twin +two +u +ugly +uncle +under +unhappy +until +up +upon +upstairs +us +use +usual +v +valley +vegetable +very +village +visit +voice +w +wag +wagon +wait +wake +walk +want +war +warm +was +wash +waste +watch +water +wave +way +we +wear +weather +week +well +went +were +wet +what +wheel +when +where +which +while +whisper +whistle +white +who +whole +whose +why +wide +wife +will +win +wind +window +wing +wink +winter +wire +wise +wish +with +without +woke +wolf +woman +women +wonder +won't +wood +word +wore +work +world +worm +worry +worth +would +wrong +x +y +yard +year +yell +yellow +yes +yet +you +young +your +z +zoo \ No newline at end of file diff --git a/readability/data/spache_easy_porterstem.txt b/readability/data/spache_easy_porterstem.txt new file mode 100644 index 0000000..9212ba1 --- /dev/null +++ b/readability/data/spache_easy_porterstem.txt @@ -0,0 +1,1064 @@ +a +abl +about +abov +across +act +add +afraid +after +afternoon +again +against +ago +air +airplan +alarm +all +almost +alon +along +alreadi +also +alway +am +among +an +and +angri +anim +anoth +answer +ani +anyon +appear +appl +are +arm +around +arrow +as +ask +asleep +at +ate +attent +aunt +awak +away +b +babi +back +bad +bag +ball +balloon +bang +bank +bark +barn +basket +be +bean +bear +beat +beauti +becam +becaus +becom +bed +bee +been +befor +began +begin +behind +believ +bell +belong 
+bend +bent +besid +best +better +between +big +bird +birthday +bit +bite +black +blanket +blew +block +blow +blue +board +boat +book +boot +born +borrow +both +bother +bottl +bottom +bought +bow +box +boy +branch +brave +bread +break +breakfast +breath +brick +bridg +bright +bring +broke +broken +brother +brought +brown +brush +build +bump +burn +bu +busi +but +butter +button +buy +by +c +cabin +cage +cake +call +came +camp +can +candl +candi +can\t +cap +captain +car +card +care +care +carrot +carri +case +castl +cat +catch +cattl +caught +caus +cent +certain +chair +chanc +chang +chase +chicken +chief +child +children +church +circl +circu +citi +clap +clean +clever +cliff +climb +clock +close +cloth +cloth +clown +coat +cold +color +come +comfort +compani +contest +continu +cook +cool +corner +could +count +countri +cours +cover +cow +crawl +cream +cri +cup +curtain +cut +d +dad +danc +danger +danger +dark +dash +daughter +day +dear +decid +deep +desk +did +didn\t +die +differ +dig +dinner +direct +disappear +disappoint +discov +distanc +do +doctor +doe +dog +dollar +done +don\t +door +down +dragon +dream +dress +drink +drive +drop +drove +dri +duck +dure +dust +e +each +eager +ear +earli +earn +earth +easi +eat +edg +egg +eight +eighteen +either +eleph +els +empti +end +enemi +enough +enter +even +ever +everi +everyth +exact +except +excit +exclaim +explain +eye +face +fact +fair +fall +famili +far +farm +farmer +farther +fast +fat +father +feather +feed +feel +feet +fell +fellow +felt +fenc +few +field +fierc +fight +figur +fill +final +find +fine +finger +finish +fire +first +fish +five +flag +flash +flat +flew +floor +flower +fli +follow +food +for +forest +forget +forth +found +four +fourth +fox +fresh +friend +frighten +frog +from +front +fruit +full +fun +funni +fur +g +game +garden +gasp +gate +gave +get +giant +gift +girl +give +glad +glass +go +goat +gone +good +got +grandfath +grandmoth +grass +gray +great +green +grew +grin +ground +group +grow 
+growl +guess +gun +h +had +hair +half +hall +hand +handl +hang +happen +happi +happi +hard +harm +ha +hat +hate +have +he +head +hear +heard +heavi +held +hello +help +hen +her +here +herself +he\ +hid +hide +high +hill +him +himself +hi +hit +hold +hole +holiday +home +honey +hop +horn +hors +hot +hour +hous +how +howl +hum +hundr +hung +hungri +hunt +hurri +hurt +husband +i +I +ice +idea +if +i\ll +i\m +imagin +import +in +inch +inde +insid +instead +into +invit +is +it +it\ +it +j +jacket +jar +jet +job +join +joke +joy +jump +just +k +keep +kept +key +kick +kill +kind +king +kitchen +kitten +knee +knew +knock +know +l +ladder +ladi +laid +lake +land +larg +last +late +laugh +lay +lazi +lead +leap +learn +least +leav +left +leg +less +let +let\ +letter +lick +lift +light +like +line +lion +list +listen +littl +live +load +long +look +lost +lot +loud +love +low +luck +lump +lunch +m +machin +made +magic +mail +make +man +mani +march +mark +market +master +matter +may +mayb +me +mean +meant +meat +meet +melt +men +merri +met +middl +might +mile +milk +milkman +mind +mine +minut +miss +mistak +moment +money +monkey +month +more +morn +most +mother +mountain +mous +mouth +move +much +mud +music +must +my +n +name +near +neck +need +needl +neighbor +neighborhood +nest +never +new +next +nibbl +nice +night +nine +no +nod +nois +none +north +nose +not +note +noth +notic +now +number +o +ocean +of +off +offer +often +oh +old +on +onc +one +onli +open +or +orang +order +other +our +out +outsid +over +owl +own +p +pack +paid +pail +paint +pair +palac +pan +paper +parad +parent +park +part +parti +pass +past +pastur +path +paw +pay +peanut +peek +pen +penni +peopl +perfect +perhap +person +pet +pick +picnic +pictur +pie +piec +pig +pile +pin +place +plan +plant +play +pleasant +pleas +plenti +plow +pocket +point +poke +pole +policeman +pond +poor +pop +postman +pot +potato +pound +pour +practic +prepar +present +pretend +pretti +princess +prize +probabl +problem +promis 
+protect +proud +puff +pull +puppi +push +put +q +queen +queer +quick +quiet +quit +r +rabbit +raccoon +race +radio +rag +rain +rais +ran +ranch +rang +reach +read +readi +real +red +refus +rememb +repli +rest +return +reward +rich +ride +right +ring +river +road +roar +rock +rode +roll +roof +room +rope +round +row +rub +rule +run +rush +s +sad +safe +said +sail +sale +salt +same +sand +sang +sat +save +saw +say +scare +school +scold +scratch +scream +sea +seat +second +secret +see +seed +seem +seen +sell +send +sent +seven +sever +sew +shadow +shake +shall +shape +she +sheep +shell +shine +ship +shoe +shone +shook +shoot +shop +shore +short +shot +should +show +sick +side +sight +sign +signal +silent +silli +silver +sinc +sing +sister +sit +six +size +skip +sky +sled +sleep +slid +slide +slow +small +smart +smell +smile +smoke +snap +sniff +snow +so +soft +sold +some +someth +sometim +son +song +soon +sorri +sound +speak +special +spend +spill +splash +spoke +spot +spread +spring +squirrel +stand +star +start +station +stay +step +stick +still +stone +stood +stop +store +stori +straight +strang +street +stretch +strike +strong +such +sudden +sugar +suit +summer +sun +supper +suppos +sure +surpris +swallow +sweet +swim +swing +t +tabl +tail +take +talk +tall +tap +tast +teach +teacher +team +tear +teeth +telephon +tell +ten +tent +than +thank +that +that\ +the +their +them +then +there +these +they +thick +thin +thing +think +third +thi +those +though +thought +three +threw +through +throw +tie +tiger +tight +time +tini +tip +tire +to +today +toe +togeth +told +tomorrow +too +took +tooth +top +touch +toward +tower +town +toy +track +traffic +train +trap +tree +trick +trip +trot +truck +true +trunk +tri +turkey +turn +turtl +twelv +twin +two +u +ugli +uncl +under +unhappi +until +up +upon +upstair +us +use +usual +v +valley +veget +veri +villag +visit +voic +w +wag +wagon +wait +wake +walk +want +war +warm +wa +wash +wast +watch +water +wave +way +we +wear +weather 
+week +well +went +were +wet +what +wheel +when +where +which +while +whisper +whistl +white +who +whole +whose +whi +wide +wife +will +win +wind +window +wing +wink +winter +wire +wise +wish +with +without +woke +wolf +woman +women +wonder +won\t +wood +word +wore +work +world +worm +worri +worth +would +wrong +x +y +yard +year +yell +yellow +ye +yet +you +young +your +z +zoo diff --git a/readability/data/fix.py b/readability/data/stem.py similarity index 91% rename from readability/data/fix.py rename to readability/data/stem.py index 23824e3..d65e66b 100644 --- a/readability/data/fix.py +++ b/readability/data/stem.py @@ -3,7 +3,7 @@ porter_stemmer = PorterStemmer() -file = 'dale_chall_easy.txt' +file = 'spache_easy.txt' cur_path = os.path.dirname(os.path.realpath(__file__)) dale_chall_path = os.path.join(cur_path, file) words = None diff --git a/readability/readability.py b/readability/readability.py index f29c7f5..dbdda6e 100644 --- a/readability/readability.py +++ b/readability/readability.py @@ -1,6 +1,6 @@ from .text import Analyzer from .scorers import ARI, ColemanLiau, DaleChall, Flesch, \ - FleschKincaid, GunningFog, LinsearWrite, Smog + FleschKincaid, GunningFog, LinsearWrite, Smog, Spache class Readability: @@ -40,6 +40,10 @@ def smog(self): """SMOG Index.""" return Smog(self._statistics, self._analyzer.sentences).score() + def spache(self): + """Spache Index.""" + return Spache(self._statistics).score() + def statistics(self): return { 'num_letters': self._statistics.num_letters, diff --git a/readability/scorers/__init__.py b/readability/scorers/__init__.py index da4e187..df708e8 100644 --- a/readability/scorers/__init__.py +++ b/readability/scorers/__init__.py @@ -7,3 +7,4 @@ from .ari import ARI from .linsear_write import LinsearWrite from .smog import Smog +from .spache import Spache diff --git a/readability/scorers/spache.py b/readability/scorers/spache.py new file mode 100644 index 0000000..0f9a2c4 --- /dev/null +++ b/readability/scorers/spache.py 
@@ -0,0 +1,44 @@ +from readability.exceptions import ReadabilityException + + +class Result: + """Spache score together with its grade level string.""" + + def __init__(self, score, grade_level): + self.score = score + self.grade_level = grade_level + + def __str__(self): + return "score: {}, grade_level: '{}'". \ + format(self.score, self.grade_level) + + +class Spache: + """Spache readability scorer; requires at least 100 words.""" + + def __init__(self, stats): + self._stats = stats + if stats.num_words < 100: + raise ReadabilityException('100 words required.') + + def score(self): + """Return a Result holding the raw score and its grade level.""" + score = self._score() + return Result( + score=score, + grade_level=self._grade_level(score)) + + def _score(self): + """Return 0.141 * avg sentence length + 0.086 * percent difficult words + 0.839.""" + stats = self._stats + avg_sentence_len = stats.num_words / stats.num_sentences + # NOTE(review): num_spache_complex is incremented once per token, so repeated + # words count every time; confirm that is the intended definition. + percent_difficult_words = \ + stats.num_spache_complex / stats.num_words * 100 + + return (0.141 * avg_sentence_len) + (0.086 * percent_difficult_words) + 0.839 + + def _grade_level(self, score): + """Return round(score) as a string.""" + return str(round(score)) diff --git a/readability/text/analyzer.py b/readability/text/analyzer.py index b04b8b6..d90789d 100644 --- a/readability/text/analyzer.py +++ b/readability/text/analyzer.py @@ -37,6 +37,10 @@ def num_gunning_complex(self): def num_dale_chall_complex(self): return self.stats['num_dale_chall_complex'] + @property + def num_spache_complex(self): + return self.stats['num_spache_complex'] + @property def avg_words_per_sentence(self): return self.num_words / self.num_sentences @@ -57,6 +61,7 @@ def __init__(self): def analyze(self, text): self._dale_chall_set = self._load_dale_chall() + self._spache_set = self._load_spache() stats = self._statistics(text) self.sentences = stats['sentences'] # hack for smog return AnalyzerStatistics(stats) @@ -80,6 +85,7 @@ def _statistics(self, text): letters_count = 0 gunning_complex_count = 0 dale_chall_complex_count = 0 + spache_complex_count = 0 porter_stemmer = PorterStemmer() def is_gunning_complex(t, syllable_count): @@ -91,6 +97,10 @@ def is_dale_chall_complex(t): stem = porter_stemmer.stem(t.lower()) return stem not in self._dale_chall_set + def 
is_spache_complex(t): + stem = porter_stemmer.stem(t.lower()) + return stem not in self._spache_set + for t in tokens: if not self._is_punctuation(t): @@ -104,6 +114,8 @@ def is_dale_chall_complex(t): else 0 dale_chall_complex_count += \ 1 if is_dale_chall_complex(t) else 0 + spache_complex_count += \ + 1 if is_spache_complex(t) else 0 sentences = self._tokenize_sentences(text) sentence_count = len(sentences) @@ -116,6 +128,7 @@ def is_dale_chall_complex(t): 'num_letters': letters_count, 'num_gunning_complex': gunning_complex_count, 'num_dale_chall_complex': dale_chall_complex_count, + 'num_spache_complex': spache_complex_count, 'sentences': sentences, } @@ -133,3 +146,10 @@ def _load_dale_chall(self): dale_chall_path = os.path.join(cur_path, '..', 'data', file) with open(dale_chall_path) as f: return set(line.strip() for line in f) + + def _load_spache(self): + file = 'spache_easy_porterstem.txt' + cur_path = os.path.dirname(os.path.realpath(__file__)) + spache_path = os.path.join(cur_path, '..', 'data', file) + with open(spache_path) as f: + return set(line.strip() for line in f) diff --git a/test/test_readability.py b/test/test_readability.py index ef3f0d8..62e7201 100644 --- a/test/test_readability.py +++ b/test/test_readability.py @@ -68,6 +68,11 @@ def test_smog(self): self.assertEqual(12.516099999999998, r.score) self.assertEqual('13', r.grade_level) + def test_spache(self): + r = self.readability.spache() + self.assertEqual(7.164945054945054, r.score) + self.assertEqual('7', r.grade_level) + def test_print_stats(self): stats = self.readability.statistics() self.assertEqual(562, stats['num_letters'])