-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSpellChecker.py
76 lines (66 loc) · 3.24 KB
/
SpellChecker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#Prerequisite: the enchant module (PyEnchant) must be installed
#Currently scans every C# (.cs) file found under a directory tree
#Suggestions for typos are written to a text file placed next to each C# file, with a name derived from the C# file name
#ToDo: Take the file extensions as user input (the directory path is already prompted for)
import enchant
import wx
import os
import fileinput
import sys
import stat
import re
#Shared en_US spell-checking dictionary; checkWordInDict calls its check() and suggest() methods
enUSDict = enchant.Dict("en_US")
#Words that checkWordInDict never reports as typos, regardless of what the dictionary says
keywords = ["namespace","args"]
#ToDo: Restructure this script into an object-oriented program
# Matches every run of characters that is not an ASCII letter (; , : - digits
# etc.). Compiled once here instead of once per checked word.
_NON_ALPHA_RE = re.compile('[^a-zA-Z]+')


def checkWordInDict(subWords, suggestionsDict, typosFile):
    """Report words that the en_US dictionary does not recognise.

    Each candidate is stripped of non-letter characters first. A candidate is
    reported when it is non-empty after stripping, is not listed in
    ``keywords``, has not been reported before (is not yet a key of
    ``suggestionsDict``) and is rejected by ``enUSDict.check``.

    For every reported word one line ``"<word> : <s1>, <s2>, \\n"`` is written
    to ``typosFile`` and the suggestions are stored in ``suggestionsDict``.

    Args:
        subWords: Either a list of strings or a single string to check.
            Values of any other type are ignored.
        suggestionsDict: Dict mapping already-reported words to their
            suggestion lists; updated in place.
        typosFile: Writable text file object receiving the report lines.
    """
    # A single string is handled exactly like a one-element list, so both
    # input shapes share one code path.
    if isinstance(subWords, str):
        candidates = [subWords]
    elif isinstance(subWords, list):
        candidates = subWords
    else:
        return

    for candidate in candidates:
        cleaned = _NON_ALPHA_RE.sub('', candidate)
        if not cleaned or cleaned in keywords:
            continue
        # Cheap dict lookup first: a word reported once is never looked up in
        # the spell-checking dictionary again.
        if cleaned in suggestionsDict:
            continue
        if enUSDict.check(cleaned):
            continue

        suggestions = enUSDict.suggest(cleaned)
        suggestionsDict[cleaned] = suggestions
        typosFile.write(cleaned + ' : ')
        for suggestion in suggestions:
            typosFile.write(suggestion + ", ")
        typosFile.write("\n")
def splitIdentifier(word):
    """Split a source-code token into its alphabetic sub-words.

    A sub-word is either one capital letter followed by any number of
    lower-case letters, or a run of lower-case letters. Every other character
    (digits, punctuation, '#', '_') acts as a separator and is dropped.
    Sub-words are returned in the order they occur in ``word``.

    >>> splitIdentifier("myVariableName")
    ['my', 'Variable', 'Name']
    >>> splitIdentifier("GetValue();")
    ['Get', 'Value']
    >>> splitIdentifier("123;")
    []
    """
    # One alternation keeps the leading lower-case part of camelCase names
    # ("my" in "myVariableName"), which the previous two-step lookup dropped
    # because the result of its second findall was never stored.
    return re.findall(r'[A-Z][a-z]*|[a-z]+', word)


if __name__ == "__main__":
    dirToScan = input(" Enter full path of the directory to scan for typos: ")
    for root, dirs, files in os.walk(dirToScan):
        for file in files:
            if not file.endswith(".cs"):
                continue

            # "Foo.cs" -> "Footypos.txt", written next to the scanned file.
            typosFileName = file[:-3] + "typos.txt"
            # Reported words are tracked per source file, so each report
            # lists a given typo only once.
            suggestionsDict = dict()
            lineCount = 0

            # Context managers close both files even if reading or spell
            # checking fails part-way through.
            with open(os.path.join(root, file), 'r') as sourceFile, \
                    open(os.path.join(root, typosFileName), 'w') as typosFile:
                for line in sourceFile:
                    lineCount += 1
                    for word in line.split():
                        subWords = splitIdentifier(word)
                        # Tokens without any letters have nothing to check.
                        if subWords:
                            checkWordInDict(subWords, suggestionsDict, typosFile)
                typosFile.write("Total lines parsed:" + str(lineCount))