Skip to content

Commit

Permalink
add disambiguation checker
Browse files Browse the repository at this point in the history
  • Loading branch information
notconfusing committed Oct 1, 2012
1 parent 0964964 commit 7163a6b
Showing 1 changed file with 26 additions and 8 deletions.
34 changes: 26 additions & 8 deletions viafbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
enwp = wikipedia.getSite('en','wikipedia')
dewp = wikipedia.getSite('de','wikipedia')
#files
wikilinksfile = open("35linksfortrial.out")#should be wikilinksforbot.out when real
wikilinksfile = open("wikilinksforbot.out")#should be wikilinksforbot.out when real
wikilinks = wikilinksfile.readlines()
viafbotrun = open("viafbotrun.log", 'w+')
NoDEWPlog = open("NoDEWP.log", 'w+')
Expand Down Expand Up @@ -64,8 +64,6 @@





def pageValidate(nameOfPage):
"""accepts string of page name in EnglishWikipedia.
returns a Page Object of either the page or it's redirect (upto 10 redirects).
Expand Down Expand Up @@ -485,7 +483,9 @@ def writeVIAFparamOnly2(validatedPage,viafnum):
except exceptions.SpamfilterError:
raise exceptions.SpamfilterError
except exceptions.PageNotSaved:
raise exceptions.PageNotSaved
raise exceptions.PageNotSaved


def writeStats():
wikipedia.Page(enwp,'User:VIAFbot/Stats').put(
'{| class="wikitable"\n\
Expand Down Expand Up @@ -539,20 +539,34 @@ def writeStats():
| Pages which VIAFbot touched that already had Normdaten template with VIAF parameter|| '+str(normdatenVIAFcount)+'\n\
|}',
comment='Updating Stats', minorEdit=True)


def isDab(pageObject):
    """Report whether a page transcludes a disambiguation template.

    pageObject -- any object exposing templatesWithParams(); the result is
                  iterated and index 0 of every item is read as the
                  template name.

    Returns True as soon as one template name, compared
    case-insensitively, equals 'DAB' or 'DISAMBIGUATION'; returns False
    when none does (including when the page has no templates at all).
    """
    dabNames = ('DAB', 'DISAMBIGUATION')
    for template in pageObject.templatesWithParams():
        # Convert through a unicode format string instead of str(): on
        # Python 2, str() of a name holding non-ASCII characters raises
        # UnicodeEncodeError, and the caller does not guard this call, so
        # a single such template would abort the whole run.
        templateUpper = (u'%s' % (template[0],)).upper()
        if templateUpper in dabNames:
            return True
    return False

#the main loop
for wikilink in wikilinks:
'''Load the article and number from file'''
wikilink = wikilink.split() #to get the line into a list of (name, viafnum)
unvalidatedPageName = wikilink[0]
viafnum = int(wikilink[1])
touched = touched + 1
'''Find redirects or deletions, after all this file could be 6 months oout of date'''
touched = touched + 1 #how many wikilinks we've seen

'''Find redirects, deletions and dabs, after all this file could be 6 months out of date'''
try:
validatedPage = pageValidate(unvalidatedPageName) #It's possible that the page doesn't exist
except exceptions.NoPage:
viafbotrun.write(unvalidatedPageName.title() + "did not exist, or redirected more than 10 times")
continue #If the page doesn't exist, then we don't need to write anything to the Wiki.
if isDab(validatedPage):
viafbotrun.write(validatedPage.title() + " was a disambiguation page \n")
continue
else: pass

'''get statuses of Authority Control and Normdaten templates'''
acStatus = determineAuthorityControlTemplate(validatedPage)
try:
Expand All @@ -563,16 +577,20 @@ def writeStats():
normdatenStatus = determineNormdatenTemplate(germanPageName)
else:
normdatenStatus = 'noNormdatenTemplate' #if there's no page there's also noACtemplate either

'''Write the viafnumber according to what we found from DEWP'''
try:
writeToWiki(validatedPage, acStatus, normdatenStatus, viafnum, writeAttempts=0)
except exceptions.Error:
viafbotrun.write('http://en.wikipedia.org/wiki/' + validatedPage.title() + " was not written to wiki because of ac and nd status were not valid")#write to log

'''Write statistics onwiki every so often'''
if (touched % 1000) == 0:
writeStats()
else: pass

percentageComplete = touched / 2662.02 #the length of the wikilinks file divided by 100 so we can easily display as a percentage
print str(percentageComplete) + '% complete'

#close files
wikilinksfile.close()
viafbotrun.close()
Expand Down

0 comments on commit 7163a6b

Please sign in to comment.