From 7163a6b0c260449996b609c51e4d148ce433f480 Mon Sep 17 00:00:00 2001
From: notconfusing
Date: Mon, 1 Oct 2012 12:18:59 -0700
Subject: [PATCH] add disambiguation checker

Skip disambiguation pages in the main loop so no VIAF data is written
to them.

- add isDab(), which reports whether a page uses a template named
  Dab or Disambiguation (compared case-insensitively)
- main loop: log disambiguation pages to viafbotrun.log and continue
  with the next link
- read the links from wikilinksforbot.out instead of the 35-link
  trial file
- print a running "% complete" figure after each processed link
- whitespace cleanup around writeStats() and the main loop
---
Review notes (ignored by git am, not part of the commit message):

- isDab() converts the template name with unicode() rather than str();
  under Python 2 str() raises UnicodeEncodeError on a non-ASCII name.
  The index line below still carries the blob ids of the original
  commit.
- percentageComplete divides by a hard-coded 2662.02. Presumably
  len(wikilinks) / 100.0 would track the input file automatically;
  confirm before changing.
- The '"did not exist, or redirected more than 10 times"' log message
  (an unchanged context line) has no separator before it and no
  trailing newline, unlike the new disambiguation message.

 viafbot.py | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/viafbot.py b/viafbot.py
index 4f29ab0..7ee0ed9 100644
--- a/viafbot.py
+++ b/viafbot.py
@@ -15,7 +15,7 @@ enwp = wikipedia.getSite('en','wikipedia')
 dewp = wikipedia.getSite('de','wikipedia')
 
 #files
-wikilinksfile = open("35linksfortrial.out")#should be wikilinksforbot.out when real
+wikilinksfile = open("wikilinksforbot.out")#the full link list; 35linksfortrial.out was the trial file
 wikilinks = wikilinksfile.readlines()
 viafbotrun = open("viafbotrun.log", 'w+')
 NoDEWPlog = open("NoDEWP.log", 'w+')
@@ -64,8 +64,6 @@
 
 
 
-
-
 def pageValidate(nameOfPage):
     """accepts string of page name in EnglishWikipedia.
     returns a Page Object of either the page or it's redirect (upto 10 redirects).
@@ -485,7 +483,9 @@ def writeVIAFparamOnly2(validatedPage,viafnum):
     except exceptions.SpamfilterError:
         raise exceptions.SpamfilterError
     except exceptions.PageNotSaved:
-        raise exceptions.PageNotSaved    
+        raise exceptions.PageNotSaved
+
+
 def writeStats():
     wikipedia.Page(enwp,'User:VIAFbot/Stats').put(
     '{| class="wikitable"\n\
@@ -539,20 +539,34 @@ def writeStats():
 | Pages which VIAFbot touched that already had Normdaten template with VIAF parameter|| '+str(normdatenVIAFcount)+'\n\
 |}', comment='Updating Stats', minorEdit=True)
 
-    
+
+def isDab(pageObject):
+    """Return True if any templatesWithParams() entry is named Dab or Disambiguation (any case)."""
+    for template in pageObject.templatesWithParams():
+        #unicode(), not str(): str() raises UnicodeEncodeError on non-ASCII template names
+        if unicode(template[0]).upper() in ('DAB', 'DISAMBIGUATION'):
+            return True
+    return False
+
 #the main loop
 for wikilink in wikilinks:
     '''Load the article and number from file'''
     wikilink = wikilink.split() #to get the line into a list of (name, viafnum)
     unvalidatedPageName = wikilink[0]
     viafnum = int(wikilink[1])
-    touched = touched + 1
-    '''Find redirects or deletions, after all this file could be 6 months oout of date'''
+    touched = touched + 1 #how many wikilinks we've seen
+
+    '''Find redirects, deletions and dabs, after all this file could be 6 months out of date'''
     try:
         validatedPage = pageValidate(unvalidatedPageName) #It's possible that the page doesn't exist
     except exceptions.NoPage:
         viafbotrun.write(unvalidatedPageName.title() + "did not exist, or redirected more than 10 times")
         continue #If the page doesn't exist, then we don't need to write anything to the Wiki.
+    if isDab(validatedPage):
+        viafbotrun.write(validatedPage.title() + " was a disambiguation page \n")
+        continue
+    else: pass
+
     '''get statuses of Authority Control and Normdaten templates'''
     acStatus = determineAuthorityControlTemplate(validatedPage)
     try:
@@ -563,16 +577,20 @@ def writeStats():
         normdatenStatus = determineNormdatenTemplate(germanPageName)
     else:
         normdatenStatus = 'noNormdatenTemplate' #if there's no page there's also noACtemplate either
+
     '''Write the viafnumber according to what we found from DEWP'''
     try:
         writeToWiki(validatedPage, acStatus, normdatenStatus, viafnum, writeAttempts=0)
     except exceptions.Error:
         viafbotrun.write('http://en.wikipedia.org/wiki/' + validatedPage.title() + " was not written to wiki because of ac and nd status were not valid")#write to log
+
     '''Write statistics onwiki every so often'''
     if (touched % 1000) == 0:
         writeStats()
     else: pass
-    
+    percentageComplete = touched / 2662.02 #the length of the wikilinks file divided by 100 so we can easily display as a percentage
+    print str(percentageComplete) + '% complete'
+
 #close files
 wikilinksfile.close()
 viafbotrun.close()