Skip to content

Commit

Permalink
Merge branch 'master' of github.com:CrowdTruth/CrowdTruth-core
Browse files: browse the repository at this point in the history
  • Loading branch information
ancadumitrache committed May 9, 2017
2 parents e15fcb1 + 12b0958 commit aad03e0
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 13 deletions.
29 changes: 18 additions & 11 deletions controllers/inputController.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,6 +3,7 @@
import sys
reload(sys)
sys.setdefaultencoding('utf8')
import chardet
#import Judgment, Worker, Unit, Job, Collection
class Found(Exception): pass
import re, string
Expand Down Expand Up @@ -33,17 +34,21 @@ def progress(job_title, progress):

def processFile(root, directory, filename, config):

progress(filename,.05)
progress(filename,0)
job = filename.split('.csv')[0]

judgments = pd.read_csv(root+'/'+directory+'/'+filename)
#with open(root+'/'+directory+'/'+filename, 'rb') as f:
# result = chardet.detect(f.read()) # or readline if the file is large
# #print result['encoding']
progress(filename,.05)

judgments = pd.read_csv(root+'/'+directory+'/'+filename)#, encoding=result['encoding'])

if directory == '':
directory = '/'

collection = directory


platform = getPlatform(judgments)
#print df.head()

Expand Down Expand Up @@ -83,8 +88,8 @@ def processFile(root, directory, filename, config):
# for col in config.output.values():
# judgments['annotations.'+col] = judgments[col].apply(lambda x: getAnnotations(x))

judgments['started'] = judgments['started'].apply(lambda x: pd.to_datetime(x))
judgments['submitted'] = judgments['submitted'].apply(lambda x: pd.to_datetime(x))
judgments['started'] = judgments['started'].apply(lambda x: pd.to_datetime(str(x)))
judgments['submitted'] = judgments['submitted'].apply(lambda x: pd.to_datetime(str(x)))
judgments['duration'] = judgments.apply(lambda row: (row['submitted'] - row['started']).seconds, axis=1)


Expand Down Expand Up @@ -133,14 +138,12 @@ def processFile(root, directory, filename, config):

# tag judgments that were spam
judgments['spam'] = judgments['worker'].apply(lambda x: workers.at[x,'spam'])
judgments = judgments[judgments['spam'] == False]


filteredJudgments = judgments[judgments['spam'] == False]

#
# aggregate units
#
units = Unit.aggregate(judgments, config)
units = Unit.aggregate(filteredJudgments, config)
progress(filename,.8)


Expand All @@ -152,7 +155,7 @@ def processFile(root, directory, filename, config):
annotations = pd.DataFrame()
for col in config.output.values():
# annotations[col] = pd.Series(judgments[col].sum())
res = pd.DataFrame(judgments[col].apply(lambda x: pd.Series(x.keys()).value_counts()).sum(),columns=[col])
res = pd.DataFrame(filteredJudgments[col].apply(lambda x: pd.Series(x.keys()).value_counts()).sum(),columns=[col])
annotations = pd.concat([annotations, res], axis=0)
progress(filename,.85)

Expand All @@ -161,8 +164,12 @@ def processFile(root, directory, filename, config):
#
# aggregate job
#
job = Job.aggregate(units, judgments, workers, config)
job = Job.aggregate(units, filteredJudgments, workers, config)
job['spam'] = workers['spam'].sum() / float(workers['spam'].count())
job['spam.judgments'] = workers['spam'].sum()
job['spam.workers'] = workers['spam'].count()
job['workerAgreementThreshold'] = workerAgreementThreshold
job['workerCosineThreshold'] = workerCosineThreshold
progress(filename,.9)


Expand Down
2 changes: 1 addition & 1 deletion controllers/outputController.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,7 +5,7 @@

def saveResults(root, directory, results):
filename = directory.replace('/', '-')
writer = pd.ExcelWriter(root+directory+'/results'+filename+'.xlsx')
writer = pd.ExcelWriter(root+directory+'/results'+filename+'.xlsx',options={'encoding':'cp1252'})
for tab in results:
#print 'Saving:',tab
results[tab].to_excel(writer, tab)
Expand Down
5 changes: 4 additions & 1 deletion crowdtruth.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -109,7 +109,10 @@ def scanDirectory(directory=''):


# add customized results
results = config.processResults(results)
for c in config.output.items():
results['units'][c[1]] = results['units'][c[1]].apply(lambda x: dict(x))

results = config.processResults(results, config)

oc.saveResults(root, directory, results)

Expand Down

0 comments on commit aad03e0

Please sign in to comment.