Skip to content

Commit

Permalink
Bugfix in wordseg-stats (issue #8)
Browse files (browse the repository at this point in the history)
  • Loading branch information
mmmaat authored Oct 13, 2017
1 parent d0ac02b commit 4ae33e8
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions wordseg/statistics.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -62,7 +62,7 @@ def stat_corpus(text, separator=default_separator):

df.number_tokens = len(list_of_words)
df.number_types = len(dict_of_types)
- df.number_utterance = len(text)
+ df.number_utterances = len(text)

return df

Expand Down Expand Up @@ -96,14 +96,17 @@ def main():
description=__doc__,
separator=default_separator)

+ # force the input to be a list (can be a sequence or a file buffer)
+ streamin = list(streamin)
+
top = top_frequency_tokens(streamin, separator=separator)
streamout.write(
'{} top frequency tokens:\n'.format(len(top))
+ '\n'.join('{} {}'.format(t[0], t[1]) for t in top)
- + '\n')
+ + '\n' * 2)

stat = stat_corpus(streamin, separator=separator)
- streamout.write(stat)
+ streamout.write(stat.to_string() + '\n')


if __name__ == '__main__':
Expand Down

0 comments on commit 4ae33e8

Please sign in to comment.