Skip to content

Commit

Permalink
Modernised Brill POS Tagger to classes (#717)
Browse files Browse the repository at this point in the history
  • Loading branch information
Hugo-ter-Doest authored Dec 5, 2023
1 parent a728884 commit 3a08b4a
Show file tree
Hide file tree
Showing 10 changed files with 777 additions and 764 deletions.
64 changes: 33 additions & 31 deletions lib/natural/brill_pos_tagger/lib/Brill_POS_Tagger.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,42 +22,44 @@ const Sentence = require('./Sentence')

const DEBUG = false

/**
 * Brill part-of-speech tagger.
 *
 * Tagging happens in two passes: every word first receives an initial
 * category from the lexicon, after which each transformation rule of the
 * rule set gets the chance to revise the tag at every word position.
 */
class BrillPOSTagger {
  /**
   * @param {object} lexicon - Must provide `tagWord(word)`, returning a list
   *   of candidate categories for the word; only the first one is used.
   * @param {object} ruleSet - Must provide `getRules()`, returning the
   *   transformation rules; each rule must provide `apply(sentence, index)`.
   */
  constructor (lexicon, ruleSet) {
    this.lexicon = lexicon
    this.ruleSet = ruleSet
  }

  /**
   * Tags a sentence: lexicon lookup first, then the transformation rules.
   *
   * @param {Array} sentence - The words of the sentence, in order.
   * @returns {Sentence} The tagged sentence (not a plain array); the tagged
   *   words live in its `taggedWords` array, each exposing its category as
   *   `tag`.
   */
  tag (sentence) {
    const taggedSentence = this.tagWithLexicon(sentence)
    if (DEBUG) {
      console.log(taggedSentence)
    }
    return this.applyRules(taggedSentence)
  }

  /**
   * Builds the initial tagging using nothing but the lexicon.
   *
   * @param {Array} sentence - The words of the sentence, in order.
   * @returns {Sentence} A new Sentence holding every word together with the
   *   first category the lexicon lists for it.
   */
  tagWithLexicon (sentence) {
    const taggedSentence = new Sentence()

    // An arrow function keeps `this` bound to the tagger, so no alias is needed
    sentence.forEach((word) => {
      const categories = this.lexicon.tagWord(word)
      taggedSentence.addTaggedWord(word, categories[0])
    })
    return taggedSentence
  }

  /**
   * Applies the transformation rules to an initially tagged sentence.
   *
   * Positions are visited left to right and, at each position, the rules are
   * applied in the order `getRules()` returns them. The sentence is handed to
   * the rules as is, so any change a rule makes is made in place.
   *
   * @param {Sentence} sentence - Tagged sentence; only its `taggedWords`
   *   length is read here, the rules do the rest.
   * @returns {Sentence} The very same sentence object that was passed in.
   */
  applyRules (sentence) {
    // The rule list does not depend on the position, so fetch it only once
    const rules = this.ruleSet.getRules()
    for (let i = 0, size = sentence.taggedWords.length; i < size; i++) {
      rules.forEach((rule) => {
        rule.apply(sentence, i)
      })
    }
    return sentence
  }
}

module.exports = BrillPOSTagger
68 changes: 33 additions & 35 deletions lib/natural/brill_pos_tagger/lib/Brill_POS_Tester.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,43 +18,41 @@

'use strict'

/**
 * Measures how well a Brill tagger reproduces the tags of a tagged corpus.
 */
class BrillPOSTester {
  /**
   * Tags every sentence of the corpus and compares the result with the tags
   * stored in the corpus, once after the lexicon pass and once more after
   * the transformation rules have been applied.
   *
   * @param {object} corpus - Must expose `sentences`; each sentence must
   *   expose `taggedWords`, whose entries carry `token` (the word) and `tag`
   *   (the expected category).
   * @param {object} tagger - Must provide `tagWithLexicon(words)` and
   *   `applyRules(taggedSentence)`, both returning an object whose
   *   `taggedWords[i].tag` is the tag assigned to word `i`.
   * @returns {number[]} `[lexiconPercentage, afterRulesPercentage]`: the
   *   percentage of correctly tagged words after each of the two passes.
   *   A corpus without any words yields `[0, 0]`.
   */
  test (corpus, tagger) {
    let totalWords = 0
    let correctTagsLexicon = 0
    let correctTagsAfterRules = 0

    corpus.sentences.forEach((sentence) => {
      const expectedWords = sentence.taggedWords
      const words = expectedWords.map((token) => token.token)

      // Use the lexicon to tag the sentence
      const taggedSentence = tagger.tagWithLexicon(words)
      // Count the right tags now: the tagger may revise the tags of this very
      // object when the rules are applied below
      expectedWords.forEach((token, i) => {
        totalWords++
        if (token.tag === taggedSentence.taggedWords[i].tag) {
          correctTagsLexicon++
        }
      })

      // Use the rule set to tag the sentence
      const taggedSentenceAfterRules = tagger.applyRules(taggedSentence)
      // Count the right tags
      expectedWords.forEach((token, i) => {
        if (token.tag === taggedSentenceAfterRules.taggedWords[i].tag) {
          correctTagsAfterRules++
        }
      })
    })

    // Without words there is nothing to divide by; report 0% instead of NaN
    if (totalWords === 0) {
      return [0, 0]
    }

    // Return percentage right
    return [
      100 * correctTagsLexicon / totalWords,
      100 * correctTagsAfterRules / totalWords
    ]
  }
}

module.exports = BrillPOSTester
Loading

0 comments on commit 3a08b4a

Please sign in to comment.