Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modernised Brill POS Tagger to classes #717

Merged
merged 1 commit into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 33 additions & 31 deletions lib/natural/brill_pos_tagger/lib/Brill_POS_Tagger.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,42 +22,44 @@ const Sentence = require('./Sentence')

// When true, tag() logs the lexicon-tagged sentence before the rules are applied
const DEBUG = false

/**
 * Brill part-of-speech tagger.
 *
 * Tagging happens in two passes: every word first receives a category from
 * the lexicon, after which each transformation rule of the rule set gets the
 * chance to revise the tag at every position of the sentence.
 */
class BrillPOSTagger {
  /**
   * @param {object} lexicon - provides tagWord(word), which returns a list of
   *   categories for the word; only the first entry is used
   * @param {object} ruleSet - provides getRules(), which returns a list of
   *   rules that each expose apply(sentence, index)
   */
  constructor (lexicon, ruleSet) {
    this.lexicon = lexicon
    this.ruleSet = ruleSet
  }

  /**
   * Tags a sentence with the lexicon and then applies the transformation rules.
   *
   * @param {Array} sentence - array of words
   * @returns {Sentence} the tagged sentence (the object produced by
   *   tagWithLexicon, as returned by applyRules)
   */
  tag (sentence) {
    const taggedSentence = this.tagWithLexicon(sentence)
    if (DEBUG) {
      console.log(taggedSentence)
    }
    return this.applyRules(taggedSentence)
  }

  /**
   * Builds a Sentence in which every word carries the first category the
   * lexicon returns for it. No transformation rules are applied.
   *
   * @param {Array} sentence - array of words
   * @returns {Sentence} a new Sentence with one tagged word per input word
   */
  tagWithLexicon (sentence) {
    const taggedSentence = new Sentence()

    // Arrow callback keeps `this` bound to the tagger
    sentence.forEach((word) => {
      const categories = this.lexicon.tagWord(word)
      taggedSentence.addTaggedWord(word, categories[0])
    })
    return taggedSentence
  }

  /**
   * Applies the transformation rules to an initially tagged sentence.
   * For each position, every rule of the rule set is applied in order.
   *
   * @param {Sentence} sentence - tagged sentence exposing a taggedWords array
   * @returns {Sentence} the same sentence object that was passed in
   */
  applyRules (sentence) {
    // The word count is read once, before any rule runs
    for (let i = 0, size = sentence.taggedWords.length; i < size; i++) {
      this.ruleSet.getRules().forEach((rule) => {
        rule.apply(sentence, i)
      })
    }
    return sentence
  }
}

module.exports = BrillPOSTagger
68 changes: 33 additions & 35 deletions lib/natural/brill_pos_tagger/lib/Brill_POS_Tester.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,43 +18,41 @@

'use strict'

/**
 * Measures how well a Brill POS tagger reproduces the tags of a tagged corpus.
 */
class BrillPOSTester {
  /**
   * Tags every sentence of the corpus and compares the result with the tags
   * stored in the corpus, once after the lexicon pass and once after the
   * transformation rules have been applied.
   *
   * @param {object} corpus - exposes `sentences`; each sentence has a
   *   `taggedWords` array whose entries carry `token` and `tag`
   * @param {object} tagger - exposes tagWithLexicon(words) and
   *   applyRules(taggedSentence), both returning an object with `taggedWords`
   * @returns {number[]} two percentages: [correct after lexicon pass,
   *   correct after rules]. Both are NaN when the corpus contains no words
   *   (0 / 0).
   */
  test (corpus, tagger) {
    let totalWords = 0
    let correctTagsLexicon = 0
    let correctTagsAfterRules = 0

    // Tag the corpus using the tagger
    corpus.sentences.forEach((sentence) => {
      const words = sentence.taggedWords.map((token) => token.token)

      // Use the lexicon to tag the sentence. These hits are counted before
      // the rules run, because applyRules may modify and return the very
      // same sentence object.
      const taggedSentence = tagger.tagWithLexicon(words)
      sentence.taggedWords.forEach((token, i) => {
        totalWords++
        if (token.tag === taggedSentence.taggedWords[i].tag) {
          correctTagsLexicon++
        }
      })

      // Use the rule set to tag the sentence and count the right tags again
      const taggedSentenceAfterRules = tagger.applyRules(taggedSentence)
      sentence.taggedWords.forEach((token, i) => {
        if (token.tag === taggedSentenceAfterRules.taggedWords[i].tag) {
          correctTagsAfterRules++
        }
      })
    })

    // Return percentage right
    return [100 * correctTagsLexicon / totalWords, 100 * correctTagsAfterRules / totalWords]
  }
}

module.exports = BrillPOSTester
Loading