-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathBackoffLmKen.h
99 lines (83 loc) · 3.06 KB
/
BackoffLmKen.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
/*
* This file is part of the continuous space language and translation model toolkit
* for statistical machine translation and large vocabulary speech recognition.
*
* Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France
*
* The CSLM toolkit is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License version 3 as
* published by the Free Software Foundation
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*
*
*/
#ifndef _BackoffLmKen_h
#define _BackoffLmKen_h
#include <vector>
#include "BackoffLm.h"
#include "Tools.h"
#include "WordList.h"
// from KENLM
#include <lm/model.hh>
#include <lm/word_index.hh>
/**
 * Back-off language model that delegates to a KENLM n-gram model.
 *
 * Specializes BackoffLm. Only the small accessors and the exp() wrappers are
 * defined in this header; the constructor, destructor, GetSentenceIds() and
 * the BoffLn*() methods are declared here and implemented elsewhere.
 */
class BackoffLmKen : public BackoffLm {
private:
  lm::ngram::Model *ken_ngram;              // KENLM model; GetOrder() tolerates NULL
  const lm::ngram::Vocabulary *ken_vocab;   // KENLM vocabulary; GetVocSize() tolerates NULL
  std::vector<LMWordIndex> map_cslm2ken;    // internal CSLM index -> internal KENLM WordIndex
  std::vector<WordID> map_ken2wid;          // internal KENLM WordIndex -> internal WordID
  std::vector<WordID> wid_vect;             // WordIDs of the words of a sentence

public:
  BackoffLmKen(char *p_fname, int p_max_order, const WordList &wlist);
  virtual ~BackoffLmKen();

  /**
   * order of the loaded LM
   * @return Order() of the KENLM model, or 0 if ken_ngram is NULL
   */
  virtual int GetOrder() {
    if (NULL == ken_ngram) {
      return 0;
    }
    return ken_ngram->Order();
  }

  /**
   * size of the vocabulary
   * @return Bound() of the KENLM vocabulary plus one, or 0 if ken_vocab is NULL
   */
  virtual WordID GetVocSize() {
    if (NULL == ken_vocab) {
      return 0;
    }
    return ken_vocab->Bound() + 1;
  }

  /**
   * looks up the WordID of every word of a sentence
   * @param wid output table of WordID (allocated internally)
   * @param sentence input sentence
   * @param bos start sentence with BOS
   * @param eos end sentence with EOS
   * @return number of words
   */
  virtual int GetSentenceIds(WordID *&wid, const std::string &sentence, bool bos, bool eos);

  /**
   * backoff LM P(w|ctxt) from a sequence of words
   * @return exp() of BoffLnPw() called with the same arguments
   */
  virtual REAL BoffPw(char **ctxt, char *w, int req_order) {
    const REAL ln_prob = BoffLnPw(ctxt, w, req_order);
    return exp(ln_prob);
  }

  /**
   * ln of backoff LM P(w|ctxt) from a sequence of words
   */
  virtual REAL BoffLnPw(char **ctxt, char *w, int req_order);

  /**
   * backoff LM P(w|ctxt) from a sequence of CSLM indices
   * @return exp() of BoffLnPid() called with the same arguments
   */
  virtual REAL BoffPid(REAL *ctxt, WordID predw, int req_order) {
    const REAL ln_prob = BoffLnPid(ctxt, predw, req_order);
    return exp(ln_prob);
  }

  /**
   * ln of backoff LM P(w|ctxt) from a sequence of CSLM indices
   */
  virtual REAL BoffLnPid(REAL *ctxt, WordID predw, int req_order);

  /**
   * ln of backoff LM P(w|ctxt) from a sequence of CSLM indices, without mapping
   */
  virtual REAL BoffLnStd(WordID *ctxt, WordID predw, int req_order);
};
#endif