forked from hschwenk/cslm-toolkit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMachSoftmaxClass.h
84 lines (79 loc) · 3.56 KB
/
MachSoftmaxClass.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
/*
* This file is part of the continuous space language and translation model toolkit
* for statistical machine translation and large vocabulary speech recognition.
*
* Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France
*
* The CSLM toolkit is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License version 3 as
* published by the Free Software Foundation
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*
*
*
* Machine that computes classification probabilities for words in a vocabulary
* of size n, divided in c classes. The probabilities are computed as the probability
* of a class c and the conditional probability of a word w given a class, given the
* context h:
* P(w|h) = P(c|h) * P(w|c,h)
* Each of these probabilities is computed as a softmax:
* a_i = exp(a_i) / sum_k a_k
* with a_k is the kth output of a linear machine
*
* There is one softmax for the probabilities of classes, and one for each of
* the classes (that computes the probabilities of words in that class).
*
* This enables us to compute the log-likelihood of one word without having to
* compute the probabilities for all words: we only need the probabilities of
* all classes, and the conditional probability of all words in that class.
*/
#ifndef _MachSoftmaxClass_h
#define _MachSoftmaxClass_h
#include "Mach.h"
#include "MachLin.h"
#include "MachSoftmax.h"
#include "WordList.h"
class MachSoftmaxClass : public MachLin
{
protected:
// A MachSoftmax that predicts the class, to encapsulate that part.
Mach *class_softm_mach; //could be MachSoftmax or MachSoftmaxStable
// The word list, that contains the information defining the architecture
WordList *wlist;
int n_classes;
int max_class_size;
// Buffer containing the offset and lengths of the target class in the vocabulary
int* target_class_info;
int stable;
// Read and write binary data
virtual void ReadData(istream&, size_t, int=0);
virtual void WriteData(ostream&);
// create a copy of the machine, sharing the parameters
MachSoftmaxClass(const MachSoftmaxClass &);
public:
MachSoftmaxClass(const int p_idim=0, const int p_odim=0, const int p_bsize=128,
const ulong p_nbfw=0, const ulong p_nbbw=0, const int stable=1);
virtual ~MachSoftmaxClass();
virtual MachSoftmaxClass *Clone() {return new MachSoftmaxClass(*this);} // create a copy of the machine
virtual int GetMType() {return file_header_mtype_softmax_class;}; // get type of machine
// get number of allocated parameters
virtual ulong GetNbParams();
virtual void Info(bool=false, char *txt=(char*)""); // display (detailed) information on machine
virtual bool CopyParams(Mach* mach); // copy parameters from another machine
virtual void SetUp(WordList* p_wlist); // Sets machine architecture
virtual void Forw(int=0, bool=false); // calculate outputs for current inputs
// backprop gradients from output to input and update all weights
virtual void Backw (const float lrate, const float wdecay, int=0);
virtual REAL* GetDataOutClass();
virtual void SetGradOutClass(REAL* data);
virtual void SetTargetInfo(int* info);
};
#endif