diff --git a/.depend b/.depend new file mode 100644 index 0000000..d987d50 --- /dev/null +++ b/.depend @@ -0,0 +1,103 @@ + +Tools.o: Tools.cpp Tools.h +Toolsgz.o: Toolsgz.cpp Tools.h Toolsgz.h +MachConfig.o: MachConfig.cpp MachAvr.h MachCombined.h MachMulti.h Mach.h \ + Tools.h Blas.h Timer.h MachConfig.h MachLin.h Shareable.h MachTab.h \ + MachJoin.h MachLinRectif.h MachPar.h MachSeq.h MachSig.h MachSoftmax.h \ + MachSoftmaxStable.h MachSoftmaxClass.h WordList.h MachSplit.h \ + MachSplit1.h MachTanh.h MachCopy.h +Mach.o: Mach.cpp Tools.h Mach.h Blas.h Timer.h MachCopy.h MachTab.h \ + Shareable.h MachLin.h MachSig.h MachTanh.h MachSoftmax.h \ + MachSoftmaxStable.h MachSoftmaxClass.h WordList.h MachLinRectif.h \ + MachSeq.h MachMulti.h MachPar.h MachSplit.h MachSplit1.h MachJoin.h +MachTab.o: MachTab.cpp Tools.h MachTab.h Mach.h Blas.h Timer.h \ + Shareable.h +MachCopy.o: MachCopy.cpp Tools.h MachCopy.h Mach.h Blas.h Timer.h +MachLin.o: MachLin.cpp Tools.h MachLin.h Mach.h Blas.h Timer.h \ + Shareable.h +MachSig.o: MachSig.cpp Tools.h MachSig.h MachLin.h Mach.h Blas.h Timer.h \ + Shareable.h +MachTanh.o: MachTanh.cpp Tools.h MachTanh.h MachLin.h Mach.h Blas.h \ + Timer.h Shareable.h +MachSoftmax.o: MachSoftmax.cpp Tools.h MachSoftmax.h MachLin.h Mach.h \ + Blas.h Timer.h Shareable.h +MachSoftmaxStable.o: MachSoftmaxStable.cpp Tools.h MachSoftmaxStable.h \ + MachLin.h Mach.h Blas.h Timer.h Shareable.h +MachLinRectif.o: MachLinRectif.cpp Tools.h MachLinRectif.h MachLin.h \ + Mach.h Blas.h Timer.h Shareable.h +MachMulti.o: MachMulti.cpp Tools.h MachMulti.h Mach.h Blas.h Timer.h +MachSeq.o: MachSeq.cpp Tools.h MachSeq.h MachMulti.h Mach.h Blas.h \ + Timer.h +MachPar.o: MachPar.cpp Tools.h MachTab.h Mach.h Blas.h Timer.h \ + Shareable.h MachPar.h MachMulti.h +MachSplit.o: MachSplit.cpp Tools.h MachSplit.h MachMulti.h Mach.h Blas.h \ + Timer.h +MachSplit1.o: MachSplit1.cpp Tools.h MachSplit1.h MachMulti.h Mach.h \ + Blas.h Timer.h +MachJoin.o: MachJoin.cpp Tools.h 
MachJoin.h MachMulti.h Mach.h Blas.h \ + Timer.h +Data.o: Data.cpp Tools.h Data.h DataFile.h WordList.h DataAscii.h \ + DataAsciiClass.h DataMnist.h DataNgramBin.h DataPhraseBin.h +DataFile.o: DataFile.cpp Tools.h Data.h DataFile.h WordList.h +DataAscii.o: DataAscii.cpp Tools.h Data.h DataFile.h WordList.h \ + DataAscii.h +DataAsciiClass.o: DataAsciiClass.cpp Tools.h Data.h DataFile.h WordList.h \ + DataAsciiClass.h DataAscii.h +DataMnist.o: DataMnist.cpp Tools.h Data.h DataFile.h WordList.h \ + DataMnist.h +DataNgramBin.o: DataNgramBin.cpp Tools.h Data.h DataFile.h WordList.h \ + DataNgramBin.h +DataPhraseBin.o: DataPhraseBin.cpp Tools.h DataPhraseBin.h Data.h \ + DataFile.h WordList.h +ErrFct.o: ErrFct.cpp Tools.h ErrFct.h Mach.h Blas.h Timer.h Data.h \ + DataFile.h WordList.h +ErrFctMSE.o: ErrFctMSE.cpp Tools.h ErrFctMSE.h ErrFct.h Mach.h Blas.h \ + Timer.h Data.h DataFile.h WordList.h +ErrFctMCE.o: ErrFctMCE.cpp Tools.h ErrFctMCE.h ErrFct.h Mach.h Blas.h \ + Timer.h Data.h DataFile.h WordList.h +ErrFctCrossEnt.o: ErrFctCrossEnt.cpp Tools.h ErrFctCrossEnt.h ErrFct.h \ + Mach.h Blas.h Timer.h Data.h DataFile.h WordList.h +ErrFctSoftmCrossEntNgram.o: ErrFctSoftmCrossEntNgram.cpp Tools.h \ + ErrFctSoftmCrossEntNgram.h ErrFct.h Mach.h Blas.h Timer.h Data.h \ + DataFile.h WordList.h +ErrFctSoftmCrossEntNgramMulti.o: ErrFctSoftmCrossEntNgramMulti.cpp \ + Tools.h ErrFctSoftmCrossEntNgramMulti.h ErrFct.h Mach.h Blas.h Timer.h \ + Data.h DataFile.h WordList.h ErrFctSoftmCrossEntNgram.h +Hypo.o: Hypo.cpp Hypo.h Tools.h Toolsgz.h +Lrate.o: Lrate.cpp Lrate.h Mach.h Tools.h Blas.h Timer.h +NbestLM.o: NbestLM.cpp NbestLM.h Hypo.h Tools.h Toolsgz.h +NbestCSLM.o: NbestCSLM.cpp Hypo.h Tools.h Toolsgz.h NbestCSLM.h NbestLM.h \ + Mach.h Blas.h Timer.h TrainerNgramSlist.h ErrFct.h Data.h DataFile.h \ + WordList.h DataNgramBin.h TrainerNgram.h Trainer.h Lrate.h BackoffLm.h +Trainer.o: Trainer.cpp Tools.h Mach.h Blas.h Timer.h ErrFctMCE.h ErrFct.h \ + Data.h DataFile.h WordList.h 
Trainer.h Lrate.h +TrainerNgram.o: TrainerNgram.cpp Mach.h Tools.h Blas.h Timer.h \ + TrainerNgram.h ErrFct.h Data.h DataFile.h WordList.h DataNgramBin.h \ + Trainer.h Lrate.h +TrainerNgramSlist.o: TrainerNgramSlist.cpp Tools.h Mach.h Blas.h Timer.h \ + MachTab.h Shareable.h MachPar.h MachMulti.h MachSeq.h \ + TrainerNgramSlist.h ErrFct.h Data.h DataFile.h WordList.h DataNgramBin.h \ + TrainerNgram.h Trainer.h Lrate.h BackoffLm.h +MachSoftmaxClass.o: MachSoftmaxClass.cpp MachSoftmaxClass.h Mach.h \ + Tools.h Blas.h Timer.h MachLin.h Shareable.h MachSoftmax.h WordList.h \ + MachSoftmaxStable.h +ErrFctSoftmClassCrossEntNgram.o: ErrFctSoftmClassCrossEntNgram.cpp \ + ErrFctSoftmClassCrossEntNgram.h ErrFct.h Tools.h Mach.h Blas.h Timer.h \ + Data.h DataFile.h WordList.h MachSoftmaxClass.h MachLin.h Shareable.h \ + MachSoftmax.h +TrainerNgramClass.o: TrainerNgramClass.cpp TrainerNgramClass.h \ + TrainerNgram.h Tools.h Mach.h Blas.h Timer.h ErrFct.h Data.h DataFile.h \ + WordList.h DataNgramBin.h Trainer.h Lrate.h \ + ErrFctSoftmClassCrossEntNgram.h MachSoftmaxClass.h MachLin.h Shareable.h \ + MachSoftmax.h +Shareable.o: Shareable.cpp Shareable.h +WordList.o: WordList.cpp WordList.h Tools.h +MachCombined.o: MachCombined.cpp Tools.h MachCombined.h MachMulti.h \ + Mach.h Blas.h Timer.h +MachAvr.o: MachAvr.cpp Tools.h MachAvr.h MachCombined.h MachMulti.h \ + Mach.h Blas.h Timer.h +Blas.o: Blas.c +NbestLMKEN.o: NbestLMKEN.cpp NbestLMKEN.h NbestLM.h Hypo.h Tools.h \ + Toolsgz.h +BackoffLmKen.o: BackoffLmKen.cpp BackoffLmKen.h BackoffLm.h Tools.h \ + WordList.h diff --git a/BackoffLm.h b/BackoffLm.h new file mode 100644 index 0000000..a9097b1 --- /dev/null +++ b/BackoffLm.h @@ -0,0 +1,56 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. 
+ * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +#ifndef _BackoffLm_h +#define _BackoffLm_h + +#include +#include "Tools.h" // for type WordID + +// We must be very careful with the indices +// - most LM toolkits have their own internal word list +// - binary ngram data files us indices with respect to their word list +// (ideally, this word list should be identical to the one of the LM!) 
+// - the CSLM code with short list performs a mapping of the binary indices +// of the datafiles according to the 1-gram frequency +// +// + +#define NULL_LN_PROB (1.0) // this value must not be possible as a normal return value of ln Prob + +class BackoffLm { + private: + public: + BackoffLm() {}; + virtual ~BackoffLm() {}; + inline virtual int GetOrder() {return 0; }; // returns order of the loaded LM + inline virtual WordID GetVocSize() {return 0; }; // returns size of the vocabulary + virtual int GetSentenceIds(WordID *&wid, const std::string &sentence, bool bos, bool eos) {return 0; }; // gets WordID of words in sentence + virtual REAL BoffPw(char **ctxt, char *w, int req_order) {return 0;} // gets backoff LM P(w|ctxt) from sequence of words + virtual REAL BoffLnPw(char **ctxt, char *w, int req_order) {return -99;} // idem but ln of P(w|ctxt) + virtual REAL BoffPid(REAL *ctxt, WordID predw, int req_order) {return 0;} // similar for sequences of CSLM indices + virtual REAL BoffLnPid(REAL *ctxt, WordID predw, int req_order) {return -99;} + virtual REAL BoffLnStd(WordID *ctxt, WordID predw, int req_order) {return -99; } // simple wrapper w/o mapping + // req-order can be any value +}; + +#endif diff --git a/BackoffLmKen.cpp b/BackoffLmKen.cpp new file mode 100644 index 0000000..b6d08cc --- /dev/null +++ b/BackoffLmKen.cpp @@ -0,0 +1,205 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. 
+ * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +#include +#include +#include "BackoffLmKen.h" +using namespace std; +using namespace lm::ngram; + +BackoffLmKen::BackoffLmKen(char *p_fname, int, const WordList &wlist) +{ + if ((p_fname == NULL) || (p_fname[0] == '\0')) { + // no back-off file + ken_ngram = NULL; + ken_vocab = NULL; + return; + } + + cout << " - reading back-off KENLM from file '" << p_fname << "'" << endl; + ken_ngram = new ProbingModel(p_fname); + if (NULL == ken_ngram) { + cout << " error" << endl; + ken_vocab = NULL; + return; + } + + ken_vocab = &(ken_ngram->GetVocabulary()); + LMWordIndex ken_size = (ken_vocab->Bound() + 1); + printf(" found %d-gram with vocabulary of %d words\n", (int) ken_ngram->Order(), ken_size); + + // set up mapping from/to KENLM indices + WordList::WordIndex wlist_size = wlist.GetSize(); + map_cslm2ken.reserve(wlist_size); + map_cslm2ken.resize(wlist_size); + map_ken2wid.reserve(ken_size); + map_ken2wid.resize(ken_size); + WordList::const_iterator iter = wlist.Begin(), end = wlist.End(); + for (size_t ci = 0 ; iter != end ; iter++, ci++) { + LMWordIndex wi = ken_vocab->Index(iter->word); + map_cslm2ken[ci] = wi; + if (wi == ken_vocab->NotFound()) + fprintf(stderr,"word %s not found at pos %zu\n", 
iter->word, ci); + else + map_ken2wid[wi] = iter->id; + } +} + +BackoffLmKen::~BackoffLmKen() +{ + if (NULL != ken_ngram) + delete ken_ngram; + map_cslm2ken.clear(); + wid_vect.clear(); +} + +/** + * gets WordID of words in sentence + * @param wid output table of WordID (allocated internally) + * @param sentence input sentence + * @param bos start sentence with BOS + * @param eos end sentence with EOS + * @return number of words + */ +int BackoffLmKen::GetSentenceIds(WordID *&wid, const string &sentence, bool bos, bool eos) +{ + if (NULL == ken_vocab) + return 0; + + int nw = 0; + wid_vect.clear(); + + // start sentence with BOS ? + if (bos) { + wid_vect.push_back(map_ken2wid[ken_vocab->BeginSentence()]); + nw++; + } + + istringstream iss(sentence); + while (iss) { + string s; + iss >> s; + if (!s.empty()) { + wid_vect.push_back(map_ken2wid[ken_vocab->Index(s)]); + nw++; + } + } + + // end sentence with EOS ? + if (eos) { + wid_vect.push_back(map_ken2wid[ken_vocab->EndSentence()]); + nw++; + } + + wid = &(wid_vect.front()); + return nw; +} + +/** + * gets ln of backoff LM P(w|ctxt) from sequence of words + */ +REAL BackoffLmKen::BoffLnPw(char **ctxt, char *w, int req_order) + // gets LOG_e backoff LM proba from a sequence of CSLM indices + // if the order of the back-off LM is smaller than we use the last n-1 words of the context + // w1 w2 w3 -> w4 + // \ 2-gram / + // \-- 3-gram --/ + // \---- 4-gram ----/ +{ +#ifdef DEBUG + printf ("\nrequest KENLM %d-gram: %s ", req_order, ctxt[0]); + for (int i = 1; i < (req_order - 1); i++) printf(", %s", ctxt[i]); + printf(" -> %s \n", w); +#endif + if (NULL == ken_ngram) + // return constant value if we have no LM + return NULL_LN_PROB; + + State state(ken_ngram->NullContextState()), out_state; + for (int i = 0; i < (req_order - 1); i++) { + ken_ngram->Score(state, ken_vocab->Index(ctxt[i]), out_state); + state = out_state; + } + + // we need to convert from log_10 to ln + return M_LN10 * ken_ngram->Score(state, 
ken_vocab->Index(w), out_state); +} + +/** + * gets ln of backoff LM P(w|ctxt) from sequence of CSLM indices + */ +REAL BackoffLmKen::BoffLnPid(REAL *ctxt, WordID predw, int req_order) + // gets LOG_e backoff LM proba from a sequence of CSLM indices + // if the order of the back-off LM is smaller than we use the last n-1 words of the context + // w1 w2 w3 -> w4 + // \ 2-gram / + // \-- 3-gram --/ + // \---- 4-gram ----/ +{ +#ifdef DEBUG + printf ("\nrequest KENLM %d-gram: %d ", req_order, (WordID) ctxt[0]); + for (int i = 1; i < (req_order - 1); i++) printf(", %d", (WordID) ctxt[i]); + printf(" -> %d \n", predw); +#endif + if (NULL == ken_ngram) + // return constant value if we have no LM + return NULL_LN_PROB; + + State state(ken_ngram->NullContextState()), out_state; + for (int i = 0; i < (req_order - 1); i++) { + ken_ngram->Score(state, map_cslm2ken[(WordID) ctxt[i]], out_state); + state = out_state; + } + + // we need to convert from log_10 to ln + return M_LN10 * ken_ngram->Score(state, map_cslm2ken[predw], out_state); +} + +/** + * gets ln of backoff LM P(w|ctxt) from sequence of CSLM indices, without mapping + */ +REAL BackoffLmKen::BoffLnStd(WordID *ctxt, WordID predw, int req_order) + // gets LOG_e backoff LM proba from a sequence of CSLM indices + // if the order of the back-off LM is smaller than we use the last n-1 words of the context + // w1 w2 w3 -> w4 + // \ 2-gram / + // \-- 3-gram --/ + // \---- 4-gram ----/ +{ +#ifdef DEBUG + printf ("\nrequest KENLM %d-gram: %d ", req_order, ctxt[0]); + for (int i = 1; i < (req_order - 1); i++) printf(", %d", ctxt[i]); + printf(" -> %d \n", predw); +#endif + if (NULL == ken_ngram) + // return constant value if we have no LM + return NULL_LN_PROB; + + State state(ken_ngram->NullContextState()), out_state; + for (int i = 0; i < (req_order - 1); i++) { + ken_ngram->Score(state, ctxt[i], out_state); + state = out_state; + } + + // we need to convert from log_10 to ln + return M_LN10 * ken_ngram->Score(state, predw, 
out_state); +} diff --git a/BackoffLmKen.h b/BackoffLmKen.h new file mode 100644 index 0000000..22e4b67 --- /dev/null +++ b/BackoffLmKen.h @@ -0,0 +1,99 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +#ifndef _BackoffLmKen_h +#define _BackoffLmKen_h + + +#include + +#include "BackoffLm.h" +#include "Tools.h" +#include "WordList.h" + +// from KENLM +#include +#include + +class BackoffLmKen : public BackoffLm { + private: + lm::ngram::Model *ken_ngram; + const lm::ngram::Vocabulary *ken_vocab; + std::vector map_cslm2ken; // map internal CSLM indices to internal KENLM WordIndex + std::vector map_ken2wid; // map internal KENLM WordIndex to internal WordID + std::vector wid_vect; // vector of WordID in sentence + + public: + BackoffLmKen(char *p_fname, int p_max_order, const WordList &wlist); + virtual ~BackoffLmKen(); + + /** + * returns order of the loaded LM + */ + inline virtual int GetOrder() { + return ((NULL != ken_ngram) ? ken_ngram->Order() : 0); } + + /** + * returns size of the vocabulary + */ + inline virtual WordID GetVocSize() { + return ((NULL != ken_vocab) ? 
(ken_vocab->Bound() + 1) : 0); } + + /** + * gets WordID of words in sentence + * @param wid output table of WordID (allocated internally) + * @param sentence input sentence + * @param bos start sentence with BOS + * @param eos end sentence with EOS + * @return number of words + */ + virtual int GetSentenceIds(WordID *&wid, const std::string &sentence, bool bos, bool eos); + + /** + * gets backoff LM P(w|ctxt) from sequence of words + */ + inline virtual REAL BoffPw(char **ctxt, char *w, int req_order) { + return exp(BoffLnPw(ctxt, w, req_order)); } + + /** + * gets ln of backoff LM P(w|ctxt) from sequence of words + */ + virtual REAL BoffLnPw(char **ctxt, char *w, int req_order); + + /** + * gets backoff LM P(w|ctxt) from sequence of CSLM indices + */ + inline virtual REAL BoffPid(REAL *ctxt, WordID predw, int req_order) { + return exp(BoffLnPid(ctxt, predw, req_order)); } + + /** + * gets ln of backoff LM P(w|ctxt) from sequence of CSLM indices + */ + virtual REAL BoffLnPid(REAL *ctxt, WordID predw, int req_order); + + /** + * gets ln of backoff LM P(w|ctxt) from sequence of CSLM indices, without mapping + */ + virtual REAL BoffLnStd(WordID *ctxt, WordID predw, int req_order); +}; + +#endif diff --git a/BackoffLmSri.cpp b/BackoffLmSri.cpp new file mode 100644 index 0000000..ef810cb --- /dev/null +++ b/BackoffLmSri.cpp @@ -0,0 +1,175 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. 
+ * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +using namespace std; +#include +#include +#include "BackoffLmSri.h" + +// +// +// + +void BackoffLmSri::BackoffLmSri_init(char *p_fname, int p_max_order) +{ + if ((p_fname == NULL) || (p_fname[0] == '\0')) { + // no back-off file + sri_vocab = NULL; + sri_ngram = NULL; + sri_order = p_max_order; + sri_context_idxs = NULL; + return; + } + + if (p_max_order < 2) + Error ("unsupported order of the SRI LM"); // TODO: give the actual order + + sri_vocab = new Vocab(); + + if (strstr(p_fname,".vocab")) { + cout << " - vocabulary " << p_fname << "was specified instead of an LM" << endl; + + sri_vocab->unkIsWord() = true; + sri_vocab->toLower() = false; + { + File file(p_fname, "r"); + sri_vocab->read(file); + //voc->remove("-pau-"); + } + cout << " found "<< sri_vocab->numWords() << ", returning lnProp=" << NULL_LN_PROB << "in all calls" << endl; + + sri_order=p_max_order; // TODO: is this correct + sri_ngram = NULL; + } + else { + cout << " - reading back-off SRILM from file '" << p_fname << "'" << endl; + sri_ngram = new Ngram(*sri_vocab, p_max_order); + + // reading SRI LM + sri_ngram->setorder(p_max_order); + sri_ngram->skipOOVs() = false; + File ngram_file(p_fname, "r"); + 
sri_ngram->read(ngram_file, 0); + + // get number of n-grams + // TODO: can we get the order of the model read from file ? + vector nb_ngrams; + nb_ngrams.push_back(sri_vocab->numWords()); + cout << " vocabulary: " << nb_ngrams[0] << " words; ngrams:"; + sri_order=0; + for (int o=1; o<=p_max_order; o++) { + nb_ngrams.push_back(sri_ngram->numNgrams(o)); + cout << " " << nb_ngrams.back(); + if (nb_ngrams.back()>0) sri_order++; + } + } + + cout << " (order=" << sri_order << ")" << endl; + if (sri_order > p_max_order) { + cout << " - limiting order of the back-off LM to the order of the CSLM (" << p_max_order << ")" << endl; + sri_order = p_max_order; + } + +#ifdef LM_SRI0 + for (i=wlist.begin(); i!=wlist.end(); i++) { + int sri_idx = sri_vocab->getIndex((*i).word); +printf("word=%s, sri=%d, wlist=%d\n", (*i).word, sri_idx, (*i).id); + } +#endif + + // reserve memory for the context in SRI format + sri_context_idxs = new VocabIndex[sri_order+1]; + sri_context_idxs[sri_order-1]=Vocab_None; // terminate, this is needed to specify the length of the context + + map_cslm2sri.clear(); +} + +// +// +// + +BackoffLmSri::BackoffLmSri(char *p_fname, int p_max_order, const WordList &wlist) +{ + BackoffLmSri::BackoffLmSri_init(p_fname, p_max_order); + if (NULL == sri_vocab) + return; + + // set up mapping from CSLM indices to SRI LM indices + cout << " - setting up mapping from CSLM to SRI word list" << endl; + WordList::WordIndex wlsz = wlist.GetSize(); + map_cslm2sri.reserve(wlsz); + map_cslm2sri.resize(wlsz); + WordList::const_iterator iter = wlist.Begin(), end = wlist.End(); + for (size_t ci=0; iter!=end; iter++, ci++) { + VocabIndex vi = sri_vocab->getIndex(iter->word); + if (vi == Vocab_None) { + fprintf(stderr,"word %s not found at pos %zu\n", iter->word, ci ); + } + else + map_cslm2sri[ci] = vi; + } +} + +BackoffLmSri::~BackoffLmSri() { + if (sri_vocab) delete sri_vocab; + if (sri_ngram) delete sri_ngram; + map_cslm2sri.clear(); + if (sri_context_idxs) delete [] 
sri_context_idxs; +} + +/** + * gets WordID of words in sentence + * @param wid output table of WordID (allocated internally) + * @param sentence input sentence + * @param bos start sentence with BOS + * @param eos end sentence with EOS + * @return number of words + */ +int BackoffLmSri::GetSentenceIds(WordID *&wid, const string &sentence, bool bos, bool eos) +{ + if (NULL == sri_vocab) + return 0; + + int nw = 0; + static char str[max_words*16]; + static VocabString vstr[max_words-1]; + + strcpy(str,sentence.c_str()); // we need to copy since parseWords() modifies the string + nw = sri_vocab->parseWords(str, vstr, max_words - 1); + if (nw >= max_words-1) Error("too many words in one hypothesis\n"); + + int b=0; + // start sentence with BOS ? + if (bos) wid_table[b++]=sri_vocab->ssIndex(); + + sri_vocab->getIndices(vstr, (VocabIndex*) (wid_table+b), nw + 1, sri_vocab->unkIndex()); +#ifdef DEBUG + for (int i=0;iseIndex(); + + wid = wid_table; + return nw; +} diff --git a/BackoffLmSri.h b/BackoffLmSri.h new file mode 100644 index 0000000..37de379 --- /dev/null +++ b/BackoffLmSri.h @@ -0,0 +1,137 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +#ifndef _BackoffLmSri_h +#define _BackoffLmSri_h + +#include +#include + +#include "BackoffLm.h" +#include "Tools.h" + +// from SRILM +#include +#include +#include "WordList.h" + + +class BackoffLmSri : public BackoffLm { + private: + static const int max_words=16384; // max words in a sentence + Vocab *sri_vocab; + Ngram *sri_ngram; + int sri_order; + std::vector map_cslm2sri; // map internal CSLM indices to internal SRI VocabIndex + VocabIndex *sri_context_idxs; // internal storage of n-gram size + WordID wid_table[max_words]; // table of WordID in sentence + void BackoffLmSri_init(char *p_fname, int p_max_order); + + public: + BackoffLmSri(char *p_fname, int p_max_order) + : sri_vocab(NULL), sri_ngram(NULL), sri_order(0) {BackoffLmSri_init(p_fname, p_max_order); } + BackoffLmSri(char *p_fname, int p_max_order, const WordList &wlist); + virtual ~BackoffLmSri(); + inline virtual int GetOrder() { return sri_order; } + inline virtual WordID GetVocSize() { + return ((NULL != sri_vocab) ? 
sri_vocab->numWords() : 0); } + + /** + * gets WordID of words in sentence + * @param wid output table of WordID (allocated internally) + * @param sentence input sentence + * @param bos start sentence with BOS + * @param eos end sentence with EOS + * @return number of words + */ + virtual int GetSentenceIds(WordID *&wid, const std::string &sentence, bool bos, bool eos); + + virtual REAL BoffPw(char **ctxt, char *w, int req_order) // gets backoff LM P(w|ctxt) from sequence of words + { Error ("BoffPw() not implmented for SRIL LMs"); return 0; } + virtual REAL BoffLnPw(char **ctxt, char *w, int req_order) // idem but ln of P(w|ctxt) + // if the order of the back-off LM is smaller than we use the last n-1 words of the context + { Error ("BoffLnPw() not implmented for SRIL LMs"); return -99; } + virtual REAL BoffLnPid(REAL *ctxt, WordID predw, int req_order) + // gets LOG_e backoff LM proba from a sequence of CSLM indices + // if the order of the back-off LM is smaller than we use the last n-1 words of the context + // w1 w2 w3 -> w4 + // \ 2-gram / + // \-- 3-gram --/ + // \---- 4-gram ----/ + { +#ifdef DEBUG + printf ("\nrequest SRI %d-gram: %d ", req_order, (WordID) ctxt[0]); + for (int i=1; i %d \n", predw); +#endif + if (!sri_ngram) return NULL_LN_PROB; // return constant value if we have no LM + + // SRILM requires a context vector which contains the words in REVERSE order + int i; + for (i=0; i sri=%d, sri_idx=%d word=%s\n", j, i, sri_context_idxs[i], sri_vocab->getWord(sri_context_idxs[i]) ); + } + sri_context_idxs[i]=Vocab_None; // terminate, this is needed to specify the length of the context + //printf(" - predict cslm_id=%d, sri_idx=%d word=%s\n", predw, map_cslm2sri[predw], sri_vocab->getWord(map_cslm2sri[predw]) ); + +#ifdef DEBUG + printf(" - SRI context: "); + for (i=0; sri_context_idxs[i]!=Vocab_None; i++) { + printf(" %s [%d]", sri_vocab->getWord(sri_context_idxs[i]), sri_context_idxs[i] ); + } + printf(" -> %s [%d]", 
sri_vocab->getWord(map_cslm2sri[predw]), map_cslm2sri[predw]); + printf (", log10P=%e\n", sri_ngram->wordProb(map_cslm2sri[predw], sri_context_idxs)); +#endif + + // we need to convert from log_10 to ln + return M_LN10 * sri_ngram->wordProb(map_cslm2sri[predw], sri_context_idxs); + } + virtual REAL BoffPid(REAL *ctxt, WordID predw, int req_order) {return exp(BoffLnPid(ctxt,predw,req_order)); } + virtual REAL BoffLnStd(WordID *ctxt, WordID predw, int req_order) + { + // standard back-off n-gram wrapper, + // SRILM properly shortens the context if we request an n-gram with an order that is larger then the back-off LM + + if (!sri_ngram) return NULL_LN_PROB; // return constant value if we have no LM + + int i; + for (i=0; igetWord(sri_context_idxs[i]), sri_context_idxs[i] ); + } + printf(" -> %s [%d]", sri_vocab->getWord(predw), predw); + printf (", log10P=%e\n", sri_ngram->wordProb(predw, sri_context_idxs)); +#endif + return M_LN10 * sri_ngram->wordProb(predw, sri_context_idxs); // convert from log_10 to ln + } +}; + +#endif diff --git a/Blas.c b/Blas.c new file mode 100644 index 0000000..50ca989 --- /dev/null +++ b/Blas.c @@ -0,0 +1,34 @@ +/* + * This file is part of the continuous space language model toolkit for large + * vocabulary speech recognition and statistical machine translation. + * + * Copyright 2014, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * $Id: Blas.c,v 1.4 2014/02/03 15:35:59 coetmeur Exp $ + * + */ + +#include + +// basic implementation of fucntions on vectors +// It is more efficient to use vectorized functions, for instance those available in MKL +// + +void atlas_vtanh(int *n, float *d) {int i; for (i=0; i<*n; i++, d++) *d = tanh(*d); } +void atlas_vlog(int *n, float *d) {int i; for (i=0; i<*n; i++, d++) *d = log(*d); } +void atlas_vexp(int *n, float *d) {int i; for (i=0; i<*n; i++, d++) *d = exp(*d); } +void atlas_vsqr(int *n, float *d) {int i; for (i=0; i<*n; i++, d++) *d *= *d; } + diff --git a/Blas.h b/Blas.h new file mode 100644 index 0000000..ac4e404 --- /dev/null +++ b/Blas.h @@ -0,0 +1,187 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +#ifndef _Blas_h +#define _Blas_h + +#include // memcpy() +#include "Tools.h" + + +//------------------------------------------- +// support for Intel's MKL +//------------------------------------------- + +#ifdef BLAS_INTEL_MKL + extern int inc1; + #include "mkl_blas.h" + #include "mkl_vml.h" +// for single precision + #define COPY scopy // BLAS1 + #define ASUM sasum + #define AXPY saxpy + #define SCAL sscal + #define GEMV sgemv // BLAS2 + #define GER sger + #define GEMM sgemm // BLAS3 + // special vectorized functions of MKL +#if 0 + #define VSQR atlas_vsqr + #define VLOG atlas_vlog + #define VTANH atlas_vtanh + #define VEXP atlas_vexp + extern "C" void atlas_vtanh(int *n, float *d); + extern "C" void atlas_vlog(int *n, float *d); + extern "C" void atlas_vexp(int *n, float *d); + extern "C" void atlas_vsqr(int *n, float *d); +#else + #define VSQR(n,d) vssqr_(n,d,d) + #define VLOG(n,d) vslog_(n,d,d) + #define VTANH(n,d) vstanh_(n,d,d) + #define VEXP(n,d) vsexp_(n,d,d) +#endif + +#endif + +//------------------------------------------- +// support for Nvidia GPU cards +//------------------------------------------- + +#ifdef BLAS_CUDA + #include "Gpu.cuh" + #define COPY Gpu::CublasScopy // Blas1 + #define ASUM Gpu::CublasSasum + #define AXPY Gpu::CublasSaxpy + #define SCAL Gpu::CublasSscal + #define GEMV Gpu::CublasSgemv // Blas2 + #define GER Gpu::CublasSger + #define GEMM Gpu::CublasSgemm // Blas3 + + #define VSQR(n,d) nppsSqr_32f_I(d,*n) + #define VLOG(n,d) nppsLn_32f_I(d,*n) + #define VEXP(n,d) nppsExp_32f_I(d,*n) +#endif + +//------------------------------------------- +// support for standard BLAS +//------------------------------------------- + +#ifdef BLAS_ATLAS +extern "C" void sscal_(const int *n, float *a, const float 
*x, const int *incx);
+extern "C" float sasum_(const int *n, const float *x, const int *incx);
+extern "C" void saxpy_(const int *n, const float *a, const float *x, const int *incx, float *y, const int *incy);
+extern "C" void scopy_(int *n, const float *x, int *incx, float *y, int *incy);
+extern "C" void sgemv_(const char *trans, const int *m, const int *n, const float *alpha,
+                       const float *a, const int *lda, const float *x, const int *incx,
+                       const float *beta, float *y, const int *incy);
+extern "C" void sger_(const int *m, const int *n, const float *alpha,
+                      const float *x, const int *incx, const float *y, const int *incy,
+                      float *A, const int *lda);
+extern "C" void sgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k,
+                       const float *alpha, const float *a, const int *lda, const float *b, const int *ldb,
+                       const float *beta, float *c, const int *ldc);
+  // map the generic macro names used throughout the toolkit onto the
+  // Fortran-style (underscore-suffixed, pointer-argument) BLAS entry points
+  #define COPY scopy_
+  #define GEMV sgemv_
+  #define GER sger_
+  #define GEMM sgemm_
+  #define AXPY saxpy_
+  #define SCAL sscal_
+  #define ASUM sasum_
+
+  extern int inc1;
+
+  // vectorized element-wise helpers provided by the accompanying ATLAS glue
+  // code; each operates in place on d[0..n-1]
+  extern "C" void atlas_vtanh(int *n, float *d);
+  extern "C" void atlas_vlog(int *n, float *d);
+  extern "C" void atlas_vexp(int *n, float *d);
+  extern "C" void atlas_vsqr(int *n, float *d);
+
+  #define VSQR atlas_vsqr
+  #define VLOG atlas_vlog
+  #define VTANH atlas_vtanh
+  #define VEXP atlas_vexp
+#endif
+
+// matrix/vector multiplication: c = 1.0*A * b + 1.0 * c
+// the matrix must be stored in COLUMN-MAJOR order
+
+/*--------------------------------------------------------------------------*
+ *
+ *  Wrapper routine for the GEMV function
+ *  that uses the TRANSPOSED fortran routine
+ *
+ *    dest = matrix * source + bias
+ *
+ *    dest:   dim_dest x 1
+ *    matrix: dim_dest x dim_src   (column-major)
+ *    source: dim_src  x 1
+ *    bias:   dim_dest x 1  (copied into dest first, then GEMV accumulates)
+ *
+ *--------------------------------------------------------------------------*/
+
+inline void call_gemv (REAL *dest, REAL *matrix, REAL *source, REAL *bias,
+                       int dim_dest, int dim_src)
+{
+  char trans = 'N';
+  REAL fact = 1.0;
+  int inc = 1;
+
+  //  int sgemv(char *trans, integer *m, integer *n,
+  //            real *alpha, real *a, integer *lda,
+  //            real *x, integer *incx, real *beta, real *y, integer *incy)
+  //
+  //  y := alpha*A*x + beta*y
+  //            m x n
+
+#ifdef BLAS_CUDA
+  // on GPU the bias is first copied into dest, then GEMV accumulates with beta=1
+  COPY(dim_dest,bias,inc,dest,inc);	// TODO: verify
+  GEMV(trans, dim_dest, dim_src, fact, matrix, dim_dest, source, inc, fact, dest, inc);
+  Gpu::CheckError("call_gemv");
+#else
+  // CPU path: seed dest with the bias, then accumulate A*source (beta = 1.0)
+  memcpy(dest, bias, dim_dest * sizeof(REAL));
+  GEMV(&trans, &dim_dest, &dim_src, &fact, matrix, &dim_dest, source, &inc, &fact, dest, &inc);
+#endif
+}
+
+
+// matrix/matrix multiplication: C = alpha*A * B + beta * C
+// all three matrices must be stored in COLUMN-MAJOR order
+//   C: dimy x dimx,  A: dimy x dimk,  B: dimk x dimx  (alpha is fixed to 1.0)
+
+inline void call_gemm (REAL *C, REAL *A, REAL *B, REAL beta, int dimy, int dimx, int dimk)
+{
+  char transN = 'N';
+  REAL alpha = 1.0;
+
+  //  gemm ( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc )
+  //  * C = alpha*A * B + beta * b
+  //    mxn        mxk  kxn
+  //    lda        ldb       ldc
+
+  // NOTE(review): this trace fires for every backend, not only MKL
+  TRACE("-mkl- call gemm\n");
+#ifdef BLAS_CUDA
+  GEMM (transN, transN, dimy, dimx, dimk, alpha, A, dimy, B, dimk, beta, C, dimy);
+  Gpu::CheckError("call_gemm");
+#else
+  GEMM (&transN, &transN, &dimy, &dimx, &dimk, &alpha, A, &dimy, B, &dimk, &beta, C, &dimy);
+#endif
+}
+
+#endif
diff --git a/Data.cpp b/Data.cpp
new file mode 100644
index 0000000..9ff85d7
--- /dev/null
+++ b/Data.cpp
@@ -0,0 +1,770 @@
+/*
+ * This file is part of the continuous space language and translation model toolkit
+ * for statistical machine translation and large vocabulary speech recognition.
+ * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + */ + +using namespace std; +#include +#include +#include +#include +#include +#include +#include + +#include "Tools.h" +#include "Data.h" +#include "DataAscii.h" +#include "DataAsciiClass.h" +#include "DataMnist.h" +#include "DataNgramBin.h" +#include "DataPhraseBin.h" + +const char* DATA_HEADER_TXT="DataDescr"; +const int DATA_HEADER_ID=3; +const char* DATA_PRELOAD="Preload"; +const int DATA_PRELOAD_ACT=8; // preloading is activated, additional flags: +const int DATA_PRELOAD_TODO=1; // mark that preloading was not yet done, we use this to avoid multiple (costly) preloading + // this flag is set by Next() -> new Rewind() triggers resampling +const int DATA_PRELOAD_ONCE=4; // we resample only once, even if rewind is called many times +const char* DATA_RESAMPL_MODE="ResamplMode"; +const char* DATA_RESAMPL_SEED="ResamplSeed"; +const char* DATA_SHUFFLE_MODE="ShuffleMode"; +const char* DATA_NORMALIZE_MODE="Normalize"; +const char* DATA_PATH_PREFIX="PathPrefix"; +const char* DATA_NB_FACTORS="NumberFactors"; +const char* DATA_SENTENCE_SCORES="SentenceScores"; +const char* DATA_AUXILIARY_DATA="AuxiliaryData"; +const char* DATA_BETWEEN_SENT_CONTEXT="BetweenSentenceContext"; +const char* DATA_WORD_LIST="WordList"; 
+const char* DATA_WORD_LIST_TARGET="WordListTarget"; +const char* DATA_WORD_LIST_SOURCE="WordListSource"; +const char* DATA_WORD_LIST_MULT="WordListMultiple"; +const char* DATA_WORD_LIST_UNSTABLE="UseUnstableSort"; // switch for compatibility with CSLM <= V3.0 to use unstable sort +const char* DATA_WITH_EOS="with_eos"; + +/************************** + * + **************************/ + +Data::Data() + : use_unstable_sort(false), + fname(NULL), path_prefix(NULL), idim(0), odim(0), auxdim(0), nb_SentSc(0), tgpos(-1), nb_totl(0), + preload(0),betweenSent_ctxt(0), resampl_mode(0), resampl_seed(1234567890), shuffle_mode(0), norm_mode(0), nb_factors(1), + sr_wl_idx(-1), tg_wl_idx(-1), current_df(0), idx(-1), + mem_cdf(NULL), mem_inp(NULL), mem_trg(NULL), input(NULL), target(NULL), aux(NULL) +{} + +Data::Data(const char *p_fname, Data *other_data, bool use_class) + : use_unstable_sort(false), + fname(p_fname), path_prefix(NULL), idim(0), odim(0), auxdim(0), nb_SentSc(0), tgpos(-1), nb_totl(0), + preload(0),betweenSent_ctxt(0), resampl_mode(0), resampl_seed(1234567890), shuffle_mode(0), norm_mode(0), nb_factors(1), + sr_wl_idx(-1), tg_wl_idx(-1), current_df(0), idx(-1), + mem_cdf(NULL), mem_inp(NULL), mem_trg(NULL), input(NULL), target(NULL), aux(NULL) +{ + ReadFile(other_data, use_class); +} + +void Data::ReadFile(Data *other_data, bool use_class) +{ + cout << "Opening data description '" << fname << "'" << endl; + ifstream ifs; + ifs.open(fname,ios::in); + CHECK_FILE(ifs,fname); + + // parsing data description + int i=ReadInt(ifs,DATA_HEADER_TXT); + if (i>DATA_HEADER_ID) Error("unknown data description header\n"); + + vector lang_used; // languages used for each data file (1: only one, 2: source and target, -1: several) + string auxdata_ext; + string SentSc_ext; + + while (!ifs.eof()) { + bool ok=false; + vector factored_df; + + string buf; char line[DATA_LINE_LEN]; + ifs >> buf; + if (buf[0]=='#') {ifs.getline(line, DATA_LINE_LEN); continue;} // skip comments + if 
(buf=="") break; // HACK + if (buf==DATA_PRELOAD) { preload=DATA_PRELOAD_ACT | DATA_PRELOAD_TODO; ok=true; } + if (buf==DATA_RESAMPL_MODE) { ifs >> resampl_mode; ok=true; } + if (buf==DATA_RESAMPL_SEED) { ifs >> resampl_seed; ok=true; } + if (buf==DATA_SHUFFLE_MODE) { ifs >> shuffle_mode; ok=true; } + if (buf==DATA_NORMALIZE_MODE) { ifs >> norm_mode; ok=true; } + if (buf==DATA_WORD_LIST_UNSTABLE) { use_unstable_sort=true; ok=true; } + if (buf==DATA_PATH_PREFIX) { + string tmp; + ifs >> tmp; ok=true; + cout << "Prefix for all data files: " << tmp << endl; + path_prefix=strdup(tmp.c_str()); // ugly + } + if (buf==DATA_NB_FACTORS) { + ifs >> nb_factors; + if (nb_factors<1) Error("The number of factors must be at least one"); + ok=true; + } + if (buf==DATA_SENTENCE_SCORES) { + string SentScInfo_buf; + getline(ifs,SentScInfo_buf); + stringstream SentScInfo_str(SentScInfo_buf); + SentScInfo_str >> nb_SentSc >> SentSc_ext >> ExpGrowth; + + if(!SentScInfo_str){ + nb_SentSc = 1; + SentSc_ext = "scores"; + ExpGrowth = 0.0; + } + if(ExpGrowth < 0 ) ExpGrowth = 0; + + if( ExpGrowth ) + cout<<"Resampling with ExpGrowth ("<> auxdim >> auxdata_ext; // read dimension and file extension + if (!dad_str) + auxdata_ext = "aux"; // use default extension + else if ('.' 
== auxdata_ext[0]) + auxdata_ext.erase(0, 1); + if (auxdim<1) Error("The auxiliary data dimension must be at least one"); + ok=true; + } + if (buf==DATA_WORD_LIST_SOURCE) { + sr_wl_idx = wlist.size(); + CreateWordList(ifs, sr_wl_idx, use_class); + ok=true; + } + if ( (buf==DATA_WORD_LIST ) + || (buf==DATA_WORD_LIST_TARGET) ) { + tg_wl_idx = wlist.size(); + CreateWordList(ifs, tg_wl_idx, use_class); + ok=true; + } + if (buf==DATA_WORD_LIST_MULT) { + size_t idx = -1; + ifs >> idx; + if (idx != (size_t)-1) { + CreateWordList(ifs, idx, use_class); + ok=true; + } + } + + if (buf==DATA_FILE_ASCII) { + factored_df.clear(); + factored_df.push_back(new DataAscii(path_prefix,ifs, auxdim, auxdata_ext, nb_SentSc, SentSc_ext, betweenSent_ctxt)); + for (int i=1; iGetTgPos(); + for (int i=1; i ifs is no more correct !!! + //cerr << "reading " << nb_factors-1 << " factor datafiles ..." << endl; + factored_df.push_back(new DataNgramBin(path_prefix,ifs, auxdim, auxdata_ext, nb_SentSc, SentSc_ext,betweenSent_ctxt, (DataNgramBin*)factored_df[0])); + } + datafile.push_back(factored_df); + lang_used.push_back(1); + ok=true; + } + + + if (buf==DATA_FILE_PHRASEBIN) { + factored_df.clear(); + for (int i = 0 ; i < nb_factors ; i++) { + factored_df.push_back(new DataPhraseBin(path_prefix, ifs, auxdim, auxdata_ext, nb_SentSc, SentSc_ext,betweenSent_ctxt, (0 < i) ? 
(DataPhraseBin*)factored_df[0] : NULL)); + if ((sr_wl_idx != (size_t)-1) && (tg_wl_idx != (size_t)-1)) + factored_df.back()->SetWordLists(&(wlist[sr_wl_idx]->at(i)), &(wlist[tg_wl_idx]->at(i))); + } + datafile.push_back(factored_df); + lang_used.push_back(2); + ok=true; + } + + /*if (datafile.size()==1) { + // input and output dimension is sum of factors + idim=odim=0; + for (vector::iterator it=datafile[0].begin(); it!=datafile[0].end(); ++it) { + idim+=(*it)->GetIdim(); + odim+=(*it)->GetOdim(); + } + }*/ + if (datafile.size()>=1) { + // check whether additional datafiles have the same dimensions (this is implicitly OK for factors) + // Loic -> why not checking factors also ? + // TODO: check nb of examples + idim=odim=0; + for(vector< vector >::iterator itdf=datafile.begin(); itdf!=datafile.end(); ++itdf){ + int nidim=0, nodim=0; + for (vector::iterator itfactor=(*itdf).begin(); itfactor!=(*itdf).end(); ++itfactor) { + nidim+=(*itfactor)->GetIdim(); + nodim+=(*itfactor)->GetOdim(); + } + if(idim==0 && odim==0){ + idim=nidim; odim=nodim; + } else { + if (idim != nidim) Error("Data::Readfile: mismatch in input dimension\n"); + if (odim != nodim) Error("Data::ReadFile: mismatch in output dimension\n"); + } + } + } + + if (!ok) { + ifs.getline(line, DATA_LINE_LEN); + cerr << buf << "" << line << endl; + Error("Data::ReadFile: parse error in above line of the datafile\n"); + } + } + ifs.close(); + if (0 > tgpos) + // set default target position + tgpos = idim; + + // check word lists + if (tg_wl_idx == (size_t)-1) { + for (vector::const_iterator ci = lang_used.begin(), cie = lang_used.end() ; ci != cie ; ci++) + if ((1 == *ci) || (2 == *ci)) // target word list is needed + Error("No target word list given\n"); + } + else if (other_data != NULL) { + size_t stNbWList = min(wlist.size(), other_data->wlist.size()); + for (size_t st = 0 ; st < stNbWList ; st++) { + if (st == sr_wl_idx) + CompareWordList(sr_wl_idx, *other_data, other_data->sr_wl_idx); + else if ((st == 
tg_wl_idx) && (sr_wl_idx != (size_t)-1)) + CompareWordList(tg_wl_idx, *other_data, other_data->tg_wl_idx); + else + CompareWordList(st, *other_data, st); + } + } + + nb_totl=0; + cout << "Summary of used data: (" << nb_factors << " factors)" << endl; + df_dim.resize(datafile.size()); + for (size_t df = 0, dfs = datafile.size() ; df < dfs ; df++) { + DataFile* dff = datafile[df].front(); + nb_totl+=dff->Info(); + if (nb_factors>1) { + for (i=1; iInfo(""); + } + } + df_dim[df].resize(wlist.size(), make_pair(0, NULL)); + switch(lang_used[df]) { + case 2: + df_dim[df][1].first = dff->GetOdim(); + df_dim[df][1].second = dff->target_vect; + case 1: + df_dim[df][0].first = dff->GetIdim(); + df_dim[df][0].second = dff->input; + break; + } + } + + cout << " - total number of examples: " << nb_totl << endl; + cout << " - dimensions: input=" << idim << ", output=" << odim << endl; + if (resampl_mode) { + cout << " - resampling with seed " << resampl_seed << endl; + srand48(resampl_seed); + } + if (preload > 0) { + printf(" - allocating preload buffer of %.1f GBytes\n", (REAL) ((size_t) nb_totl*idim*sizeof(REAL) / 1024 / 1024 / 1024)); + mem_cdf = new int[nb_totl]; + mem_inp = new REAL[(size_t) nb_totl*idim]; // cast to 64bit ! + if (odim>0) mem_trg = new REAL[(size_t) nb_totl*odim]; + + // check whether there is a resampling coeff != 0 + // i.e. 
we need to resample at each rewind + double s = 0.0; + for (FactoredDataFiles::iterator itf = datafile.begin(); itf!=datafile.end(); ++itf) + s+=(*itf)[0]->GetResamplCoef(); + if (s>=datafile.size()) { + preload|=DATA_PRELOAD_ONCE; + cout << " - all resampling coefficients are set to one, loading data once\n"; + } + + } + else { + if (norm_mode>0) + Error("Normalization of the data is only implemented with preloading\n"); + } + Preload(); + Shuffle(); +} + +/************************** + * + **************************/ + +Data::~Data() +{ + if (preload) { + delete [] mem_cdf; + delete [] mem_inp; + if (odim>0) delete [] mem_trg; + } + for (FactoredDataFiles::iterator itf = datafile.begin(); itf!=datafile.end(); ++itf) + for (vector::iterator it = (*itf).begin(); it!=(*itf).end(); ++it) + delete (*it); + datafile.clear(); + for (size_t st = 0 ; st < wlist.size() ; st++) + DeleteWordList(wlist[st], w_shared[st], w_mutex[st]); + wlist.clear(); + w_mutex.clear(); + w_shared.clear(); + if (path_prefix) free(path_prefix); +} + + +/************************** + * + **************************/ + +void Data::Shuffle() +{ + if (shuffle_mode < 1 || !preload) return; + + time_t t_beg, t_end; + time(&t_beg); + + REAL *inp = new REAL[idim]; + REAL *trg = new REAL[odim]; + + cout << " - shuffling data " << shuffle_mode << " times ..."; + cout.flush(); + for (ulong i=0; i0) { + memcpy(trg, mem_trg + i1*odim, odim*sizeof(REAL)); + memcpy(mem_trg + i1*odim, mem_trg + i2*odim, odim*sizeof(REAL)); + memcpy(mem_trg + i2*odim, trg, odim*sizeof(REAL)); + } + + } + + delete [] inp; delete [] trg; + + time(&t_end); + time_t dur=t_end-t_beg; + + cout << " done (" << dur / 60 << "m" << dur % 60 << "s)" << endl; +} + +//************************** +// +// +/* + * Preload: read datafiles and put the content into mem_inp and mem_trg + * Factors are appended (not interleaved) + * + * */ +void Data::Preload() +{ + if (!preload) return; + if (! 
(preload&DATA_PRELOAD_TODO)) { + cout << " - all data is already loaded into memory" << endl; + return; + } + preload &= ~DATA_PRELOAD_TODO; // clear flag + + cout << " - loading all data into memory ..."; + ++Epoch_num; + cout.flush(); + time_t t_beg, t_end; + time(&t_beg); + + // rewind everything + for (FactoredDataFiles::iterator itf = datafile.begin(); itf!=datafile.end(); ++itf) { + for (vector::iterator it = (*itf).begin(); it!=(*itf).end(); ++it) (*it)->Rewind(); + } + + int idx=0; + int cdf=0; + for (FactoredDataFiles::iterator itf = datafile.begin(); itf!=datafile.end(); ++itf, ++cdf) { + + // get the required number of examples from all factors + int n = -1, maxn = (*itf)[0]->GetNbresampl(); + int idim1=(*itf)[0]->GetIdim(); // dimension of one factor (identical for all, e.g. a 7-gram) + int odim1=(*itf)[0]->GetOdim(); + + while (++n < maxn) { + mem_cdf[idx] = cdf; + + bool ok=false; + while (!ok) { + // advance synchronously all factors until ok + for (vector::iterator it = (*itf).begin(); it!=(*itf).end(); ++it) { + if (! 
(*it)->Next()) (*it)->Rewind(); // TODO: deadlock if file empty + } + + if( ((*itf)[0]->GetNB_SentScores() > 0) && ((*itf)[0]->GetResamplCoef() < 1.0 ) ){ + ok = (drand48() < ( (*itf)[0]->GetResamplScore() * (exp(- (float)ExpGrowth/Epoch_num)) ) ); + }else{ + ok = (drand48() < (*itf)[0]->GetResamplCoef()); + } + + } + + // copy all factors sequentially in memory + REAL *adr_inp=mem_inp+idx*idim; + REAL *adr_trg=mem_trg+idx*odim; + for (vector::iterator it = (*itf).begin(); it!=(*itf).end(); ++it) { + memcpy(adr_inp, (*it)->input, idim1*sizeof(REAL)); + adr_inp+=idim1; + if (odim1 > 0) { + memcpy(adr_trg, (*it)->target_vect, odim1*sizeof(REAL)); + adr_trg+=odim1; + } + } + idx++; // next example + } + + } + + if (norm_mode & 1) { + cout << " subtract mean,"; cout.flush(); + for (int i=0; i0) + for (e=0, mptr=mem_inp+i; e%d):\n",mem_inp,idx,idim,odim); + for (int e=0; e::iterator it = (*itf).begin(); it!=(*itf).end(); ++it) + (*it)->Rewind(); + } + idx = -1; +} + +/************************** + * Advance to next data + **************************/ +/* + * set 'input' and 'target' pointers to the next values (pointing into mem_inp and mem_trg) + * */ +bool Data::Next() +{ + if (idx >= nb_totl-1) return false; + idx++; + + if (preload) { + // just advance to next data in memory + input = &mem_inp[idx*idim]; + aux = (input + (idim - auxdim)); + if (odim>0) target = &mem_trg[idx*odim]; + current_df = mem_cdf[idx]; + + // handling multiple languages + const size_t nb_lang = df_dim[current_df].size(); + switch (nb_lang) { + default: { + REAL* cur_input = input; + for (size_t i = 0 ; nb_lang > i ; i++) { + df_dim[current_df][i].second = cur_input; + cur_input += df_dim[current_df][i].first; + } + } + break; + case 2: + df_dim[current_df][1].second = target; + case 1: + df_dim[current_df][0].second = input; + break; + } +//printf("DATA:"); for (int i =0; i1) + Error("multiple factors are only implemented with preloading"); + + if (shuffle_mode > 0) { + // resample in RANDOMLY 
SELECTED datafile until data was found + // we are sure to find something since idx was checked before + current_df = (int) (drand48() * datafile.size()); +//cout << " df=" << df << endl; + datafile[current_df][0]->Resampl(); + input = datafile[current_df][0]->input; + if (odim>0) target = datafile[current_df][0]->target_vect; + } + else { + // resample SEQUENTIALLY all the data files + static int i=-1, nbdf=datafile[current_df][0]->GetNbex(); + if (idx==0) {current_df = 0, i=-1, nbdf=datafile[current_df][0]->GetNbex(); } // (luint) this) is a hack to know when there was a global rewind + if (++i >= nbdf) { current_df++; nbdf=datafile[current_df][0]->GetNbex(); i=-1; } + if (current_df >= (int) datafile.size()) Error("internal error: no examples left\n"); +//printf("seq file: current_df=%d, i=%d\n", current_df,i); + datafile[current_df][0]->Resampl(); //TODO: idx= ?? +//cout << " got df=" << df << " idx="<input; + if (odim>0) target = datafile[current_df][0]->target_vect; + } + aux = (input + (idim - auxdim)); + + return true; +} + +//************************** +// +// + +void Data::CreateWordList(ifstream &ifs, size_t idx, bool use_class) +{ + // resize vectors + if (wlist.size() <= idx) { + size_t ns = (idx + 1); + wlist .resize(ns, NULL); + w_shared.resize(ns, NULL); + w_mutex .resize(ns, NULL); + } + vector *&iw = wlist[idx]; + pthread_mutex_t *&im = w_mutex[idx]; + int *&is = w_shared[idx]; + + if (im != NULL) + pthread_mutex_lock(im); + + // new word list + if (iw == NULL) + iw = new vector; + if (iw == NULL) + Error("Can't allocate word list"); + iw->reserve(nb_factors); + iw->resize(nb_factors); + vector vsPath(nb_factors, (NULL != path_prefix) ? 
(string(path_prefix) += '/') : string()); + stringbuf sb; + ifs.get(sb); + istream istr(&sb); + for (int i=0; i> fname; + vsPath[i] += fname; + } + string buf; + istr >> buf; + bool bUseEos = (DATA_WITH_EOS == buf); + for (int i=0; iat(i).SetSortBehavior(!use_unstable_sort); + WordList::WordIndex voc_size = iw->at(i).Read(vsPath[i].c_str(), use_class, bUseEos); + cout << ", got " << voc_size << " words" << endl; + } + + if (im != NULL) + pthread_mutex_unlock(im); + else { + // word list sharing + im = new pthread_mutex_t; + if (im != NULL) { + pthread_mutex_init(im, NULL); + int *new_is = new int; + if (new_is != NULL) { + (*new_is) = 0; + is = new_is; + } + } + } +} + +void Data::CompareWordList(size_t ii, Data &other_data, size_t ei) +{ + if ((ii >= this->wlist.size()) || (ei >= other_data.wlist.size())) + return; + vector *&iw = this->wlist[ii]; + vector * ew = other_data.wlist[ei]; + pthread_mutex_t *&im = this->w_mutex[ii]; + pthread_mutex_t * em = other_data.w_mutex[ei]; + int *&is = this->w_shared[ii]; + int * es = other_data.w_shared[ei]; + if ((iw == NULL) || (ew == NULL)) + return; + + // compare with other word list + size_t stCurWl = 0; + size_t stWlCountI = iw->size(); + size_t stWlCountE = ew->size(); + if (stWlCountI == stWlCountE) + for (; stCurWl < stWlCountI ; stCurWl++) { + WordList::WordIndex wiCur = 0; + WordList::WordIndex wiSize = (*iw)[stCurWl].GetSize(); + if (wiSize != (*ew)[stCurWl].GetSize()) + break; + for (; wiCur < wiSize ; wiCur++) { + WordList::WordInfo &wiInt = (*iw)[stCurWl].GetWordInfo(wiCur); + WordList::WordInfo &wiExt = (*ew)[stCurWl].GetWordInfo(wiCur); + if ((wiInt.id != wiExt.id) || (wiInt.n != wiExt.n) || (wiInt.cl != wiExt.cl) + || (strcmp(wiInt.word, wiExt.word) != 0) ) + break; + } + if (wiCur < wiSize) + break; + } + if ((stCurWl < stWlCountI) || (stCurWl < stWlCountE)) + Error("Word lists are not identical\n"); + else { + vector *old_iw = iw; + pthread_mutex_t *old_im = im; + int *old_is = is; + + // share other word 
list + int inc_is = 0; + if (em != NULL) { + pthread_mutex_lock(em); + inc_is = ((es != NULL) ? (*es) + 1 : 0); + if (inc_is > 0) { + (*es) = inc_is; + iw = ew; + is = es; + im = em; + } + pthread_mutex_unlock(em); + } + if (inc_is <= 0) + Error ("Can't share word list\n"); + else + // remove previous word list + DeleteWordList(old_iw, old_is, old_im); + } +} + +void Data::DeleteWordList(vector *&iw, int *&is, pthread_mutex_t *&im) +{ + vector *old_iw = iw; + pthread_mutex_t *old_im = im; + int *old_is = is; + is = NULL; + iw = NULL; + im = NULL; + + // verify if word list is shared + if (old_im != NULL) { + pthread_mutex_lock(old_im); + if (old_is != NULL) { + if ((*old_is) > 0) { + (*old_is)--; + pthread_mutex_unlock(old_im); + return; + } + else + delete old_is; + } + } + + if (old_iw != NULL) + delete old_iw; + + // destroy mutex + if (old_im != NULL) { + pthread_mutex_unlock(old_im); + pthread_mutex_destroy(old_im); + delete old_im; + } +} diff --git a/Data.h b/Data.h new file mode 100644 index 0000000..f6d1498 --- /dev/null +++ b/Data.h @@ -0,0 +1,139 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + */ + +#ifndef _Data_h +#define _Data_h + +#include +#include +#include +#include +#include "Tools.h" +#include "DataFile.h" +#include "WordList.h" + +// Names of information in files + +#define DATA_LINE_LEN 16384 // extern const int gives internal gcc error for 4.7.2 +extern const char* DATA_HEADER_TXT; +extern const int DATA_HEADER_ID; +extern const char* DATA_PRELOAD; +extern const char* DATA_RESAMPL_MODE; +extern const char* DATA_RESAMPL_SEED; +extern const char* DATA_SHUFFLE_MODE; +extern const char* DATA_PATH_PREFIX; +extern const char* DATA_NB_FACTORS; +extern const char* DATA_SENTENCE_SCORES; +extern const char* DATA_AUXILIARY_DATA; +extern const char* DATA_WORD_LIST; +extern const char* DATA_WORD_LIST_TARGET; +extern const char* DATA_WORD_LIST_SOURCE; +extern const char* DATA_WORD_LIST_MULT; +extern const char* DATA_WITH_EOS; + +#define DATA_FILE_BUF_SIZE 16384 // read large chunks form file for faster processing (used by some classes) + + +/* + * Strategy + * - there is one function Rewind() and Next() which should not be overridden + * - they perform all the processing with preloading, shuffling, etc + * - the class specific processing is done in First() and Advance() + */ + +typedef vector< vector > FactoredDataFiles; + +class Data +{ +private: + void CreateWordList(ifstream &ifs, size_t idx, bool use_class); + void CompareWordList(size_t ii, Data &other_data, size_t ei); + static void DeleteWordList(vector *&iw, int *&is, pthread_mutex_t *&im); + bool use_unstable_sort; // switch for compatibility with CSLM <= V3.0 to use unstable sort +protected: + const char *fname; + char *path_prefix; // prefix added to all file names + int idim, odim; // dimensions + int auxdim; // auxiliary data dimension + int nb_SentSc; // Number of 
Sentence scores + int ExpGrowth; // V Value to be used for exponentielle growth for exp(-V/#Ep_nb) + int Epoch_num; // epoch number + int tgpos; // position of target + int nb_totl; // number of examples + // flags + int preload; // + int betweenSent_ctxt; // To be activated for continuous ngram between consecutive sentences + int resampl_mode; // + int resampl_seed; // + int shuffle_mode; // + int norm_mode; // evtl. perform normalization; bits: 1=subtract mean, 2=divide by var. + int nb_factors; // + + // word lists + vector*> wlist; + vector w_shared; // number of objects sharing word list + vector w_mutex; // mutex used to share word list + size_t sr_wl_idx, tg_wl_idx; // source and target word list index + // data files + FactoredDataFiles datafile; + vector > > df_dim; // data file buffer (dim and ptr) for each word list + int current_df; + // actual data + int idx; // index of current example [0,nb-1] + int *mem_cdf; // current data file for each example in memory + REAL *mem_inp; // all the input data in memory + REAL *mem_trg; // all the output data in memory + // constructor to create a void data object + Data(); + // method to read content of data file + virtual void ReadFile(Data *other_data = NULL, bool use_class = false); + // local tools, only used when preload is activated + void Preload(); // preload all data + void Shuffle(); // shuffle in memory +public: + Data(const char *fname, Data *other_data = NULL, bool use_class = false); + virtual ~Data(); + // access function to local variables + const char *GetFname() {return fname;} + int GetIdim() {return idim;} + int GetOdim() {return odim;} + int GetNbFactors() {return nb_factors;} + int GetNbSentSc() const { return nb_SentSc; } + int GetAuxdim() const { return auxdim; } + int GetTgPos() const { return tgpos; } + int GetNb() {return nb_totl;} + int GetIdx() {if (idx<0) Error("DataNext() must be called before GetIdx()"); return idx;}; + vector *GetSrcWList() {return ((sr_wl_idx!=(size_t)-1) ? 
wlist[sr_wl_idx] : NULL);} + vector *GetTgtWList() {return ((tg_wl_idx!=(size_t)-1) ? wlist[tg_wl_idx] : NULL);} + // the following two pointers are only valid after first DataNext() ! + REAL *input; // pointer to current inputs + REAL *target; // pointer to current target + REAL *aux; // pointer to current auxiliary data + //REAL *GetData() {return val;} + // main functions to access data + virtual int GetDim(size_t lg) const { return (((0 <= current_df) && (df_dim[current_df].size() > lg)) ? df_dim[current_df][lg].first : 0); } + virtual REAL * GetBuffer(size_t lg) { return (((0 <= current_df) && (df_dim[current_df].size() > lg)) ? df_dim[current_df][lg].second : NULL); } + virtual vector *GetWList(size_t lg) {return ((wlist.size() > lg) ? wlist[lg] : NULL);} + virtual void Rewind(); // rewind to first example, performs resampling, shuffling, etc if activated + virtual bool Next(); // advance to next example, return FALSE if at end +}; + +#endif diff --git a/DataAscii.cpp b/DataAscii.cpp new file mode 100644 index 0000000..1d4f530 --- /dev/null +++ b/DataAscii.cpp @@ -0,0 +1,147 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + */ + +using namespace std; +#include + +#include "Tools.h" +#include "Data.h" +#include "DataAscii.h" + +const char* DATA_FILE_ASCII="DataAscii"; + +DataAscii::DataAscii(char *p_prefix, ifstream &ifs, int p_aux_dim, const string& p_aux_ext, int p_nb_SentSc, const string& p_SentSc_ext,int p_betweenSentCtxt , DataAscii *prev_df) + : DataFile::DataFile(p_prefix, ifs, p_aux_dim, p_aux_ext, p_nb_SentSc, p_SentSc_ext, p_betweenSentCtxt, prev_df) +{ + + char full_fname[max_word_len]=""; + + if (path_prefix) { + if (strlen(path_prefix)+strlen(fname)+2>(size_t)max_word_len) + Error("full filename is too long"); + + strcpy(full_fname, path_prefix); + strcat(full_fname, "/"); + } + strcat(full_fname, fname); + + dfs.open(full_fname,ios::in); + CHECK_FILE(dfs,full_fname); + + if (prev_df) { + nbex=prev_df->nbex; + idim=prev_df->idim; + odim=prev_df->odim; + printf(" - %s: ASCII data with %lu examples of dimension %d -> %d (factor)\n", fname, nbex, idim, odim); + } + else { + char buf[DATA_LINE_LEN]; + dfs.getline(buf,DATA_LINE_LEN); + sscanf(buf, "%lu %d %d", &nbex, &idim, &odim); + printf(" - %s: ASCII data with %lu examples of dimension %d -> %d\n", fname, nbex, idim, odim); + } + + if (idim>0) input = new REAL[idim + auxdim]; + if (odim>0) target_vect = new REAL[odim]; +} + + +/************************** + * + **************************/ + +DataAscii::~DataAscii() +{ + dfs.close(); + if (idim>0) delete [] input; + if (odim>0) delete [] target_vect; +} + + +/************************** + * + **************************/ + +void DataAscii::Rewind() +{ + dfs.seekg(0, dfs.beg); + char buf[DATA_LINE_LEN]; + dfs.getline(buf,DATA_LINE_LEN); + if (aux_fs.is_open()) + aux_fs.seekg(0, aux_fs.beg); +} + +/************************** + * + 
**************************/ + +bool DataAscii::Next() +{ + char line[DATA_LINE_LEN]; + dfs.getline(line, DATA_LINE_LEN); + if (dfs.eof()) return false; + else idx++; + + // parse input data + char *lptr=line; +//cout << "\nLINE: " << line << endl; + for (int i=0; i> input[idim + i]; + if (!aux_fs) + { + cout << " - Error in auxiliary data file: " << aux_fname << endl; + Error("Not enough auxiliary data available"); + } + } + } + + if (odim<=0) return true; + + // parse target data + for (int i=0; i +#include + +#include "DataFile.h" + +extern const char* DATA_FILE_ASCII; + +class DataAscii : public DataFile +{ +protected: + ifstream dfs; +public: + DataAscii(char* , ifstream &ifs, int, const string&, int, const string&,int , DataAscii* =NULL); // optional object to initialize when adding factors + virtual ~DataAscii(); + virtual void Rewind(); + virtual bool Next(); +}; + +#endif diff --git a/DataAsciiClass.cpp b/DataAsciiClass.cpp new file mode 100644 index 0000000..4071afd --- /dev/null +++ b/DataAsciiClass.cpp @@ -0,0 +1,93 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + */ + +using namespace std; +#include + +#include "Tools.h" +#include "Data.h" +#include "DataAsciiClass.h" + +const char* DATA_FILE_ASCIICLASS="DataAsciiClass"; + +DataAsciiClass::DataAsciiClass(char *p_prefix, ifstream &ifs, int p_aux_dim, const string& p_aux_ext, int p_nb_SentSc, const string& p_SentSc_ext,int p_betweenSentCtxt, DataAsciiClass *prev_df) + : DataAscii::DataAscii(p_prefix, ifs, p_aux_dim, p_aux_ext, p_nb_SentSc, p_SentSc_ext, p_betweenSentCtxt, prev_df) +{ + + if (prev_df) { + tgt0=prev_df->tgt0; + tgt1=prev_df->tgt1; + printf(" targets %5.2f/%5.2f (factor)\n", tgt0, tgt1); + } + else { + ifs >> tgt0 >> tgt1; + printf(" targets %5.2f/%5.2f\n", tgt0, tgt1); + } +} + + +/************************** + * + **************************/ + +bool DataAsciiClass::Next() +{ + char line[DATA_LINE_LEN]; + dfs.getline(line, DATA_LINE_LEN); + if (dfs.eof()) return false; + else idx++; + + // parse input data + char *lptr=line; +//cout << "\nLINE: " << line << endl; + for (int i=0; i> input[idim + i]; + if (!aux_fs) + Error("Not enough auxiliary data available"); + } + } + + if (odim<=0) return true; + + // parse target data + while (*lptr==' ' || *lptr=='\t') lptr++; + if (!*lptr) Error("unable to parse target id in ASCII datafile"); + if (sscanf(lptr, "%d", &target_id)!=1) Error("parsing target in ASCII datafile"); + for (int t=0; t +#include + +#include "DataAscii.h" + +extern const char* DATA_FILE_ASCIICLASS; + +class DataAsciiClass : public DataAscii +{ +private: + REAL tgt0, tgt1; +public: + DataAsciiClass(char* , ifstream &ifs, int, const string&, int, const string&,int , DataAsciiClass* =NULL); // optional object to initialize when adding factors + virtual bool Next(); +}; + +#endif diff --git a/DataFile.cpp b/DataFile.cpp new 
file mode 100644 index 0000000..1c0fe04 --- /dev/null +++ b/DataFile.cpp @@ -0,0 +1,189 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + */ + +using namespace std; +#include +#include +#include +#include +#include +#include + +#include "Tools.h" +#include "Data.h" +#include "DataFile.h" + +DataFile::DataFile(char *p_path_prefix, ifstream &ifs, int p_aux_dim, const string& p_aux_ext, int p_nb_SentSc,const string& p_SentSc_ext,int p_betweenSentCtxt , DataFile *prev_df) + : idim(0), odim(0), auxdim(p_aux_dim), nbex(0), resampl_coeff(1.0), path_prefix(p_path_prefix), fname(NULL), + nb_SentSc(p_nb_SentSc), betweenSent_ctxt(p_betweenSentCtxt), SentSc_ext(p_SentSc_ext), + idx(-1), input(NULL), target_vect(NULL), aux(NULL), target_id(0) +{ + char p_fname[DATA_LINE_LEN]; + + ifs >> p_fname; + // prev_df is usefull for factors, since we don't repeat the resampl_coef for each factor, same for aux data + if (prev_df) { + resampl_coeff = prev_df->resampl_coeff; + auxdim = prev_df->auxdim; + aux_fname = prev_df->aux_fname; + SentSc_fname = prev_df->SentSc_fname; + SentSc_ext = prev_df->SentSc_ext; + 
nb_SentSc = prev_df->nb_SentSc; + betweenSent_ctxt = prev_df->betweenSent_ctxt; + if (0 < auxdim) { + aux_fs.open(aux_fname.c_str(), ios::in); + CHECK_FILE(aux_fs, aux_fname.c_str()); + } + } + else { + ifs >> resampl_coeff; // read resampl coeff + SetAuxDataInfo(p_aux_dim, p_aux_ext, p_fname); + } + if (resampl_coeff<=0 || resampl_coeff>1) + Error("resampl coefficient must be in (0,1]\n"); + fname=strdup(p_fname); + + // memory allocation of input and target_vect should be done in subclass + // in function of the dimension and number of examples +} + +DataFile::DataFile(char *p_path_prefix, char *p_fname, const float p_rcoeff) + : idim(0), odim(0), auxdim(0), nbex(0), resampl_coeff(p_rcoeff), path_prefix(p_path_prefix), fname(NULL), + idx(-1), input(NULL), target_vect(NULL), aux(NULL), target_id(0) +{ + if (NULL != p_fname) + fname = strdup(p_fname); + + // memory allocation of input and target_vect should be done in subclass + // in function of the dimension and number of examples +} + +DataFile::~DataFile() +{ + if (fname) free(fname); + if (aux_fs.is_open()) + aux_fs.close(); + // memory deallocation of input and target_vect should be done in subclass + +} + +/** + * set auxiliary data information + * @param dim dimension + * @param ext file name extension + * @param fname file name (with other extension) + */ +void DataFile::SetAuxDataInfo(int dim, const string& ext, char* fname) +{ + // get dimension and file name + auxdim = dim; + if (NULL != path_prefix) { + aux_fname = path_prefix; + aux_fname += '/'; + } + else + aux_fname.clear(); + aux_fname += ((NULL != fname) ? 
fname : this->fname); + size_t dotpos = aux_fname.find_last_of('.'); + if (string::npos != dotpos) + aux_fname.replace(dotpos + 1, string::npos, ext); + else { + aux_fname += '.'; + aux_fname += ext; + } + + // open auxiliary file + if (aux_fs.is_open()) + aux_fs.close(); + if (0 < auxdim) { + cout << " - opening auxiliary data file " << aux_fname << endl; + aux_fs.open(aux_fname.c_str(), ios::in); + CHECK_FILE(aux_fs, aux_fname.c_str()); + } +} +/******************* + * return the resampling score. Return the avg if many + * ******************/ +double DataFile::GetResamplScore() +{ + int i; + double r_score=0.0; + + if( nb_SentSc == 1 ) return SentScores[0]; + + for(i=0;i + +// system headers +#include +#include +#include +#include + +#include "Tools.h" +#include "Data.h" +#include "DataMnist.h" + +const char* DATA_FILE_MNIST="DataMnist"; +const uint magic_mnist_data=0x00000803; +const uint magic_mnist_labels=0x00000801; + +/* + * + */ + +uint DataMnist::read_iswap(int fd) { + uint i, s; + unsigned char *pi=(unsigned char*) &i, *ps=(unsigned char*) &s; + + read(fd, &i, sizeof(i)); + + // swap integer Big Endian -> little Endian + ps[0]=pi[3]; ps[1]=pi[2]; ps[2]=pi[1]; ps[3]=pi[0]; + + return s; +} + +/* + * + */ + +DataMnist::DataMnist(char *p_prefix, ifstream &ifs, int p_aux_dim, const string& p_aux_ext, int p_nb_SentSc, string& p_SentSc_ext,int p_betweenSentCtxt, DataMnist *prev_df) + : DataFile::DataFile(p_prefix, ifs, p_aux_dim, p_aux_ext, p_nb_SentSc, p_SentSc_ext, p_betweenSentCtxt, prev_df) +{ + char full_fname[max_word_len]=""; + + printf(" - %s: MNIST data ", fname); fflush(stdout); + + // open data file (fname is parsed by DataFile::DataFile() + if (path_prefix) { + if (strlen(path_prefix)+strlen(fname)+2>(size_t)max_word_len) + Error("full filename is too long"); + + strcpy(full_fname, path_prefix); + strcat(full_fname, "/"); + } + strcat(full_fname, fname); + + dfd=open(full_fname, O_RDONLY); + if (dfd<0) perror(""); + if (read_iswap(dfd) != 
magic_mnist_data) Error("magic number of data file is wrong"); + nbex = read_iswap(dfd); + idim = read_iswap(dfd) * read_iswap(dfd); + printf("with %lu examples of dimension %d\n", nbex, idim); + + // open corresponding label file + if (prev_df) { + cl_fname=prev_df->cl_fname; + odim=prev_df->odim; + tgt0=prev_df->tgt0; + tgt1=prev_df->tgt1; + printf(" %s with labels in %d classes, targets %5.2f %5.2f (factor)\n", cl_fname, odim, tgt0, tgt1); fflush(stdout); + } + else { + char p_clfname[DATA_LINE_LEN]; + ifs >> p_clfname >> odim >> tgt0 >> tgt1; + cl_fname=strdup(p_clfname); + printf(" %s with labels in %d classes, targets %5.2f %5.2f\n", cl_fname, odim, tgt0, tgt1); fflush(stdout); + } + + full_fname[0]=0; + if (path_prefix) { + if (strlen(path_prefix)+strlen(cl_fname)+2>(size_t)max_word_len) + Error("full filename is too long"); + + strcpy(full_fname, path_prefix); + strcat(full_fname, "/"); + } + strcat(full_fname, cl_fname); + + lfd=open(full_fname, O_RDONLY); + if (lfd<0) perror(""); + ulong val; + if (read_iswap(lfd) != magic_mnist_labels) Error("magic number of label file is wrong"); + if ((val=read_iswap(lfd)) != nbex) ErrorN("found %lu examples in label file", val); + + if (idim>0) { + input = new REAL[idim + auxdim]; + ubuf = new unsigned char[idim]; + } + if (odim>0) target_vect = new REAL[odim]; +} + + +/************************** + * + **************************/ + +DataMnist::~DataMnist() +{ + close(dfd); + close(lfd); + if (idim>0) { delete [] input; delete [] ubuf; } + if (odim>0) delete [] target_vect; + if (cl_fname) free(cl_fname); +} + + +/************************** + * + **************************/ + +void DataMnist::Rewind() +{ + lseek(dfd, 16, SEEK_SET); + lseek(lfd, 8, SEEK_SET); + if (aux_fs.is_open()) + aux_fs.seekg(0, aux_fs.beg); +} + +/************************** + * + **************************/ + +bool DataMnist::Next() +{ + + // read next image + int t=idim*sizeof(unsigned char); + if (read(dfd, ubuf, t) != t) return false; + + for 
(t=0; t 16 ? 1 : 0); + printf("\n"); + } +#endif + + // read auxiliary data + if (aux_fs.is_open()) { + for (int i = 0; i < auxdim ; i++) { + aux_fs >> input[idim + i]; + if (!aux_fs) + Error("Not enough auxiliary data available"); + } + } + + // read next class label + if (odim<=0) return true; + if (read(lfd, ubuf, 1) != 1) { + char msg[16384]; // TODO + sprintf(msg, "no examples left in class file %s", cl_fname); + Error(msg); + } + target_id = (int) ubuf[0]; + if (target_id>=odim) { + ErrorN("example %lu has a target of %d, but we have only %d classes\n", idx+1, target_id, odim); + } + for (t=0; t +#include + +#include "DataFile.h" + +extern const char* DATA_FILE_MNIST; + +class DataMnist : public DataFile +{ +protected: + int dfd; // file descriptor for data + int lfd; // file descriptor for labels + char *cl_fname; // file name of classes + REAL tgt0, tgt1; // low and high values of targets (e.g. -0.6/0.6 for tanh; 0/1 for softmax, ...) + unsigned char *ubuf; // input buffer + uint read_iswap(int); // read integer from file and swap bytes +public: + DataMnist(char *, ifstream &ifs, int, const string&, int, string&, int, DataMnist* =NULL); // optional object to initialize when adding factors + virtual ~DataMnist(); + virtual void Rewind(); + virtual bool Next(); +}; + +#endif diff --git a/DataNgramBin.cpp b/DataNgramBin.cpp new file mode 100644 index 0000000..4a31d09 --- /dev/null +++ b/DataNgramBin.cpp @@ -0,0 +1,366 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. 
+ * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + */ + +using namespace std; +#include +#include +#include + +// system headers +#include +#include +#include + +#include "Tools.h" +#include "Data.h" +#include "DataNgramBin.h" +#include "WordList.h" + +const char* DATA_FILE_NGRAMBIN="DataNgramBin"; +const int DATA_NGRAM_IGN_SHORT=1; // ignore uncomplete n-grams + // if this options is not set, wholes will be filled with NULL_WORD + // in order to simulate shorter n-grams +const int DATA_NGRAM_IGN_UNK=2; // ignore n-grams with at last position +const int DATA_NGRAM_IGN_UNKall=4; // ignore n-grams that contain anywhere +const int DATA_NGRAM_IGN_EOS=8; // TODO: not implemented +const int DATA_NGRAM_IGN_ALL=15; + + +//******************* + +void DataNgramBin::do_constructor_work() +{ + // parse header of binary Ngram file + char full_fname[max_word_len]=""; + char full_SentSc_fname[max_word_len]=""; + + if (path_prefix) { + if (strlen(path_prefix)+strlen(fname)+2>(size_t)max_word_len) + Error("full filename is too long"); + + strcpy(full_fname, path_prefix); + strcat(full_fname, "/"); + } + strcat(full_fname, fname); + + if( (nb_SentSc > 0) ){ + strcpy(full_SentSc_fname, path_prefix); + strcat(full_SentSc_fname, "/"); + string rawname(fname); + int fileExtPos = 
rawname.find_last_of("."); + string rawfname = rawname.substr(0,fileExtPos+1); + strcat(full_SentSc_fname,rawfname.c_str()); + strcat(full_SentSc_fname,SentSc_ext.c_str()); + SentScores = new double[nb_SentSc]; // vector of scores (double) when we deal with more than 1 Sentence score + + if(resampl_coeff < 1 ){ + cout<<" - reading resampling scores from file "<< full_SentSc_fname<<" (using "< 0) && (resampl_coeff < 1.0 ) ){ + SentSc_fs.open(SentSc_fname.c_str(), ios::in); + CHECK_FILE(SentSc_fs, SentSc_fname.c_str()); + } + int s; + read(fd, &s, sizeof(int)); + if (s != sizeof(WordID)) { + ErrorN("binary n-gram data uses %d bytes per index, but this code is compiled for %d byte indices\n", s, (int) sizeof(WordID)); + } + read(fd, &bos, sizeof(WordID)); + read(fd, &eos, sizeof(WordID)); + read(fd, &unk, sizeof(WordID)); + printf(" - %s binary ngram file V%d with %lu words in %lu lines, order=%d, tgt_pos=%d, mode=%d (bos=%d, eos=%d, unk=%d)\n", fname, -id, nbex, nbl, order, tgpos, mode, bos, eos, unk); + + idim=order-1; + odim=1; + + if (idim>0) { + input = new REAL[idim + auxdim]; + wid = new WordID[order]; + if (! 
betweenSent_ctxt) + for (int i=0; inbex+nbl) + Error("Number of counted examples is larger than the information in file header !?"); + nbex=n; // + } +} + +//******************* + +DataNgramBin::DataNgramBin(char *p_prefix, ifstream &ifs, int p_aux_dim, const string& p_aux_ext, int p_nb_SentSc, const string& p_SentSc_ext, int p_betweenSentCtxt, DataNgramBin *prev_df) + : DataFile::DataFile(p_prefix, ifs, p_aux_dim, p_aux_ext, p_nb_SentSc, p_SentSc_ext,p_betweenSentCtxt, prev_df), + order(0), tgpos(0), eospos(0), mode(0), nbw(0), nbs(0), nbu(0), nbi(0) +{ + // DataNgramBin [] + + // parse addtl params -> + if (prev_df) { + order=prev_df->order; // use same order, tgpos and mode than previous datafiles + tgpos=prev_df->tgpos; + mode=prev_df->mode; + } + else { //reading first DataFile followed by order, tgpos and mode + ifs >> order >> tgpos >> mode; // read order, tgpos and mode + + if( (ifs.rdstate() & std::ifstream::eofbit) || (ifs.rdstate() & std::ifstream::failbit ) ) { + Error("Bad file format, should be DataNgramBin [factor_file1 factor_file2 ...]\n" ); + } + } + if (order<2) + Error("order must be at least 2\n"); + if (tgpos<0 || tgpos>=order) + ErrorN("wrong value of target position: %d not in [0,%d]\n",tgpos,order-1); + if (mode<0 || mode>DATA_NGRAM_IGN_ALL) + Error("wrong value of DataNgramBin mode\n"); + + do_constructor_work(); +} + +//******************* + +DataNgramBin::DataNgramBin(char *p_fname, float p_rcoeff, int p_order) + : DataFile::DataFile(NULL, p_fname, p_rcoeff), + order(p_order), tgpos(p_order - 1), eospos(0), mode(3), nbw(0), nbs(0), nbu(0), nbi(0) +{ + + do_constructor_work(); + // skip counting for efficieny reasons + nbw=nbex+nbl; // this should be an upper bound on the number of n-grams +} + +//******************* + +DataNgramBin::DataNgramBin(char *p_fname, float p_rcoeff, int p_order, int p_tgpos, int p_mode) + : DataFile::DataFile(NULL, p_fname, p_rcoeff), + order(p_order), tgpos(p_tgpos), mode(p_mode), nbw(0), nbs(0), nbu(0), 
nbi(0) +{ + if (tgpos<0 || tgpos>=order) + ErrorN("wrong value of target position: %d not in [0,%d]\n",tgpos,order-1); + + do_constructor_work(); + // skip counting for efficieny reasons + nbw=nbex+nbl; // this should be an upper bound on the number of n-grams +} + +//******************* + +DataNgramBin::~DataNgramBin() +{ + + close(fd); + if (idim>0) { + delete [] wid; + delete [] input; + } + delete [] target_vect; +} + + +//******************* +/* + * Fill input and target_vec with data coming from the files + * Factors are appended (not interleaved) + * */ +bool DataNgramBin::Next() +{ + bool ok=false; + string line_sc; + int i; + + // we may need to skip some n-grams in function of the flags + while (!ok) { + + // read from file into, return if EOF + WordID w = NULL_WORD; + // if eos word has been read in previous loop, insert NULL_WORD until encounter a NULL target or NULL context + if ( (tgpos >= eospos) || (1 >= eospos) ) { + if ( (DATA_FILE_BUF_SIZE>0) ) { + if (! ReadBuffered(&w)) return false; + } + else { + if (read(fd, &w, sizeof(w)) != sizeof(w)) return false; + } + } + + // shift previous order + for (i=1; i read current sentence score + if(SentSc_fs.is_open()) { + std::getline(SentSc_fs,line_sc); + SentScores[0] = std::atof(line_sc.c_str() ); // TODO add support of multiple scores per line + } + + // read next line of auxiliary data + if (aux_fs.is_open()) { + for (i = 0; i < auxdim ; i++) { + aux_fs >> input[idim + i]; + if (!aux_fs) + { + cout << " - Error in auxiliary data file " << aux_fname << endl; + Error("Not enough auxiliary data available"); + } + } + } + continue; + } + + if (mode & DATA_NGRAM_IGN_UNK) { + // ignore n-grams with at last position + if (w == unk) { + nbi++; + continue; + } + } + + if (mode & DATA_NGRAM_IGN_UNKall) { + // ignore n-grams that contain anywhere + for (i=0; i +#include +#include + +#include "Data.h" +#include "DataFile.h" + +extern const char* DATA_FILE_NGRAMBIN; +// ID of binary formet, we use negative numbers 
so that we can detect old files which have no version ID +// (the first field is number of lines which should be positive) +// id nw nl voc_size id_byte beos eos unk +// no id: n/a int int int +// id=-2: int ulong ulong int int int int +#define DATA_FILE_NGRAMBIN_VERSION2 (-2) // introduced on Dec 02 2013 +#define DATA_FILE_NGRAMBIN_VERSION DATA_FILE_NGRAMBIN_VERSION2 +#define DATA_FILE_NGRAMBIN_HEADER_SIZE1 (2*sizeof(int)+sizeof(int)+sizeof(int)+3*sizeof(WordID)) +#define DATA_FILE_NGRAMBIN_HEADER_SIZE2 (sizeof(int)+2*sizeof(ulong)+sizeof(int)+sizeof(int)+3*sizeof(WordID)) + +// Syntax of a line in data description: +// DataNgramBin [] [flags] +// u: skip n-grams with at the right most position +// U: skip n-grams with anywhere +// b: skip n-grams with elsewhere than at the left most position +// e: skip n-grams with elsewhere than at the right most position + +class DataNgramBin : public DataFile +{ +private: + void do_constructor_work(); + int id; // ID to support different formats (see DATA_FILE_NGRAMBIN_VERSION) + int header_len; // length of header (in function of file version) + // read buffer for faster File IO + WordID buf_wid[DATA_FILE_BUF_SIZE]; + int buf_n; // actual size of data in buffer + int buf_pos; // current position + bool ReadBuffered(WordID *wid) { + if (++buf_pos>=buf_n) { + // read new block of data, we can get less than requested + buf_n = read(fd, buf_wid, DATA_FILE_BUF_SIZE*sizeof(WordID)) / sizeof(WordID); +//printf("put %d elements into buffer\n", buf_n); + if (buf_n<=0) return false; // no data left + buf_pos=0; + } + *wid=buf_wid[buf_pos]; + return true; + } +protected: + int fd; // UNIX style binary file + int vocsize; // vocab size (including , and ) + int order; // order of the ngrams + int tgpos; // position of target word in n-gram + int eospos; // position of eos word in n-gram + int mode; // see above for possible flags + WordID *wid; // whole n-gram context + WordID bos, eos, unk; // word ids of special symbols + // stats (in 
addition to nbex in mother class) + ulong nbl, nbw, nbs, nbu;// lines, words, sentences, unks + ulong nbi; // ignored n-grams +public: + explicit DataNgramBin(char*, ifstream&, int, const string&, int, const string&, int, DataNgramBin* =NULL); // optional object to initialize when adding factors + DataNgramBin(char*, float =1.0, int =4); + DataNgramBin(char*, float, int, int, int =3); + virtual ~DataNgramBin(); + virtual bool Next(); + virtual void Rewind(); + virtual WordID GetVocSize() {return vocsize;}; + int GetTgPos() const { return tgpos; } +}; + +#endif diff --git a/DataPhraseBin.cpp b/DataPhraseBin.cpp new file mode 100644 index 0000000..6c9567d --- /dev/null +++ b/DataPhraseBin.cpp @@ -0,0 +1,346 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + */ + +using namespace std; +#include + +// system headers +#include +#include +#include +#include + +#include "Tools.h" +#include "DataPhraseBin.h" + +const char* DATA_FILE_PHRASEBIN="DataPhraseBin"; +const int DATA_PHRASE_IGN_SHORT_SRC=1; // ignore phrase pairs for which the source phrase is to short +const int DATA_PHRASE_IGN_SHORT_TGT=16; // ignore phrase pairs for which the target phrase is to short +const int DATA_PHRASE_IGN_ALL=17; // all flags ORed together +const int max_buf_len=32; // maximum length of phrases that can be read + + +//******************* + +void DataPhraseBin::do_constructor_work() +{ + ulong n; + + char full_fname[max_word_len]=""; + + if (path_prefix) { + if (strlen(path_prefix)+strlen(fname)+2>(size_t)max_word_len) + Error("full filename is too long"); + + strcpy(full_fname, path_prefix); + strcat(full_fname, "/"); + } + strcat(full_fname, fname); + + // parse header binary Ngram file + + fd=open(full_fname, O_RDONLY); + if (fd<0) { + perror(full_fname); Error(); + } + int s; + read(fd, &s, sizeof(int)); + if (s != sizeof(WordID)) { + ErrorN("binary phrase data uses %d bytes per index, but this code is compiled for %d byte indices\n", s, (int) sizeof(WordID)); + } + + read(fd, &max_len, sizeof(int)); // maximal length of a phrase (source or target) + if (max_len<1 || max_len>255) { + ErrorN("binary phrase data: max length must be in 1..255\n"); + } + + // source side vocabulary infos + read(fd, &ivocsize, sizeof(int)); + read(fd, &ibos, sizeof(WordID)); // BOS + read(fd, &ieos, sizeof(WordID)); // EOS, not used + read(fd, &iunk, sizeof(WordID)); // UNK + iempty= NULL_WORD; // used to identify empty words in phrase + + // read source counts + inbphw = new int[max_len+1]; inbphw[0]=0; + for (s=1; s<=max_len; 
s++) {read(fd, inbphw+s, sizeof(int)); inbphw[0]+=inbphw[s]; } + printf(" - %s binary phrase pairs with %d entries of max length of %d, mode=%d\n", fname, inbphw[0], max_len, mode); + + // calc source cumulated counts + icnbphw = new int[max_len+1]; + icnbphw[1]=inbphw[1]; + for (s=2; s<=max_len; s++) icnbphw[s]=icnbphw[s-1]+inbphw[s]; + printf(" source: vocabulary of %d words (bos=%d, eos=%d, unk=%d, empty=%d)\n", ivocsize, ibos, ieos, iunk, iempty); + + // target side vocabulary infos + read(fd, &ovocsize, sizeof(int)); + read(fd, &obos, sizeof(WordID)); + read(fd, &oeos, sizeof(WordID)); + read(fd, &ounk, sizeof(WordID)); + oempty= NULL_WORD; // used to identify empty words in phrase + printf(" target: vocabulary of %d words (bos=%d, eos=%d, unk=%d, empty=%d)\n", ovocsize, obos, oeos, ounk, oempty); + + // read target counts + onbphw = new int[max_len+1]; onbphw[0]=0; + for (s=1; s<=max_len; s++) {read(fd, onbphw+s, sizeof(int)); onbphw[0]+=onbphw[s]; } + if (onbphw[0] != inbphw[0]) { + ErrorN("number of source phrase (%d) does not match the number of target phrases (%d)\n", inbphw[0], onbphw[0]); + } + + // calc target cumulated counts + ocnbphw = new int[max_len+1]; + ocnbphw[1]=onbphw[1]; + for (s=2; s<=max_len; s++) ocnbphw[s]=ocnbphw[s-1]+onbphw[s]; + + idim=src_phlen; + odim=tgt_phlen; + + if (idim>0) { + input = new REAL[idim + auxdim]; + } + + if (odim>0) { + target_vect = new REAL[odim]; + } + + // initialize read buffer + buf_n=0; buf_pos=-1; + + cout << " statistics:" << endl; + printf(" source:"); for (s=0; s<=max_len; s++) printf("%10d", inbphw[s]); printf("\n"); + printf(" target:"); for (s=0; s<=max_len; s++) printf("%10d", onbphw[s]); printf("\n"); + + if (mode==0 && src_phlen==max_len && tgt_phlen==max_len) { + // we will use all the data -> we can directly get the numbers of phrase pairs from the header information + nbi=0; + nbex=inbphw[0]; + printf(" - %lu phrase pairs of full length (from header)\n", nbex); + return; + } + + printf(" 
limiting phrase pairs to length %d/%d words, mode %d\n", src_phlen, tgt_phlen, mode); + if (src_phlen == tgt_phlen) { + // we can get an UPPER BOUND of the nbr of phrases directly from the file header + n = icnbphw[src_phlen] < ocnbphw[tgt_phlen] ? icnbphw[src_phlen] : ocnbphw[tgt_phlen]; + nbi=inbphw[0]-n; + printf(" header: upper bound of %lu phrase pairs (%d=%5.2f%% ignored)\n", n, nbi, 100.0*nbi/inbphw[0]); + } + + + // counting nbex to get true number of examples + cout << " counting ..."; cout.flush(); + time_t t_beg, t_end; + time(&t_beg); + + int nbi=0; n=0; + while (DataPhraseBin::Next()) n++; + nbi=inbphw[0]-n; + + time(&t_end); + time_t dur=t_end-t_beg; + printf(" %lu phrase pairs (%lum%lus, %d=%5.2f%% ignored)\n", n, dur/60, dur%60, nbi, 100.0*nbi/inbphw[0]); + + if (n>(ulong)inbphw[0]) + Error("Number of counted examples is larger than the information in file header !?"); + nbex=n; +} + +//******************* + +DataPhraseBin::DataPhraseBin(char *p_prefix, ifstream &ifs, int p_aux_dim, const string& p_aux_ext, int p_nb_SentSc, const string& p_SentSc_ext, int p_betweenSentCtxt, DataPhraseBin* prev_df) + : DataFile(p_prefix, ifs, p_aux_dim, p_aux_ext, p_nb_SentSc,p_SentSc_ext,p_betweenSentCtxt, prev_df), + max_len(0), mode(0), src_phlen(0), tgt_phlen(0), + iwlist(NULL), inbphw(NULL), icnbphw(NULL), + owlist(NULL), onbphw(NULL), ocnbphw(NULL), + nbi(0) +{ + // DataPhraseBin [flags] + // parse addtl params + if (prev_df) { + src_phlen=prev_df->src_phlen; + tgt_phlen=prev_df->tgt_phlen; + mode=prev_df->mode; + } + else { + ifs >> src_phlen >> tgt_phlen >> mode; + if (src_phlen<1 || src_phlen>256) + Error("length of source phrases must be in [1,256]\n"); + if (tgt_phlen<1 || tgt_phlen>256) + Error("length of target phrases must be in [1,256]\n"); + if (mode<0 || mode>DATA_PHRASE_IGN_ALL) + Error("wrong value of DataPhraseBin mode\n"); + } + + do_constructor_work(); +} + +//******************* + +DataPhraseBin::DataPhraseBin(char *p_fname, float p_rcoeff, int 
p_src_phlen, int p_tgt_phlen, int p_mode) + : DataFile::DataFile(NULL, p_fname, p_rcoeff), + mode(p_mode), src_phlen(p_src_phlen), tgt_phlen(p_tgt_phlen), + iwlist(NULL), inbphw(NULL), icnbphw(NULL), + owlist(NULL), onbphw(NULL), ocnbphw(NULL) +{ + + do_constructor_work(); + // TODO: counting ? +} + +//******************* + +DataPhraseBin::~DataPhraseBin() +{ + + close(fd); + if (idim>0) delete [] input; + if (odim>0) delete [] target_vect; + if (inbphw) delete [] inbphw; + if (icnbphw) delete [] icnbphw; + if (onbphw) delete [] onbphw; + if (ocnbphw) delete [] ocnbphw; +} + +//******************* + +void DataPhraseBin::SetWordLists(WordList *p_iwlist, WordList *p_owlist) +{ + iwlist=p_iwlist; owlist=p_owlist; + if (iwlist->HasEOS()) { + iempty=iwlist->GetEOSIndex(); + printf (" - source word list uses special word (%d) for short phrases\n",iempty); + } + if (owlist->HasEOS()) { + oempty=owlist->GetEOSIndex(); + printf (" - target word list uses special word (%d) for short phrases\n",oempty); + } + +} + +//******************* + +bool DataPhraseBin::Next() +{ + bool ok=false; + WordID buf[max_buf_len]; + + // we may need to skip some phrase pairs in function of their length + while (!ok) { + int i; + uchar src_len, tgt_len; + + // read source phrase + if (!ReadBuffered(&src_len, sizeof(src_len))) return false; + + if ((int) src_len>max_buf_len) Error("The source phrase is too long, you need to recompile the program\n"); + if (!ReadBuffered((uchar*)buf, src_len*sizeof(WordID))) Error("DataPhraseBin::Next(): no source phrase left\n"); +#ifdef DEBUG + for (i=0; isrc_phlen) { + nbi++; // ignore: too many source words + ok=false; // won't be used, but we still need to read the target phrase to keep it in sync + } + else { + // copy source phrase into input vector + for (i=0; imax_buf_len) Error("The target phrase is too long, you need to recompile the program\n"); + if (!ReadBuffered((uchar*)buf, tgt_len*sizeof(WordID))) Error("DataPhraseBin::Next(): no target phrase 
left\n"); +#ifdef DEBUG + for (i=0; i tgt_phlen) { + nbi++; ok=false; continue; // ignore: too many target words + } + else { + // copy target phrase into output vector + for (i=0; i> input[idim + i]; + if (!aux_fs) + Error("Not enough auxiliary data available"); + } + } + +#ifdef DEBUG + printf("EX:"); + for (int i=0; i"); + for (int i=0; i +#include + +#include "Tools.h" +#include "Data.h" +#include "DataFile.h" + +#define DATA_FILE_PHRASE_BUF_SIZE (DATA_FILE_BUF_SIZE*sizeof(WordID)) // to have the same size than for DataNgraBin + +extern const char* DATA_FILE_PHRASEBIN; +typedef unsigned char uchar; + +// Syntax of a line in data description: +// DataPhraseBin [flags] +// 1: skip too short source phrases +// 16: skip too short target phrases +// Phrase pairs for which the source or target part is too long are always skipped +// (there is not reasonable way to "back-off" to a shorter phrase pair +// +// format of binary file +// header: (17 int = 68 bytes) +// int sizeof(WordID) +// int max_phrase_len +// uint voc_size \ source +// WordID unk, bos, eos WordIDs / +// int* array of number of source phrases for each length 1..max_phrase_len +// uint voc_size \ target +// WordID unk, bos, eos WordIDs / +// int* array of number of targ phrases for each length 1..max_phrase_len + +class DataPhraseBin : public DataFile +{ +private: + void do_constructor_work(); +protected: + int fd; // UNIX style binary file + int max_len; // max length wof words in phrase, read from file + int mode; // TODO + int src_phlen, tgt_phlen; // filter: max length of source and target phrases + // input + int ivocsize; // vocab size (including , and ) + WordList *iwlist; // pointer to source word list + WordID ibos,ieos,iunk; // word id of BOS, EOS and UNK in source vocabulary + WordID iempty; // word id of empty phrase (used to simulate shorter ones) + // set to EOS if present in vocabulary, NULL_WORD else + int *inbphw; // array[max_len+1] of nb of phrases per length + // indices start at 1, 
indice 0 gives the total count + int *icnbphw; // same, but cumulated number + // ouput + int ovocsize; // vocab size (including , and ) + WordList *owlist; // pointer to source word list + WordID obos,oeos,ounk; // word id of BOS, EOS and UNK in target vocabulary + WordID oempty; // word id of empty phrase (used to simulate shorter ones) + int *onbphw, *ocnbphw; + // stats (in addition to nbex in mother class) + int nbi; // ignored phrases (too long source or target part) + + // read buffer for faster File IO, we read BYTES not WordID !! + uchar buf_bytes[DATA_FILE_PHRASE_BUF_SIZE]; + int buf_n; // actual size of data in buffer + int buf_pos; // current position + bool ReadBuffered(uchar *data, size_t cnt) { +#if 0 + read(fd, data, cnt); +#else + for (size_t i=0; i=buf_n) { + // read new block of data, we can get less than requested + buf_n = read(fd, buf_bytes, DATA_FILE_PHRASE_BUF_SIZE); + if (buf_n<=0) return false; // no data left + buf_pos=0; + } + data[i]=buf_bytes[buf_pos]; + } +#endif + return true; + } +public: + DataPhraseBin(char*, ifstream&, int, const string&, int, const string&, int, DataPhraseBin* =NULL); // optional object to initialize when adding factors + DataPhraseBin(char*, float =1.0, int =5, int =5, int =17); + virtual void SetWordLists(WordList*, WordList*); + virtual ~DataPhraseBin(); + virtual bool Next(); + virtual void Rewind(); +}; + +#endif diff --git a/ErrFct.cpp b/ErrFct.cpp new file mode 100644 index 0000000..70ffef1 --- /dev/null +++ b/ErrFct.cpp @@ -0,0 +1,80 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. 
+ * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +using namespace std; +#include +#include +#include + +#include "Tools.h" +#include "ErrFct.h" +#include "Blas.h" + +ErrFct::ErrFct (Mach &mach) + : dim(mach.GetOdim()), bsize(mach.GetBsize()), + output(mach.GetDataOut()), target(NULL) +{ +#ifdef BLAS_CUDA + gpu_conf = mach.GetGpuConfig(); + Gpu::SetConfig(gpu_conf); + grad = Gpu::Alloc(dim*bsize, "gradient in Error Function"); +#else + grad = new REAL[dim*bsize]; +#endif +} + +ErrFct::ErrFct (const ErrFct &efct) + : dim(efct.dim), bsize(efct.bsize), + output(efct.output), target(efct.target) +{ +#ifdef BLAS_CUDA + gpu_conf = efct.gpu_conf; + Gpu::SetConfig(gpu_conf); + grad = Gpu::Alloc(dim*bsize, "gradient in Error Function"); +#else + grad = new REAL[dim*bsize]; +#endif +} + +//************************************************************************************** + +REAL ErrFct::CalcValue(int eff_bsize) +{ + Error("ErrFct::CalcValue() should be overriden\n"); + return 0.0; +} + +void ErrFct::CalcValueBatch(int eff_bsize, REAL *res) +{ + Error("ErrFct::CalcValueBatch() should be overriden\n"); +} + +void ErrFct::CalcMax(int eff_bsize, REAL *res, int *idx) +{ + Error("ErrFct::CalcMax() should be overriden\n"); +} + +REAL ErrFct::CalcGrad(int 
eff_bsize) +{ + Error("ErrFct::CalcGrad() should be overriden\n"); + return 0.0; +} diff --git a/ErrFct.h b/ErrFct.h new file mode 100644 index 0000000..5c115b7 --- /dev/null +++ b/ErrFct.h @@ -0,0 +1,80 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * Class definition of a general error function + */ + +#ifndef _ErrFct_h +#define _ErrFct_h + +#include +#include "Tools.h" +#include "Mach.h" +#include "Data.h" + +#define LOG_PROBA_NONE 999 // special value of log proba to indicate that no calculation was done + // This happens e.g. 
for NULL_WORD + +class ErrFct +{ +private: +protected: + int dim; // output dimension of machine + int bsize; + REAL *output; // pointer to output data (stored in machine) + REAL *target; // pointer to target data (stored in trainer) + REAL *grad; // calculated gradient (stored in this class) +#ifdef BLAS_CUDA + size_t gpu_conf; // GPU configuration index; this is needed to run on multiple devices in parallel +#endif +public: + ErrFct(Mach&); + ErrFct(const ErrFct&); // we must redefine the copy constructor +#ifdef BLAS_CUDA + virtual ~ErrFct() { cublasFree(grad); } +#else + virtual ~ErrFct() { delete [] grad; } +#endif + void SetOutput(REAL *p_output) {output=p_output; } + void SetTarget(REAL *p_target) {target=p_target; } + REAL *GetGrad() {return grad; }; +#ifdef BLAS_CUDA + size_t GetGpuConfig() { return gpu_conf; } // return GPU configuration index used +#endif + virtual REAL CalcValue(int=0); // Calculate value of error function = sum over all examples in minibatch + virtual REAL CalcValueNull(int=0) { // special version that checks for NULL targets + Error("ErrFct::CalcValueNull() should be overriden\n"); return 0.0; + } + virtual void CalcValueBatch(int, REAL*); // Calculate value of error function, returns array for all values in mini batch + // (the vector must be allocated by the caller) + virtual void CalcMax(int, REAL*, int*); // returns max value (over all outputs) and index for each example in minibatch + // (the vectors must be allocated by the caller) + virtual REAL CalcGrad(int=0); // calculate NEGATIF gradient of error function + virtual REAL CalcGradNull(int=0) { // special version that checks for NULL targets + Error("ErrFct::CalcGradNull() should be overriden\n"); + return 0.0; + } +#ifdef BLAS_CUDA + virtual void CalcGradCumul(int eff_bsize) { Error("override ErrFct::CalcGradCumul()\n"); } + virtual void InitGradCumul() { Error("override ErrFct::SetGradCumul()\n"); } + virtual REAL GetGradCumul() { Error("override ErrFct::GetGradCumul()\n"); 
return 0; } +#endif +}; + +#endif diff --git a/ErrFctCrossEnt.cpp b/ErrFctCrossEnt.cpp new file mode 100644 index 0000000..86287a4 --- /dev/null +++ b/ErrFctCrossEnt.cpp @@ -0,0 +1,93 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +using namespace std; +#include +#include +#include + +#include "Tools.h" +#include "ErrFctCrossEnt.h" + +//********************************************************************************** +// E = sum_i d_i ln o_i +REAL ErrFctCrossEnt::CalcValue(int eff_bsize) { + REAL *optr=output; + REAL *tptr=target; + double err=0.0; + + if (eff_bsize<=0) eff_bsize=bsize; + for (int i=0; i 0 + * This is usually used with softmax outputs + */ + +#ifndef _ErrFctCrossEnt_h +#define _ErrFctCrossEnt_h + +#include +#include "Tools.h" +#include "ErrFct.h" + + +class ErrFctCrossEnt : public ErrFct +{ +public: + ErrFctCrossEnt(Mach &mach) : ErrFct(mach) {}; + virtual REAL CalcValue(int=0); // Calculate value of error function = sum over all examples in minibatch + virtual void CalcValueBatch(int, REAL*); // Calculate value of error function, returns array for all 
values in mini batch + // (the vector must be allocated by the caller) + virtual REAL CalcGrad(int=0); // calculate NEGATIF gradient of error function +}; + +#endif diff --git a/ErrFctMCE.cpp b/ErrFctMCE.cpp new file mode 100644 index 0000000..c5ede91 --- /dev/null +++ b/ErrFctMCE.cpp @@ -0,0 +1,116 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +using namespace std; +#include +#include +#include + +#include "Tools.h" +#include "ErrFctMCE.h" + +//************************************************************************************** + +REAL ErrFctMCE::CalcValue(int eff_bsize) { + REAL *optr=output; + REAL *tptr=target; + int nb_err=0; + + if (eff_bsize<=0) eff_bsize=bsize; + for (int b=0; b omax) {omax=*optr; oidx=i;} + if (*tptr > tmax) {tmax=*tptr; tidx=i;} +//printf("%f %f\n", *optr, *tptr); + optr++; tptr++; + } + if (oidx!=tidx) nb_err++; +//printf("b=%d, oidx=%d, tidx=%d, err=%d\n", b, oidx, tidx, nb_err); + } + + return (REAL) nb_err; +} + +//************************************************************************************** + +void ErrFctMCE::CalcValueBatch(int eff_bsize, REAL *res) { + REAL *optr=output; + REAL *tptr=target; + + if (eff_bsize<=0) eff_bsize=bsize; + for (int b=0; b omax) {omax=*optr; oidx=i;} + if (*tptr > tmax) {tmax=*tptr; tidx=i;} + optr++; tptr++; + } + *res++ = (oidx == tidx) ? 1 : 0; + } +} + + +//************************************************************************************** + +#if 0 // not used any more use CalcValueBatch instead +REAL ErrFctMCE::CalcValueNth(int idx) { + REAL *optr=output + idx*dim; + REAL *tptr=target + idx*dim; + + REAL omax=optr[0], tmax=tptr[0]; + int oidx=0, tidx=0; + for (int i=0; i omax) {omax=*optr; oidx=i;} + if (*tptr > tmax) {tmax=*tptr; tidx=i;} +//printf("%f %f\n", *optr, *tptr); + optr++; tptr++; + } + + return (oidx!=tidx) ? 
1.0 : 0.0; +} +#endif + + +//************************************************************************************** +REAL ErrFctMCE::CalcGrad(int eff_bsize) { + REAL *optr=output; + REAL *tptr=target; + REAL *gptr=grad; + int nb_err=0; + + if (eff_bsize<=0) eff_bsize=bsize; + + for (int b=0; b omax) {omax=*optr; oidx=i;} + if (*tptr > tmax) {tmax=*tptr; tidx=i;} + *gptr++ = -(*optr++ - *tptr++); + } + if (oidx!=tidx) nb_err++; + } + return (REAL) nb_err; +} diff --git a/ErrFctMCE.h b/ErrFctMCE.h new file mode 100644 index 0000000..1b3f652 --- /dev/null +++ b/ErrFctMCE.h @@ -0,0 +1,45 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + * + * Class definition of ``mean classification error'' function (MCE) + * we use MSE for training, but the value of the error function is + * the number of wrongly classified examples + */ + +#ifndef _ErrFctMCE_h +#define _ErrFctMCE_h + +#include +#include "Tools.h" +#include "ErrFct.h" + + +class ErrFctMCE : public ErrFct +{ +public: + ErrFctMCE(Mach &mach) : ErrFct(mach) {}; + virtual REAL CalcValue(int=0); // Calculate value of error function = sum over all examples in minibatch + virtual void CalcValueBatch(int, REAL*); // Calculate value of error function, returns array for all values in mini batch + // (the vector must be allocated by the caller) + virtual REAL CalcGrad(int=0); // calculate NEGATIF gradient of error function +}; + +#endif diff --git a/ErrFctMSE.cpp b/ErrFctMSE.cpp new file mode 100644 index 0000000..5d9e09f --- /dev/null +++ b/ErrFctMSE.cpp @@ -0,0 +1,97 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +using namespace std; +#include +#include +#include + +#include "Tools.h" +#include "ErrFctMSE.h" + +//************************************************************************************** + +REAL ErrFctMSE::CalcValue(int eff_bsize) { + REAL mse=0.0, val; + REAL *optr=output; + REAL *tptr=target; + + if (eff_bsize<=0) eff_bsize=bsize; + for (int i=0; iGetClassSizes(); + n_classes = class_sizes.size(); + +#ifdef BLAS_CUDA + if (grad_class) + cudaFree(grad_class); + grad_class = Gpu::Alloc(n_classes*bsize, "class gradient in ErrFctSoftmClassCrossEntNgram"); + // allocate GPU memory to store errors (class and conditional NLLs) + // and host memory to transfer it + if (err) + cudaFree(err); + err = Gpu::Alloc(2, "sum of class NLL, then sum of conditional NLL"); + + if (host_err) + delete [] host_err; + host_err = new REAL[2]; +#else + if (grad_class) + delete [] grad_class; + grad_class = new REAL[n_classes*bsize]; +#endif + + // Build the output layer (softmax with classes) + mach_class->SetUp(wlist); +} + +//************ +// this->class_output contains the probability of each of the n_classes class, +// for all examples in the minibatch. +// this->output contains, for each example in the minibatch, a collection of +// conditional probability vectors, one for each class: for each word i +// in that class c, P(w=i|c, h), the conditional probability that the next +// word (w) is i, given the class c and given the context (h). +// In this function, only the conditional probabilities for words belonging to the +// class of the target word, c(t), are correct (other are garbage). 
+REAL ErrFctSoftmClassCrossEntNgram::CalcValue(int eff_bsize) +{ + double err_value = 0.; + if (eff_bsize <= 0) + eff_bsize = bsize; + +#ifdef BLAS_CUDA + Gpu::SetConfig(gpu_conf); + // The first component of the cost is the NLL of the correct class + err_value += Gpu::ErrFctSoftmCrossEntNgramCalcValue(eff_bsize, n_classes, + output_class, target_class); + // Second part is the conditional NLL of the correct word in the class. + err_value += Gpu::ErrFctSoftmCrossEntNgramCalcValue(eff_bsize, dim, output, target); +#else + REAL *tcptr=target_class; + REAL *ocptr=output_class; + REAL *tptr = target; + REAL *optr = output; + for (int b=0; b max_oclass) { + argmax = i; + max_oclass = ocptr[i]; + } + } + + if ((int) *tcptr != argmax) + err_value++; + + + ocptr += n_classes; + tcptr++; + } +#endif + return (REAL) err_value; +} diff --git a/ErrFctSoftmClassCrossEntNgram.h b/ErrFctSoftmClassCrossEntNgram.h new file mode 100644 index 0000000..732ba5e --- /dev/null +++ b/ErrFctSoftmClassCrossEntNgram.h @@ -0,0 +1,103 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + * + * Cross-entropy error function when the probability of a word is factored + * into probability of a category times probability of the word given the category. + * The category predictor is a softmax, and so is each of the word predictors. + * + * This class should be used in conjunction with MachSoftmaxClass. + */ + +#ifndef _ErrFctSoftmClassCrossEntNgram_h +#define _ErrFctSoftmClassCrossEntNgram_h + +#include "ErrFct.h" +#include "MachSoftmaxClass.h" + + +class ErrFctSoftmClassCrossEntNgram : public ErrFct +{ +private: + // Buffer for the class index of target words + REAL* target_class; + // Buffer for cached information about the target class: offset (wrt the full output) + // and number of words in that class. This buffer is always in host memory, + // as we need to perform pointer arithmetic on it. 
+ int* target_class_info; + // Buffer for the predicted class probabilities + REAL* output_class; + // Buffer for the gradient wrt these probabilities + REAL* grad_class; + +#ifdef BLAS_CUDA + REAL* err; + REAL* host_err; +#endif + +protected: + int n_classes; + std::vector class_sizes; + WordList* wlist; + MachSoftmaxClass* mach_class; + +public: + ErrFctSoftmClassCrossEntNgram(Mach &mach); + ErrFctSoftmClassCrossEntNgram(const ErrFctSoftmClassCrossEntNgram&); + virtual ~ErrFctSoftmClassCrossEntNgram(); + virtual void SetUp(MachSoftmaxClass* mach_class, WordList* wlist); + + virtual void SetOutputClass(REAL* p_output_class) + { + output_class = p_output_class; + } + virtual void SetTargetClassInfo(REAL* p_target_class, int* p_target_class_info) + { + target_class = p_target_class; + target_class_info = p_target_class_info; + // The MachSoftmaxClass needs to know where the target class is, + // so it can compute only the conditional probabilities of words + // in that class. It does not actually need the index, just the info. 
+ if (mach_class) { + mach_class->SetTargetInfo(p_target_class_info); + } + } + virtual REAL* GetGradClass() + { + return grad_class; + } + + virtual REAL CalcValue(int=0); // Calculate value of error function = sum over all examples in minibatch + virtual void CalcValueBatch(int, REAL*); // Calculate value of error function, returns array for all values in mini batch + // (the vector must be allocated by the caller) + virtual void CalcMax(int, REAL*, int*); // returns max value (over all outputs) and index for each example in minibatch + // (the vectors must be allocated by the caller) + + // calculate NEGATIVE gradient of error function + virtual REAL CalcGrad(int eff_bsize=0); + // special version that checks for NULL targets + virtual REAL CalcGradNull(int eff_bsize=0); + + // Compute classification error on word classes + virtual REAL CalcWordClassError(int eff_bsize=0); + +}; + +#endif diff --git a/ErrFctSoftmCrossEntNgram.cpp b/ErrFctSoftmCrossEntNgram.cpp new file mode 100644 index 0000000..552edbe --- /dev/null +++ b/ErrFctSoftmCrossEntNgram.cpp @@ -0,0 +1,262 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +using namespace std; +#include +#include +#include + +#include "Tools.h" +#include "ErrFctSoftmCrossEntNgram.h" +#include "Blas.h" + +ErrFctSoftmCrossEntNgram::ErrFctSoftmCrossEntNgram(Mach &mach) + : ErrFct(mach) +{ +#ifdef BLAS_CUDA + Gpu::SetConfig(gpu_conf); + err = Gpu::Alloc(1, "ErrFctSoftmCrossEntNgram: err variable"); +#endif +} + +ErrFctSoftmCrossEntNgram::ErrFctSoftmCrossEntNgram(const ErrFctSoftmCrossEntNgram &efct) + : ErrFct(efct) +{ +#ifdef BLAS_CUDA + Gpu::SetConfig(gpu_conf); + err = Gpu::Alloc(1, "ErrFctSoftmCrossEntNgram: err variable"); +#endif +} + +ErrFctSoftmCrossEntNgram::~ErrFctSoftmCrossEntNgram() +{ +#ifdef BLAS_CUDA + if (err) cudaFree(err); +#endif +} + +//*********************************************************************************r +// E = log(sum_i d_i ln o_i) +// = ln o_t where t is the target index +// output: dimension voc_size +// target: dimension 1 with values [0,voc_size[ +// We also take the log since this can't be done later if bsize>1 + +REAL ErrFctSoftmCrossEntNgram::CalcValue(int eff_bsize) +{ + if (eff_bsize<=0) eff_bsize=bsize; + +#ifdef BLAS_CUDA + Gpu::SetConfig(gpu_conf); + return Gpu::ErrFctSoftmCrossEntNgramCalcValue(eff_bsize, dim, output, target); +#else + REAL *tptr=target; + REAL *optr=output; + double lerr=0.0; + + for (int b=0; bmax) { max=*optr; idx=i; } + } + *res++ = max; + *pos++ = idx; + } +#endif +} + + + + +//********************************************************************************** +// returns the target for one example in the minibatch +// idx should be in [0,bsize) +// (special version which handles NULL_WORDs) + +#if 0 // not used any more use CalcValueBatch instead +REAL ErrFctSoftmCrossEntNgram::CalcValueNth(int idx) +{ +#ifdef BLAS_CUDA + 
Gpu::SetConfig(gpu_conf); + return Gpu::ErrFctSoftmCrossEntNgramCalcValueNth(eff_bsize, dim, output, target); +#else + REAL *optr=output + idx*dim; // softmax dim + REAL *tptr=target + idx*1; // target dim is 1 ! + + if ((int) *tptr == NULL_WORD) return -1; + return safelog(optr[(int) *tptr]); +#endif +} +#endif + + +// We include here the derivation of the softmax outputs since we have +// dE/da_k = sum_i dE/do_i do_i/da_k +// Due to the sum, dE/do_i and do_i/da_k can't be calculated separately +// dE/do_i = d_i/o_i +// do_i/da_k = o_i (kronecker_ik - o_k) +// -> dE/da_k = sum_i d_i/o_i * o_i (kronecker_ik - o_k) +// = sum_i d_i (kronecker_ik - o_k) +// = (kronecker_tk - o_k) since d_i=0 for i!=t + +REAL ErrFctSoftmCrossEntNgram::CalcGrad(int eff_bsize) +{ + if (eff_bsize<=0) eff_bsize=bsize; + +#ifdef BLAS_CUDA + Gpu::SetConfig(gpu_conf); + Gpu::ErrFctSoftmCrossEntNgramCalcGrad(eff_bsize, dim, output, grad, target, err); + REAL res = 0; + Gpu::MemcpyAsync(&res, err, sizeof(REAL), cudaMemcpyDeviceToHost); + Gpu::StreamSynchronize(); + return res; +#else + + REAL *optr=output; + REAL *gptr=grad; + REAL *tptr=target; + uint tidx; + err=0.0; + int n=eff_bsize*dim; REAL f1=-1.0; + + memcpy(grad,output,n*sizeof(REAL)); + SCAL(&n,&f1,grad,&inc1); + for (int b=0; b 0 + * This is usually used with softmax outputs + */ + +#ifndef _ErrFctSoftmCrossEntNgram_h +#define _ErrFctSoftmCrossEntNgram_h + +#include +#include "Tools.h" +#include "ErrFct.h" +#ifdef BLAS_CUDA +# include "Gpu.cuh" +#endif + + +class ErrFctSoftmCrossEntNgram : public ErrFct +{ +private: + // the private var "dim" is set to the dimension of the data by the constructor ErrFct() + // this is usually a large softmax layer + // The dimension of the targets itslef is always ONE since we use the index in the word list ! 
+#ifdef BLAS_CUDA + REAL *err; // The last value computed by CalcGrad +#else + REAL err; +#endif +public: + ErrFctSoftmCrossEntNgram(Mach &mach); + ErrFctSoftmCrossEntNgram(const ErrFctSoftmCrossEntNgram&); + virtual ~ErrFctSoftmCrossEntNgram(); + virtual REAL CalcValue(int=0); // Calculate value of error function = sum over all examples in minibatch + virtual REAL CalcValueNull(int=0); // special version that checks for NULL targets + virtual void CalcValueBatch(int, REAL*); // Calculate value of error function, returns array for all values in mini batch + // (the vector must be allocated by the caller) + virtual void CalcMax(int, REAL*, int*); // returns max value (over all outputs) and index for each example in minibatch + // (the vectors must be allocated by the caller) + virtual REAL CalcGrad(int=0); // calculate NEGATIF gradient of error function + virtual REAL CalcGradNull(int=0); // special version that checks for NULL targets +#ifdef BLAS_CUDA + virtual void CalcGradCumul(int eff_bsize) { + if (eff_bsize<=0) eff_bsize=bsize; + Gpu::ErrFctSoftmCrossEntNgramCalcGradCumul(eff_bsize, dim, output, grad, target); + } + virtual void InitGradCumul() { Gpu::ResSet(0.0); }; + virtual REAL GetGradCumul() { return Gpu::ResGet(); }; +#endif +}; + +#endif diff --git a/ErrFctSoftmCrossEntNgramMulti.cpp b/ErrFctSoftmCrossEntNgramMulti.cpp new file mode 100644 index 0000000..8047242 --- /dev/null +++ b/ErrFctSoftmCrossEntNgramMulti.cpp @@ -0,0 +1,183 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. 
+ * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +using namespace std; +#include +#include +#include + +#include "Tools.h" +#include "ErrFctSoftmCrossEntNgramMulti.h" +#include "Blas.h" + +#ifdef BLAS_CUDA +#include "Gpu.cuh" +#endif + +ErrFctSoftmCrossEntNgramMulti::ErrFctSoftmCrossEntNgramMulti(Mach &mach, int n) + : ErrFctSoftmCrossEntNgram(mach), nb(n) // this allocates memory before we change the variable "dim" +{ + if (mach.GetOdim()%nb != 0) + Error("ErrFctSoftmCrossEntNgramMulti: output layer size is not an integer multiple"); + dim = mach.GetOdim() / nb; +} + + +//********************************************************************************** +// E = log(sum_i d_i ln o_i) +// = ln o_t where t is the target index +// output: dimension voc_size +// target: dimension 1 with values [0,voc_size[ +// We also take the log since this can't be done later if bsize>1 + +REAL ErrFctSoftmCrossEntNgramMulti::CalcValue(int eff_bsize) +{ + if (eff_bsize<=0) eff_bsize=bsize; + +#ifdef BLAS_CUDA + Gpu::SetConfig(gpu_conf); + Error("TODO"); + return 0; + //return Gpu::ErrFctSoftmCrossEntNgramMultiCalcValue(eff_bsize, dim, nb, output, target); +#else + REAL *tptr=target; + REAL *optr=output; + double err=0.0; + + for (int b=0; b1 + +#if 0 // not used any 
more use CalcValueBatch instead +REAL ErrFctSoftmCrossEntNgramMulti::CalcValueNth(int idx) +{ +#ifdef BLAS_CUDA + Gpu::SetConfig(gpu_conf); + Error("CUDA: ErrFctSoftmCrossEntNgramMulti::CalcValueNth() not implemented"); + return 0.0; +#else + Error("ErrFctSoftmCrossEntNgramMulti::CalcValueNth() not yet implemented"); + REAL *optr=output + idx*nb*dim; + REAL *tptr=target + idx*nb; + + double err=0.0; + for (int d=0; d dE/da_k = sum_i d_i/o_i * o_i (kronecker_ik - o_k) +// = sum_i d_i (kronecker_ik - o_k) +// = (kronecker_tk - o_k) since d_i=0 for i!=t +REAL ErrFctSoftmCrossEntNgramMulti::CalcGrad(int eff_bsize) +{ + if (eff_bsize<=0) eff_bsize=bsize; + +#ifdef BLAS_CUDA + Gpu::SetConfig(gpu_conf); + REAL err = Gpu::ErrFctSoftmCrossEntNgramMultiCalcGrad(eff_bsize, dim, nb, output, grad, target); + return err; +#else + + REAL *optr=output; + REAL *gptr=grad; + REAL *tptr=target; + int tidx; + REAL err=0.0; + int n=eff_bsize*nb*dim; + REAL f1=-1.0; + + memcpy(grad,output,n*sizeof(REAL)); + SCAL(&n,&f1,grad,&inc1); // TODO: can be speed-up since many phrase are incomplete + for (int b=0; b 0 + * This is usually used with softmax outputs + */ + +#ifndef _ErrFctSoftmCrossEntNgramMulti_h +#define _ErrFctSoftmCrossEntNgramMulti_h + +#include +#include "Tools.h" +#include "ErrFct.h" +#include "ErrFctSoftmCrossEntNgram.h" +#ifdef BLAS_CUDA +# include "Gpu.cuh" +#endif + + +class ErrFctSoftmCrossEntNgramMulti : public ErrFctSoftmCrossEntNgram +{ +private: + int nb; // number of separate output n-gram each of dimension "dim" + // -> therefore the total size of the gradient is nb*dim !! 
+public: + ErrFctSoftmCrossEntNgramMulti(Mach &mach, int n); + virtual REAL CalcValue(int=0); // Calculate value of error function = sum over all examples in minibatch + virtual void CalcValueBatch(int, REAL*); // Calculate value of error function, returns array for all values in mini batch + // (the vector must be allocated by the caller) + virtual void CalcMax(int, REAL*, int*) { printf ("ERROR: Unimplemenetd function");}; // returns max value (over all outputs) and index for each example in minibatch + // (the vectors must be allocated by the caller) + virtual REAL CalcGrad(int=0); // calculate NEGATIF gradient of error function +#ifdef BLAS_CUDA + virtual void InitGradCumul() { Gpu::ResSet(0.0); }; + virtual REAL GetGradCumul() { return Gpu::ResGet(); }; +#endif +}; + +#endif diff --git a/Gpu.cuh b/Gpu.cuh new file mode 100644 index 0000000..1f3eae8 --- /dev/null +++ b/Gpu.cuh @@ -0,0 +1,324 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + + +#ifndef _Gpu_cuh +#define _Gpu_cuh + +#include "Tools.h" +#include +#include +#include +#include +#include + +#define GPU_CUBLAS_V2 +#ifdef GPU_CUBLAS_V2 +#include +#endif + +#define CUDA float // memory on the GPU card +#define CUDA_SIZE (sizeof(float)) // memory on the GPU card + +extern curandGenerator_t cuda_gen; +extern string cuda_user_list; ///< user specified list of GPUs + +/** + * provides an interface to use Gpu with Cuda + */ +class Gpu +{ +public: + /** + * initializes Cuda and creates lock files + * @note selects first device and stream + * @returns configuration index 0 + */ + static size_t Init(); + + /** + * removes lock-files and deletes all configurations + */ + static void Unlock(); + + /** + * creates a new Gpu stream on next device + * @note selects the next device and the new stream + * @returns new configuration index + */ + static size_t NewConfig(); + + /** + * gets current configuration index + */ + static inline size_t GetConfig() { + return Gpu::curConfIndex; } + + /** + * sets current device and stream + * @param stCfg index of configuration to use + */ + static inline void SetConfig(size_t stCfg) { + if (stCfg != Gpu::curConfIndex) ChangeConfig(stCfg % Gpu::vConfigs.size()); } + + /** + * gets number of devices + */ + static inline size_t GetDeviceCount() { + return vDevices.size(); } + + /** + * gets device index + * @param stCfg index of configuration + */ + static inline size_t GetDevice(size_t stCfg) { + return Gpu::vConfigs[stCfg % Gpu::vConfigs.size()].devId; } + + /** + * sets current device with default stream + * @param stDevId device index + */ + static void SetDevice(size_t stDevId); + + /** + * gets Cuda device number + * @param stDevId device index + */ + static inline int 
GetCudaDevice(size_t stDevId) { + return Gpu::vDevices[stDevId % Gpu::vDevices.size()].number; } + + /** + * gets device properties + * @param stCfg index of configuration + */ + static inline const cudaDeviceProp& GetDeviceProp(size_t stCfg) { + return Gpu::vDevices[Gpu::vConfigs[stCfg % Gpu::vConfigs.size()].devId].props; } + + /** + * allocates memory on Gpu and checks error + * @param dim data size + * @param msg message to print in case of error + * @returns pointer to memory block, or NULL in case of error + */ + static REAL* Alloc(int dim, const char* msg); + + /** + * copies data between host and Gpu + * @param dst destination memory address + * @param src source memory address + * @param count size in bytes to copy + * @param kind type of transfer + * @returns error code + */ + static inline cudaError_t MemcpyAsync(void* dst, const void* src, size_t count, cudaMemcpyKind kind) { + return cudaMemcpyAsync(dst, src, count, kind, Gpu::curStream); } + + /** + * copies data between host and Gpu + * @param dst destination memory address + * @param dpitch pitch of destination memory + * @param src source memory address + * @param spitch pitch of source memory + * @param width width of matrix transfer (columns in bytes) + * @param height height of matrix transfer (rows) + * @param kind type of transfer + * @returns error code + */ + static inline cudaError_t Memcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind) { + return cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, Gpu::curStream); } + + /** + * initializes or sets Gpu memory to a value + * @param devPtr pointer to Gpu memory + * @param value value to set for each byte of specified memory + * @param count size in bytes to set + * @returns error code + */ + static inline cudaError_t MemsetAsync(void* devPtr, int value, size_t count) { + return cudaMemsetAsync(devPtr, value, count, Gpu::curStream); } + + /** + * checks if streams 
are used concurrently within a device + * @note depends on the number of new configurations + */ + static inline bool UseConcurrentStreams() { + return Gpu::useConcurrentStreams; + } + + /** + * waits for current stream tasks to complete + * @returns error code + */ + static inline cudaError_t StreamSynchronize() { + return cudaStreamSynchronize(Gpu::curStream); } + + /** + * checks error + * @param msg message to print in case of error + */ + static void CheckError(const char* msg); + + + /** Blas methods */ + + static inline void CublasScopy(int n, const REAL* x, int incx, REAL* y, int incy) { +#ifdef GPU_CUBLAS_V2 + cublasScopy(Gpu::curCbHandle, n, x, incx, y, incy); } +#else + cublasScopy( n, x, incx, y, incy); } +#endif + + static inline REAL CublasSasum(int n, const REAL* x, int incx) { +#ifdef GPU_CUBLAS_V2 + REAL result; cublasSasum(Gpu::curCbHandle, n, x, incx, &result); return result; } +#else + return cublasSasum( n, x, incx); } +#endif + + static inline void CublasSaxpy(int n, REAL alpha, const REAL* x, int incx, REAL* y, int incy) { +#ifdef GPU_CUBLAS_V2 + cublasSaxpy(Gpu::curCbHandle, n, &alpha, x, incx, y, incy); } +#else + cublasSaxpy( n, alpha, x, incx, y, incy); } +#endif + + static inline void CublasSscal(int n, REAL alpha, REAL* x, int incx) { +#ifdef GPU_CUBLAS_V2 + cublasSscal(Gpu::curCbHandle, n, &alpha, x, incx); } +#else + cublasSscal( n, alpha, x, incx); } +#endif + + static inline void CublasSgemv(char trans, int m, int n, REAL alpha, const REAL* A, int lda, const REAL* x, int incx, REAL beta, REAL* y, int incy) { +#ifdef GPU_CUBLAS_V2 + cublasOperation_t co = ((trans == 'N') ? CUBLAS_OP_N : ((trans == 'T') ? 
CUBLAS_OP_T : CUBLAS_OP_C)); + cublasSgemv(Gpu::curCbHandle, co, m, n, &alpha, A, lda, x, incx, &beta, y, incy); } +#else + cublasSgemv( trans, m, n, alpha, A, lda, x, incx, beta, y, incy); } +#endif + + static inline void CublasSger(int m, int n, REAL alpha, const REAL* x, int incx, const REAL* y, int incy, REAL* A, int lda) { +#ifdef GPU_CUBLAS_V2 + cublasSger(Gpu::curCbHandle, m, n, &alpha, x, incx, y, incy, A, lda); } +#else + cublasSger( m, n, alpha, x, incx, y, incy, A, lda); } +#endif + + static inline void CublasSgemm(char transa, char transb, int m, int n, int k, REAL alpha, const REAL* A, int lda, const REAL* B, int ldb, REAL beta, REAL* C, int ldc) { +#ifdef GPU_CUBLAS_V2 + cublasOperation_t coa = ((transa == 'N') ? CUBLAS_OP_N : ((transa == 'T') ? CUBLAS_OP_T : CUBLAS_OP_C)); + cublasOperation_t cob = ((transb == 'N') ? CUBLAS_OP_N : ((transb == 'T') ? CUBLAS_OP_T : CUBLAS_OP_C)); + cublasSgemm(Gpu::curCbHandle, coa, cob, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc); } +#else + cublasSgemm( transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } +#endif + + + /* methods used to launch kernels on Gpu */ + + static void MachTabForw(const int bsize, const int odim, REAL *gpu_data_in, REAL *gpu_t, REAL *gpu_data_out); + + static void MachTabBackw(const REAL lrate, const int bsize, const int odim, REAL *gpu_data_in, REAL *gpu_t, REAL *gpu_grad_out); + + static void MachSoftmaxForw(const int bsize, const int odim, REAL *gpu_data_out); + static void MachSoftmaxStableForw(const int bsize, const int odim, REAL *gpu_data_out); + + static void LinRectifForw(const int n, REAL *gpu_data_out); + static void LinRectifBackw(const int n, REAL *gpu_data_out, REAL *gpu_grad_out); + + static void DropOut(const int n, REAL *gpu_vect, REAL *rand, REAL thresh); + + static REAL ErrFctSoftmClassError(const int bsize, const int n_classes, REAL *gpu_class_out, REAL *gpu_class_target); + + static REAL ErrFctSoftmCrossEntNgramCalcValue(const int bsize, const int odim, 
REAL *gpu_data_out, REAL *gpu_target); + static REAL ErrFctSoftmCrossEntNgramCalcValueNull(const int bsize, const int odim, REAL *gpu_data_out, REAL *gpu_target); + static void ErrFctSoftmCrossEntNgramCalcValueBatch(const int eff_bsize, const int dim, REAL *output, REAL *target, REAL *res); + // not used anymore + // static REAL ErrFctSoftmCrossEntNgramCalcValueNth(const int idx, const int odim, REAL *gpu_data_out, REAL *gpu_target); + static void ErrFctSoftmCrossEntNgramCalcMax(const int eff_bsize, const int dim, REAL *output, REAL *target, REAL *res, int *pos); + static void ErrFctSoftmCrossEntNgramCalcGrad(const int bsize, const int odim, REAL *gpu_data_out, REAL *gpu_grad, REAL *gpu_target, REAL * gpu_res); + static void ErrFctSoftmCrossEntNgramCalcGradNull(const int bsize, const int odim, REAL *gpu_data_out, REAL *gpu_grad, REAL *gpu_target, REAL * gpu_res); + static void ErrFctSoftmCrossEntNgramCalcGradCumul(const int bsize, const int odim, REAL *gpu_data_out, REAL *gpu_grad, REAL *gpu_target); + + static REAL ErrFctSoftmCrossEntNgramMultiCalcGrad(const int bsize, const int dim, const int nb, REAL *gpu_data_out, REAL *gpu_grad, REAL *gpu_target); + + static void MachSoftmaxClassLinForw(const int bsize, const int idim, const int odim, REAL* input, REAL* weights, REAL* bias, REAL* output, int* class_info, const int max_size); + + static void MachSoftmaxClassSoftmForw(const int bsize, const int odim, REAL* gpu_data_out, int* class_info, const int max_size, const int stable); + static void ErrFctSoftmClassCrossEntNgramCalcGrad(const int bsize, const int odim, REAL* gpu_data_out, REAL* gpu_grad, REAL* gpu_target, int* class_info, REAL* gpu_res); + static void MachSoftmaxClassLinGradIn(const int bsize, const int idim, const int odim, REAL* grad_out, REAL* weights, REAL* grad_in, int* class_info, const int max_size); + static void MachSoftmaxClassLinGradUpdate(const int bsize, const int idim, const int odim, REAL* input, REAL* grad_out, REAL* weights, REAL* bias, 
int* class_info, const int max_size, const REAL lrate, const REAL wdecay); + + static void CopyVectorToMatrix(REAL * mat, REAL * vec, const int M, const int N); + static void CopyMatrixToMatrixStrided(REAL * dst, REAL * src, const int M, const int N, const int row_stride); + static void CopyMatrixStridedToMatrix(REAL * dst, REAL * src, const int M, const int N, const int row_stride); + + static void BatchedAXPY(const int n, const REAL a, REAL * x, const int incx, REAL * y, const int incy, const int nb_batch); + + static void ElemwiseExp(const int size, REAL *gpu_data_in, REAL *gpu_data_out); + static void ElemwiseTanh(const int size, REAL *gpu_data_in, REAL *gpu_data_out); + static void ElemwiseTanhGrad(const int size, REAL *gpu_data_out, REAL *gpu_grad_out, REAL* gpu_grad_in); + + static void MemSet(REAL *adr, REAL val, int len); + + static void ResSet(REAL val); + static REAL ResGet(); + + +private: + /** + * Gpu device + */ + struct Device { + int number; ///< Gpu device number + cudaDeviceProp props; ///< device properties + }; + + /** + * Gpu configuration + */ + struct Config { + size_t devId; ///< device index + cudaStream_t stream; ///< Gpu stream +#ifdef GPU_CUBLAS_V2 + cublasHandle_t cbHandle; ///< Cublas handle +#endif + }; + + static size_t curDevIndex; ///< current device index + static size_t curConfIndex; ///< current configuration index + static cudaStream_t curStream; ///< current stream + static bool useConcurrentStreams; ///< status of concurrent streams +#ifdef GPU_CUBLAS_V2 + static cublasHandle_t curCbHandle; ///< current Cublas handle +#endif + static cudaDeviceProp* curDevProps; ///< current device properties + static std::vector vDevices; ///< vector of Gpu devices to be used + static std::vector vConfigs; ///< vector of Gpu configurations + + /** + * changes current configuration + * @param stCfg index of configuration to use + */ + static void ChangeConfig(size_t stCfg); + +}; + +#endif diff --git a/Hypo.cpp b/Hypo.cpp new file mode 
100644 index 0000000..4ce23b6 --- /dev/null +++ b/Hypo.cpp @@ -0,0 +1,81 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + + +#include "Hypo.h" +#include "Tools.h" + +#include + +void Align::Print(outputfilestream &outf) +{ + if (sb==se) outf << sb; + else outf << sb << "-" << se; + outf << "="; + if (tb==te) outf << tb; + else outf << tb << "-" << te; +} + +void Hypo::Write(outputfilestream &outf) +{ + outf << id << NBEST_DELIM2 << trg << NBEST_DELIM2; + for (vector::iterator i = f.begin(); i != f.end(); i++) + outf << (*i) << " "; + outf << NBEST_DELIM << " " << s; + + if (a.size()>0) { + outf << " " << NBEST_DELIM; + for (vector::iterator i = a.begin(); i != a.end(); i++) { + outf << " "; (*i).Print(outf); + } + } + +#ifdef BOLT_NBEST + outf << " " << extra; +#endif + + outf << endl; +} + +float Hypo::CalcGlobal(Weights &w) +{ + + uint sz=w.val.size(); + if (szs > h2.s); +} + diff --git a/Hypo.h b/Hypo.h new file mode 100644 index 0000000..d164ad5 --- /dev/null +++ b/Hypo.h @@ -0,0 +1,112 @@ +/* + * This file is part of the continuous space language and translation model 
toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + * + * Basic functions to process one hypothesis + */ + + +#ifndef _HYPO_H_ +#define _HYPO_H_ + +using namespace std; + +#include +#include +#include +#include + +#include "Tools.h" +#include "Toolsgz.h" + +#define NBEST_DELIM "|||" +#define NBEST_DELIM2 " ||| " + +class PtableMosesPtree; // forward declaration for friendship in hypo + +class Align { + public: + int sb; // begining of source phrase + int se; // end of source phrase + int tb; // begining of target phrase + int te; // end of target phrase + public: + Align(int p1, int p2, int p3, int p4) : sb(p1), se(p2), tb(p3), te(p4) {}; + void Print(outputfilestream&); +}; + +class Hypo { +protected: + int id; + string trg; // translation + vector f; // feature function scores + vector trgw; // translation segmented into words + float s; // global score + vector a; // alignments + string extra; // additonal fields at the end of the line which are preserved + vector p_aux; //Aux data + int aux_dim; +public: + Hypo() {}; + ~Hypo() {}; + + //Hypo(int p_id,string p_trg, vector &p_f, float p_s) : id(p_id),trg(p_trg),f(p_f),s(p_s) { a.clear(); extra.clear(); } + Hypo(int 
p_id,string p_trg, vector &p_f, float p_s, vector& paux , int auxdim =0) : id(p_id),trg(p_trg),f(p_f),s(p_s), p_aux(paux), aux_dim(auxdim){ + a.clear(); extra.clear(); + } + + Hypo(int p_id,string p_trg, vector &p_f, float p_s, vector &p_a, vector& paux, int auxdim =0) : id(p_id),trg(p_trg),f(p_f),s(p_s), a(p_a), p_aux(paux), aux_dim(auxdim){ + extra.clear(); + } + Hypo(int p_id,string p_trg, vector &p_f, float p_s, string &p_e, vector& paux,int auxdim =0) : id(p_id),trg(p_trg),f(p_f),s(p_s), extra(p_e), p_aux(paux), aux_dim(auxdim){ + a.clear(); + } + + float CalcGlobal(Weights&); + void AddID(int o) {id+=o;}; + void Write(outputfilestream&); + bool operator< (const Hypo&) const; + // bool CompareLikelihoods (const Hypo&, const Hypo&) const; + void SetFeature(float val, const int pos) {if(pos>0) f[pos-1]=val; else f.push_back(val); } + void AddFeature(float val, const int pos) {f[pos-1] +=val;} + void SetFeature(vector &values, const int pos) + { + if (pos>0) { // replace existing scores (bound checks were done before) + uint s=values.size(); + for (uint p=0; p::iterator i=values.begin(); i!=values.end(); i++) f.push_back(*i); + } + } + void AddFeature(vector &values, const int pos) + { + uint s=values.size(); + for (uint p=0; p& GetAuxData() { return p_aux;} + int GetAuxDim() { return aux_dim;} + int NbPhrases() {return a.size(); } + friend class PtableMosesPtree; + friend class NBest; +}; + +#endif diff --git a/KENLM b/KENLM new file mode 100644 index 0000000..e69de29 diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..a8efc80 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,842 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. 
+ + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. 
+ + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. 
+ + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
+ + + + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. 
+ + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. 
+ + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. 
+ + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. 
Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. 
+ + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>. 
diff --git a/Lrate.cpp b/Lrate.cpp new file mode 100644 index 0000000..da9255f --- /dev/null +++ b/Lrate.cpp @@ -0,0 +1,238 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +#include "Lrate.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace bpo = boost::program_options; + + +/** + * creates a new Lrate object corresponding to given options + * @param sParams parameters string + * @returns new Lrate object + */ +Lrate* Lrate::NewLrate(std::string sParams) +{ + // parameters available + bpo::options_description od; + od.add_options() + ("type" , bpo::value()->required() , "type of learning rate") + ("beg" , bpo::value()->default_value(5E-03), "initial learning rate") + ("mult" , bpo::value()->default_value(7E-08), "learning rate multiplier") + ("min" , bpo::value()->default_value(1e-5) , "learning rate minimum value") + ("stop" , bpo::value()->default_value(0.0) , "learning rate stop value") + ("maxiter" , bpo::value()->default_value(10) , "maximum number of iterations without improvement"); + + // read parameters + 
bpo::variables_map vm; + try { + bpo::store( + bpo::command_line_parser(std::vector(1, sParams)). + extra_style_parser(Lrate::parse_params).options(od).run(), vm); + bpo::notify(vm); + } + catch (bpo::error &e) { + // error handling + ErrorN("parsing learning rate parameters \"%s\": %s", sParams.c_str(), e.what()); + return NULL; + } + std::string sType = vm["type"].as(); + REAL rBeg = vm["beg" ].as(); + REAL rMult = vm["mult"].as(); + REAL rStop = vm["stop"].as(); + REAL rMin = vm["min"].as(); + REAL rMaxIter = vm["maxiter"].as(); + + // create new lrate object + Lrate* pNewLrate = NULL; + const char* sType_cstr = sType.c_str(); + if (strcasecmp(sType_cstr, "Decay") == 0) + pNewLrate = new LrateExpDecay(rBeg, rMult, rStop, rMin, rMaxIter); + else if (strcasecmp(sType_cstr, "AdaGrad") == 0) + pNewLrate = new LrateAdaGrad(rBeg, rMult, rStop, rMin, rMaxIter); + else if (strcasecmp(sType_cstr, "Divide") == 0) + pNewLrate = new LrateTestAndDivide(rBeg, rMult, rStop, rMin, rMaxIter); + else if (strcasecmp(sType_cstr, "DivideAndRecover") == 0) + pNewLrate = new LrateDivideAndRecover(rBeg, rMult, rStop, rMin, rMaxIter); + else + ErrorN("parsing learning rate parameters \"%s\": unknown type '%s'", sParams.c_str(), sType.c_str()); + if (NULL == pNewLrate) + ErrorN("parsing learning rate parameters \"%s\": can't allocate type '%s'", sParams.c_str(), sType.c_str()); + return pNewLrate; +} + + +/** + * prints information about learning rate to standard output + */ +void Lrate::Info() const +{ + printf(" lower bound: %e", lrate_min); + if (lrate_stop>0 || lrate_maxiter>0) { + printf(", stopping"); + if (lrate_stop>0) printf(" when lrate<%e", lrate_stop); + if (lrate_stop>0 && lrate_maxiter>0) printf(" or"); + if (lrate_maxiter>0) printf(" after %d iterations without improvement", lrate_maxiter); + } + printf("\n"); +} + + +/** + * parses parameters (type and other options) + * @param vsTokens vector of tokens + * @return vector of options + * @note throws exception of class 
boost::program_options::error in case of error + */ +std::vector Lrate::parse_params(const std::vector &vsTokens) +{ + std::vector voParsed; + + // put tokens in stream + std::stringstream ssTokens; + std::vector::const_iterator iEnd = vsTokens.end(); + for (std::vector::const_iterator iT = vsTokens.begin() ; iT != iEnd ; iT++) + ssTokens << *iT << ' '; + + // read type (if written without parameter name) + std::string sReadType; + ssTokens >> sReadType; + if (!sReadType.empty()) { + const std::string sTypeParam("type"); + if (sTypeParam != sReadType.substr(0, sReadType.find('='))) + voParsed.insert(voParsed.end(), bpo::option(sTypeParam, std::vector(1, sReadType))); + else { + // no type without parameter name + ssTokens.seekg(0); + ssTokens.clear(); + } + } + + // read other parameters + ParseParametersLine(ssTokens, voParsed); + + // handle errors + if (ssTokens.bad()) + throw bpo::error("internal stream error"); + + return voParsed; +} + + +/** + * prints information about learning rate to standard output + */ +void LrateExpDecay::Info() const +{ + printf(" - decaying learning rate: %6.2e, decay factor=%6.2e\n", lrate_beg, lrate_mult); + Lrate::Info(); +} + + +/** + * updates learning rate after a forward + * @param iNbEx number of examples seen + */ +void LrateExpDecay::UpdateLrateOnForw(ulong iNbEx) +{ + lrate = lrate_beg / (1.0 + iNbEx * lrate_mult); // quadratic decrease + if (lrateGetNbForw () >= pPrevMach->GetNbForw ()) + && (pMach->GetNbBackw() >= pPrevMach->GetNbBackw()) + && pMach->CopyParams(pPrevMach) ) + printf("done\n"); + else + // the machine file has been changed outside + printf("error: the best machine file has changed\n"); + delete pPrevMach; + } + ifs.close(); + return false; + } +} diff --git a/Lrate.h b/Lrate.h new file mode 100644 index 0000000..a259c42 --- /dev/null +++ b/Lrate.h @@ -0,0 +1,324 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large 
vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +#ifndef _Lrate_h_ +#define _Lrate_h_ + +#include "Mach.h" +#include "Tools.h" +#include +#include +#include +#include + + +/** + * abstract base class to compute learning rates + */ +class Lrate +{ +public: + + /** + * type of Lrate + */ + enum LRateType { + LR_Type_Base = 0, + LR_Type_ExpDecay, + LR_Type_AdaGrad, + LR_Type_TestAndDivide, + LR_Type_DivideAndRecover + }; + + /** + * creates a new Lrate object corresponding to given options + * @param sParams parameters string + * @returns new Lrate object + */ + static Lrate* NewLrate(std::string sParams); + + /** + * destroys learning rate object + */ + virtual ~Lrate() {} + + /** + * gets Lrate type + */ + virtual inline Lrate::LRateType GetType() const { return Lrate::LR_Type_Base; } + + /** + * gets current learning rate value + */ + inline REAL GetLrate() const { return lrate; } + + /** + * checks if learning rate stop value is reached + * @return true if current value is less than stop value + */ + inline bool StopReached() const + { + if (lrate <= lrate_stop) { + printf(" - minimal allowed learning rate reached\n"); + return true; + } + if (lrate_iter_nogain>=lrate_maxiter) { + printf(" - no improvements after 
%d iterations\n", lrate_maxiter); + return true; + } + return false; + } + + /** + * prints information about learning rate to standard output + */ + virtual void Info() const; + + /** + * updates learning rate after a forward + * @param iNbEx number of examples seen + */ + virtual inline void UpdateLrateOnForw(ulong iNbEx) {} + + /** + * updates learning rate after a backward + */ + virtual inline void UpdateLrateOnBackw() {} + + /** + * updates learning rate after a cross-validation + * @param rErrDev current average error + * @param rBestErrDev best average error + * @param sBestFile name of best machine file + * @param pMach pointer to machine object which could be reloaded + * @returns true if performance is better + */ + virtual inline bool UpdateLrateOnDev(REAL rErrDev, REAL rBestErrDev, const char* sBestFile, Mach*& pMach) + { + if (rErrDev < rBestErrDev) lrate_iter_nogain=0; + else lrate_iter_nogain++; + return (rErrDev < rBestErrDev); + } + + +protected: + + REAL lrate; ///< current value + REAL lrate_beg; ///< value at beginning + REAL lrate_mult; ///< multiplier + REAL lrate_stop; ///< stop value + REAL lrate_min; ///< minimal value (lower bound) + int lrate_maxiter; ///< maximum number of iterations without improvements + int lrate_iter_nogain; ///< counts the number of iterations without improvements + + /** + * creates new learning rate object + * @param rLrateBeg learning rate value at beginning + * @param rLrateMult learning rate multiplier + * @param rLrateStop learning stop value + * @param rLrateMin learning rate minimum value + * @param rLrateMaxIter maximum number of iterations without improvement + */ + Lrate(REAL rLrateBeg = 0.01, REAL rLrateMult = 0, REAL rLrateStop = 0, REAL rLrateMin = 1e-5, int rLrateMaxIter = 10) : + lrate(rLrateBeg), lrate_beg(rLrateBeg), lrate_mult(rLrateMult), lrate_stop(rLrateStop), lrate_min(rLrateMin), lrate_maxiter(rLrateMaxIter), lrate_iter_nogain(0) {} + + +private: + + /** + * parses parameters (type and other 
options) + * @param vsTokens vector of tokens + * @return vector of options + * @note throws exception of class boost::program_options::error in case of error + */ + static std::vector parse_params(const std::vector &vsTokens); + +}; + + +/** + * learning rate with exponential decay + */ +class LrateExpDecay : public Lrate +{ +public: + + /** + * creates new learning rate object + * @param rLrateBeg learning rate value at beginning + * @param rLrateMult learning rate multiplier + * @param rLrateStop learning stop value + * @param rLrateMin learning rate minimum value + * @param rLrateMaxIter maximum number of iterations without improvement + */ + LrateExpDecay(REAL rLrateBeg = 0.01, REAL rLrateMult = 0, REAL rLrateStop = 0, REAL rLrateMin = 1e-5, int rLrateMaxIter = 10) : + Lrate(rLrateBeg, rLrateMult, rLrateStop, rLrateMin, rLrateMaxIter) {} + + /** + * destroys learning rate object + */ + virtual ~LrateExpDecay() {} + + /** + * gets Lrate type + */ + virtual inline Lrate::LRateType GetType() const { return Lrate::LR_Type_ExpDecay; } + + /** + * prints information about learning rate to standard output + */ + virtual void Info() const; + + /** + * updates learning rate after a forward + * @param iNbEx number of examples seen + */ + virtual void UpdateLrateOnForw(ulong iNbEx); + +}; + + +/** + * learning rate modified during backward + */ +class LrateAdaGrad : public Lrate +{ +public: + + /** + * creates new learning rate object + * @param rLrateBeg learning rate value at beginning + * @param rLrateMult learning rate multiplier + * @param rLrateStop learning stop value + * @param rLrateMin learning rate minimum value + * @param rLrateMaxIter maximum number of iterations without improvement + */ + LrateAdaGrad(REAL rLrateBeg = 0.01, REAL rLrateMult = 0, REAL rLrateStop = 0, REAL rLrateMin = 1e-5, int rLrateMaxIter = 10) : + Lrate(rLrateBeg, rLrateMult, rLrateStop, rLrateMin, rLrateMaxIter) {} + + /** + * destroys learning rate object + */ + virtual ~LrateAdaGrad() 
{} + + /** + * gets Lrate type + */ + virtual inline Lrate::LRateType GetType() const { return Lrate::LR_Type_AdaGrad; } + + /** + * updates learning rate after a backward + */ + virtual inline void UpdateLrateOnBackw() { Lrate::UpdateLrateOnBackw(); } + +}; + + +/** + * learning rate modified in function of the performance on the development data + */ +class LrateTestAndDivide : public Lrate +{ +public: + + /** + * creates new learning rate object + * @param rLrateBeg learning rate value at beginning + * @param rLrateMult learning rate multiplier + * @param rLrateStop learning stop value + * @param rLrateMin learning rate minimum value + * @param rLrateMaxIter maximum number of iterations without improvement + */ + LrateTestAndDivide(REAL rLrateBeg = 0.01, REAL rLrateMult = 0, REAL rLrateStop = 0, REAL rLrateMin = 1e-5, int rLrateMaxIter = 10) : + Lrate(rLrateBeg, rLrateMult, rLrateStop, rLrateMin, rLrateMaxIter) {} + + /** + * destroys learning rate object + */ + virtual ~LrateTestAndDivide() {} + + /** + * gets Lrate type + */ + virtual inline Lrate::LRateType GetType() const { return Lrate::LR_Type_TestAndDivide; } + + /** + * prints information about learning rate to standard output + */ + virtual inline void Info() const; + + /** + * updates learning rate after a cross-validation + * @param rErrDev current average error + * @param rBestErrDev best average error + * @param sBestFile name of best machine file + * @param pMach pointer to machine object + * @returns true if performance is better + */ + virtual bool UpdateLrateOnDev(REAL rErrDev, REAL rBestErrDev, const char* sBestFile, Mach*& pMach); + +}; + + +/** + * learning rate modified in function of the performance on the development data + * @note previous best machine is reloaded if performance decrease + */ +class LrateDivideAndRecover : public LrateTestAndDivide +{ +public: + + /** + * creates new learning rate object + * @param rLrateBeg learning rate value at beginning + * @param rLrateMult learning 
rate multiplier + * @param rLrateStop minimum value + * @param rLrateMin learning rate minimum value + * @param rLrateMaxIter maximum number of iterations without improvement + */ + LrateDivideAndRecover(REAL rLrateBeg = 0.01, REAL rLrateMult = 0, REAL rLrateStop = 0, REAL rLrateMin = 1e-5, int rLrateMaxIter = 10) : + LrateTestAndDivide(rLrateBeg, rLrateMult, rLrateStop, rLrateMin, rLrateMaxIter) {} + + /** + * destroys learning rate object + */ + virtual ~LrateDivideAndRecover() {} + + /** + * gets Lrate type + */ + virtual inline Lrate::LRateType GetType() const { return Lrate::LR_Type_DivideAndRecover; } + + /** + * updates learning rate after a cross-validation + * @param rErrDev current average error + * @param rBestErrDev best average error + * @param sBestFile name of best machine file + * @param pMach pointer to machine object which will be reloaded if performance decrease + * @returns true if performance is better + */ + virtual bool UpdateLrateOnDev(REAL rErrDev, REAL rBestErrDev, const char* sBestFile, Mach*& pMach); + +}; + + +#endif // _Lrate_h_ diff --git a/Mach.cpp b/Mach.cpp new file mode 100644 index 0000000..0e561d2 --- /dev/null +++ b/Mach.cpp @@ -0,0 +1,510 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +using namespace std; +#include +#include +#include + +#include "Tools.h" +#include "Mach.h" +#include "MachCopy.h" +#include "MachTab.h" +#include "MachLin.h" +#include "MachSig.h" +#include "MachTanh.h" +#include "MachSoftmax.h" +#include "MachSoftmaxStable.h" +#include "MachSoftmaxClass.h" +#include "MachLinRectif.h" +#include "MachSeq.h" +#include "MachPar.h" +#include "MachSplit.h" +#include "MachSplit1.h" +#include "MachJoin.h" + +vector signal_mach; +int Mach::fileid=-1; +std::map prSharedMachines; // to store Mach pointers for sharing using clone() function + +#ifdef BLAS_CUDA +# include "Blas.h" +#else + +int inc1=1; +#endif + +void HandlerSigUSR1(int s) { + time_t now; + time(&now); // TODO: ctime is not rentrant ! use ctime_r() instead if needed + cout << " - catched signal USR1 at " << ctime(&now) << endl; + signal_mach[0]->Info(false, (char*)" - "); + cout.flush(); + //for (uint i=0; i<1; i++) signal_mach[i]->Info(false, (char*)" - "); + signal(SIGUSR1, HandlerSigUSR1); +} + +//*********************************************** + +#ifdef BLAS_CUDA +void Mach::do_alloc() +{ + Gpu::Init(); + + data_out = Gpu::Alloc(odim*bsize, "output data for a machine"); + data_in=NULL; // should be set later by SetDataIn() + drop_out_rand = NULL; // will be allocated when calling SetDropOut() + grad_in = Gpu::Alloc(idim*bsize, "input gradient for a machine"); + grad_out=NULL; // should be set later by SetGradOut() +} + +void Mach::SetDropOut(const REAL v) { + if (v<0 || v>=1.0) Error("SetDropOut: the value must be in [0,1)"); + if (drop_out_rand) cublasFree(drop_out_rand); + if (v>0) { + drop_out_rand = Gpu::Alloc(odim*bsize, "buffer for random values for drop-out"); + } + drop_out=v; +} +#endif + 
+//*********************************************** + +#ifndef BLAS_CUDA +void Mach::do_alloc() +{ + if (odim*bsize>0) { + data_out=::new REAL[odim*bsize]; + if (!data_out) Error ("can't allocate memory for data_out"); + drop_out_rand = NULL; // will be allocated when calling SetDropOut() + } + else { data_out=drop_out_rand=NULL; } + data_in=NULL; // should be set later by SetDataIn() + if (idim*bsize>0) { + grad_in=::new REAL[idim*bsize]; + if (!grad_in) Error ("can't allocate memory for grad_in"); + } + else grad_in=NULL; + grad_out=NULL; // (luint) this) should be set later by SetGradOut() +} + +void Mach::SetDropOut(const REAL v) { + if (v<0 || v>=1.0) Error("SetDropOut: the value must be in [0,1)"); + if (drop_out_rand) delete drop_out_rand; + if (v>0) { + drop_out_rand = ::new REAL[odim*bsize]; + if (!drop_out_rand) Error ("can't allocate memory for drop_out"); + } + drop_out=v; +} +#endif + + +Mach::Mach(const int p_idim, const int p_odim, const int p_bsize, const ulong p_nbfw, const ulong p_nbbw) + : idim(p_idim), odim(p_odim), bsize(p_bsize), nb_forw(p_nbfw), nb_backw(p_nbbw), update(true), lr_coeff(1.0), drop_out(0.0), drop_out_rand(NULL) +{ + do_alloc(); +#ifdef BLAS_CUDA + gpu_conf = Gpu::GetConfig(); +#endif + + // setup SIGUSR1 handler + //cout << " - setting up handler for signal USR1" << endl; + if (signal_mach.empty()) signal(SIGUSR1, HandlerSigUSR1); + signal_mach.push_back(this); +} + +Mach::Mach(const Mach &m, const int p_idim) +{ + if (p_idim > 0) + idim = p_idim; + else + idim = m.idim; + odim = m.odim; + bsize = m.bsize; + nb_forw = m.nb_forw; + nb_backw = m.nb_backw; + update = m.update; + lr_coeff = m.lr_coeff; + drop_out = m.drop_out; + drop_out_rand = NULL; +#ifdef BLAS_CUDA + gpu_conf = m.gpu_conf; // this is very important ! 
we share the weights so they must be on the same machine + Gpu::SetConfig(gpu_conf); +#endif + do_alloc(); + data_in = m.data_in; + grad_out = m.grad_out; + + // setup SIGUSR1 handler + //cout << " - setting up handler for signal USR1" << endl; + if (signal_mach.empty()) signal(SIGUSR1, HandlerSigUSR1); + signal_mach.push_back(this); +} + +/******************************************* + * + ********************************************/ + +Mach::~Mach() +{ +#ifdef BLAS_CUDA + if (data_out) cublasFree(data_out); + if (drop_out_rand) cublasFree(drop_out_rand); + if (grad_in) cublasFree(grad_in); +#else + if (data_out) delete [] data_out; + if (drop_out_rand) delete [] drop_out_rand; + if (grad_in) delete [] grad_in; +#endif + signal_mach.pop_back(); //TODO: we should search for correct machine and delete it +} + +//----------------------------------------------- +// File output +//----------------------------------------------- + +void Mach::WriteParams(ostream &of) { + // write machine specific params + of.write((char*) &nb_forw, sizeof(ulong)); + of.write((char*) &nb_backw, sizeof(ulong)); +} + +void Mach::WriteData(ostream &of) { + const int i=0, s=sizeof(REAL); + of.write((char*) &i, sizeof(int)); + of.write((char*) &s, sizeof(int)); +} + +void Mach::Write(ostream &of) +{ + char header[file_header_size]; + for (int i=0; iReadParams(inpf); + + int s; + inpf.read((char*) &s,sizeof(int)); // number of elements + inpf.read((char*) &v,sizeof(int)); // size in bytes of each element + if (v != sizeof(REAL)) { + ErrorN( "binary data on file uses %d bytes while the current code is compiled for %lu bytes\n", v, sizeof(REAL)); + } + + //Loic: handling special case of MachTab + if(m->GetMType() == file_header_mtype_tab){ + MachTab* mt = static_cast(m); + // if version > 3 then check share-id + if(Mach::fileid >= file_header_version3){ + m->ReadData(inpf, s, bs); + if(prSharedMachines[mt->GetShareId()] == NULL){ + //fprintf(stderr, " ... 
new primary MachTab with share-id %d\n", mt->GetShareId()); + prSharedMachines[mt->GetShareId()] = mt; + if(mt->GetTabAdr() == NULL) { + Error("Mach::Read: machine should have its weights allocated!\n"); + } + } else { + //fprintf(stderr, " ... cloning secondary MachTab with share-id %d\n", mt->GetShareId()); + m = prSharedMachines[mt->GetShareId()]->Clone(); + } + + } else { // before file_header_version3, all MachTab in a MachPar share the weights + if(prSharedMachines[-1] == NULL ){ + prSharedMachines[-1]=m; + } else { + m = prSharedMachines[-1]->Clone(); + //fprintf(stderr, " cloning MachTab, address = %p\n", mt->GetTabAdr()); + } + } + } + else if(Mach::fileid >= file_header_version4 && Mach::canShare(mtype)) { + //fprintf(stderr, "Shareable machine mtype = %d\n", mtype); + Shareable* sharem = dynamic_cast(m); + //fprintf(stderr, "Shareable: external=%d share-id=%d\n", sharem->HasExternalData(), sharem->GetShareId()); + if(sharem->HasExternalData()){ + if(prSharedMachines[sharem->GetShareId()] != NULL){ + //fprintf(stderr, " ... secondary machine with share-id %d -> cloning primary machine\n", sharem->GetShareId()); + m = (MachLin*)prSharedMachines[sharem->GetShareId()]->Clone(); + } else { + ErrorN("Found a secondary machine with shareid=%d, but the primary machine is not yet created\n", sharem->GetShareId()); + } + } else { + if(sharem->GetShareId() != -1){ + //fprintf(stderr, " ... new primary machine with share-id %d\n", sharem->GetShareId()); + prSharedMachines[sharem->GetShareId()] = m; + } + //else { fprintf(stderr, " ... new primary machine with no sharing\n"); } + m->ReadData(inpf, s, bs); + } + } else { + //fprintf(stderr, " ... 
new machine without sharing type=%d\n", m->GetMType()); + m->ReadData(inpf, s, bs); + // TODO: check EOF + } + return m; +} + +//----------------------------------------------- +// Tools +//----------------------------------------------- + +void Mach::Info(bool detailed, char *txt) +{ + if (detailed) { + cout << " - dimensions: in=" << idim << ", out=" << odim << endl; + cout << " - number of parallel examples=" << bsize << endl; + if (drop_out>0) + cout << " - drop-out: " << drop_out << endl; + cout << " - number of passes: " << nb_forw << "/" << nb_backw << endl; + } + else { + if (drop_out>0) + printf("%sMach %d-%d, bs=%d, drop-out=%4.2f, passes=%lu/%lu", txt, idim, odim, bsize, drop_out, nb_forw, nb_backw); + else + printf("%sMach %d-%d, bs=%d, passes=%lu/%lu", txt, idim, odim, bsize, nb_forw, nb_backw); + if (lr_coeff != 1.0) printf(", lrate-coeff=%.2f", lr_coeff); +#ifdef BLAS_CUDA + printf(", on GPU %d", Gpu::GetCudaDevice(Gpu::GetDevice(gpu_conf))); +#endif + tm.disp(", "); + printf("\n"); + } +} + +bool Mach::CopyParams(Mach* mach) +{ + // type, idim, odim and bsize must be equals + if ( (NULL != mach) + && (mach->GetMType() == this->GetMType()) + && (mach->idim == this->idim ) + && (mach->odim == this->odim ) + && (mach->bsize == this->bsize) ) { + this->nb_forw = mach->nb_forw; + this->nb_backw = mach->nb_backw; + this->update = mach->update; + return true; + } + else + return false; +} + +//----------------------------------------------- +// Training +//----------------------------------------------- + +void Mach::Forw(int eff_bsize, bool in_train) +{ + if (idim!=odim) + Error("Mach::Forw(): call to default Forw() function with different dimensions"); + if (eff_bsize<=0) eff_bsize=bsize; + if (!data_in) + Error("Mach::Forw(): input data is not set"); + + tm.start(); + +#ifdef BLAS_CUDA + Gpu::SetConfig(gpu_conf); + COPY(eff_bsize*idim,data_in,1,data_out,1); // this does work on host or GPU +#else + int dim=eff_bsize*idim; + 
COPY(&dim,data_in,&inc1,data_out,&inc1); // this does work on host or GPU +#endif + nb_forw += (ulong) eff_bsize; + + tm.stop(); +} + +void Mach::Backw (const float lrate, const float wdecay, int eff_bsize) +{ + if (idim!=odim) + Error("Mach::Backw(): call to default Train() function with different dimensions"); + if (!grad_out) + Error("Mach::Backw(): output gradient is not set"); + + if (eff_bsize<=0) eff_bsize=bsize; +#ifdef BLAS_CUDA + Gpu::SetConfig(gpu_conf); + COPY(eff_bsize*idim,grad_out,1,grad_in,1); +#else + memcpy(grad_in,grad_out,eff_bsize*idim*sizeof(REAL)); +#endif + nb_backw += (ulong) eff_bsize; +} + +//****************************************** + +void GpuUnlock() +{ +#ifdef BLAS_CUDA + Gpu::Unlock(); +#endif +} + +//*********************************************** +// Find sub-machines matching desired mtype in parent_mach (depth-first). + +// Returns the first sub-machine found (depth-first). +// Returns NULL if none is found. +Mach* FindFirstMatching(int mtype, Mach* parent_mach) +{ + MachMulti* mach_multi = NULL; + if (parent_mach->GetMType() == mtype) { + return parent_mach; + } + else if ((mach_multi = dynamic_cast(parent_mach))) { + // Maybe a sub-machine will have the right mtype + int nb_sub_mach = mach_multi->MachGetNb(); + for (int i=0; iMachGet(i)); + if (found_mach != NULL) { + return found_mach; + } + } + } + return NULL; +} + +// Helper function for FindAllMatching +void EnqueueAllMatching(int mtype, Mach* parent_mach, std::vector queue) +{ + MachMulti* mach_multi = NULL; + if (parent_mach->GetMType() == mtype) { + queue.push_back(parent_mach); + } + if ((mach_multi = dynamic_cast(parent_mach))) { + // Maybe sub-machines will have the right mtype + int nb_sub_mach = mach_multi->MachGetNb(); + for (int i=0; iMachGet(i), queue); + } + } +} + +// Returns all matching sub-machines in a vector. 
+std::vector FindAllMatching(int mtype, Mach* parent_mach) +{ + std::vector rval; + EnqueueAllMatching(mtype, parent_mach, rval); + return rval; +} diff --git a/Mach.h b/Mach.h new file mode 100644 index 0000000..8b1b539 --- /dev/null +++ b/Mach.h @@ -0,0 +1,165 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +#ifndef _Machine_h +#define _Machine_h + +#include +#include +#include "Tools.h" +#include "Blas.h" +#include "Timer.h" + +// list of all known machine types, +// this is needed for the general file read function + +#define file_header_name "HPerf" +#define file_header_version1 1 // initial version +#define file_header_version2 2 // 2013/12/08: switched to ulong for nb_forw and nb_backw +#define file_header_version3 3 // 2015/03/18: added sharing information for MachTab +#define file_header_version4 4 // 2015/06/05: generalized sharing information: all simple machine can share its weights +#define file_header_version file_header_version4 +#define file_header_size 16 + +#define file_header_mtype_base 0 +#define file_header_mtype_tab 1 +#define file_header_mtype_tabsh 2 
+#define file_header_mtype_lin 3 +#define file_header_mtype_sig 4 +#define file_header_mtype_tanh 5 +#define file_header_mtype_softmax 6 +#define file_header_mtype_stab 7 +#define file_header_mtype_softmax_stable 8 +#define file_header_mtype_lin_rectif 9 +#define file_header_mtype_softmax_class 10 +#define file_header_mtype_copy 11 +#define file_header_mtype_multi 16 +#define file_header_mtype_mseq 17 +#define file_header_mtype_msplit1 18 +#define file_header_mtype_mpar 19 +#define file_header_mtype_msplit 20 +#define file_header_mtype_mjoin 21 +#define file_header_mtype_combined 31 +#define file_header_mtype_max 32 +#define file_header_mtype_avr 33 + +class Mach +{ +private: + void do_alloc(); // perform allocation of dynamic data structures +protected: + static int fileid; + int idim, odim; // input and output dimension + int bsize; // block size (nb of example used in parallel) + ulong nb_forw; // nb of forward examples processed + ulong nb_backw; // nb of backward examples processed + bool update; // update internal parameters + REAL lr_coeff; // machine specific learning coefficient (default 1.0) + // drop-out + REAL drop_out; // dropout probability, 0: not used (default), >0 apply drop-out (in training), <0 scale weighted sum (in testing) + REAL *drop_out_rand; // random values for the whole output vector + +#if 0 + // recurrent conncetions: user specified parameters + uint rec_hist; // nb of examples which are memorized + uint rec_step; // number of step before we do an update of the weights + uint rec_span; // number of step we go back during update + // rec_span can be larger than rec_step ! 
+ // both must be smaller or equal than rec_hist + // recurrent conncetions: for internal handling + // all the buffers are circular, no data is moved once stored + uint rec_ipos; // position in array where to add the new examples + // starts with 0, and wraps around once the end is reached + uint rec_len; // numnber of examples memorized in the buffers +#endif + + // CUDA: the following four variables refer to device memory + // the size of these buffers is: DIM * bsize * rec_hist + REAL *data_in; // input data (pointer) + // CUDA: we need to allocate device memory + REAL *data_out; // output data (allocated by machine) + REAL *grad_in; // input gradients (allocated by machine) + REAL *grad_out; // output gradients (pointer) + // CUDA: we need to allocate device memory + + Timer tm; // count real and user time +#ifdef BLAS_CUDA + size_t gpu_conf; // GPU configuration index; this is needed to run on multiple devices in parallel +#endif + // File or stream I/O, the following functions can be overloaded by subclass + // the main functions Read() and Write() should not be modified ! 
+ virtual void ReadParams(istream&, bool=true); // read all params + virtual void ReadData(istream&, size_t, int=0); // read binary data + virtual void WriteParams(ostream&); // write all params + virtual void WriteData(ostream&); // write binary data + Mach(const Mach &, const int=0); // create a copy of the machine +public: + Mach(const int=0, const int=0, const int=128, const ulong=0, const ulong=0); + virtual ~Mach(); + virtual Mach *Clone() {return new Mach(*this);} // create a copy of the machine + // Tools + virtual int GetMType() {return file_header_mtype_base;}; // get type of machine + virtual int GetIdim() {return idim;} + int GetOdim() {return odim;} + int GetBsize() {return bsize;} + + virtual void SetBsize(int bs) { + if (bs<1) Error("wrong value in SetBsize()"); else bsize=bs; } + ulong GetNbForw() {return nb_forw;} + ulong GetNbBackw() {return nb_backw;} + virtual void SetNbEx(ulong nf, ulong nb) {nb_forw=nf; nb_backw=nb;} + virtual ulong GetNbParams() {return 0;} // return the nbr of allocated parameters + void SetUpdataParams(bool up) {update=up;} // change flag to update internal parameters + void SetLrateCoeff(REAL v) {lr_coeff=v;} + virtual REAL* GetDataIn() {return data_in;} // return pointer on input data for chaining + virtual REAL* GetDataOut() {return data_out;} // return pointer on output data for chaining + virtual REAL* GetGradIn() {return grad_in;} // return pointer on input gradient for chaining + virtual REAL* GetGradOut() {return grad_out;} // return pointer on output gradient for chaining + virtual void SetDataIn(REAL *data) {data_in=data;} // set pointer of input data + virtual void SetGradOut(REAL *data) {grad_out=data;} // set pointer of output gradient + virtual void SetDropOut(const REAL v); // set drop-out fraction +#ifdef BLAS_CUDA + size_t GetGpuConfig() { return gpu_conf; } // return GPU configuration index used for this machine +#endif + virtual void Info(bool=false, char *txt=(char*)" - ");// display (detailed) 
information on machine + virtual bool CopyParams(Mach*); // copy parameters from another machine + // FILE IO + static Mach *Read(istream&, int=0); // read class from a stream + void Write(ostream&); // write content of class to a stream + // Training + virtual void Forw(int=0, bool=false); // calculate outputs for current inputs + // backprop gradients from output to input and update all weights + virtual void Backw (const float lrate, const float wdecay, int =0); + + static int GetFileId(){ return fileid;} + static void SetFileId(int id){ fileid = id;} + static bool canShare(int mtype){ return (mtype>=1 && mtype<=6) || (mtype>=8 && mtype<=10); } + +}; + +void GpuUnlock(); + +// Find sub-machines matching desired mtype in parent_mach (depth-first). +Mach* FindFirstMatching(int mtype, Mach* parent_mach); +std::vector FindAllMatching(int mtype, Mach* parent_mach); + +#endif diff --git a/MachAvr.cpp b/MachAvr.cpp new file mode 100644 index 0000000..4af0dfe --- /dev/null +++ b/MachAvr.cpp @@ -0,0 +1,271 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + * + */ + +using namespace std; +#include + +#include "Tools.h" +#include "MachAvr.h" + + +void MachAvr::do_alloc() +{ +#ifdef BLAS_CUDA + Gpu::SetConfig(gpu_conf); + if (data_out) cublasFree(data_out); + if (winner) cublasFree(winner); + if (grad_in) cublasFree(grad_in); + + data_out = Gpu::Alloc(odim*bsize, "output data of multi-average machine"); + winner = Gpu::Alloc(odim*bsize, "winner of multi-average machine"); + grad_in = Gpu::Alloc(idim*bsize, "input gradient of multi-average machine"); + +#else + if (data_out) delete [] data_out; + if (winner) delete [] winner; + if (grad_in) delete [] grad_in; + data_out = (odim*bsize>0) ? new REAL[odim*bsize] : NULL; + winner = (odim*bsize>0) ? new REAL[odim*bsize] : NULL; + grad_in = (idim*bsize>0) ? new REAL[idim*bsize] : NULL; +#endif +} + + +/* + * constructor + */ + +MachAvr::MachAvr() + : MachCombined() +{ +} + +/* + * copy constructor + * create a copy of the machine without submachines + */ + +MachAvr::MachAvr(const MachAvr &m) + : MachCombined(m) +{ +} + +/* + * destructor + */ + +MachAvr::~MachAvr() +{ + // data_out and grad_in will be deleted by the desctuctor of Mach +} + +/* + * create a copy of the machine and all submachines + */ + +MachAvr *MachAvr::Clone() +{ + MachAvr *m = new MachAvr(*this); + if (m != NULL) + m->CloneSubmachs(*this); + return m; +} + +/* + * set pointer of input data + * all machines point to the same input + */ + +void MachAvr::SetDataIn(REAL *data) +{ + data_in=data; + for (vector::iterator mit=machs.begin(); mitSetDataIn(data); +} + +// set pointer of output gradient +void MachAvr::SetGradOut(REAL *data) +{ + grad_out=data; + if (machs.size() > 0) machs.back()->SetGradOut(data); +} + +/* + * add a machine to the set + */ + +void 
MachAvr::MachAdd(Mach *new_mach) +{ + if (machs.empty()) { + machs.push_back(new_mach); + // think about freeing memory + idim=new_mach->GetIdim(); + bsize=new_mach->GetBsize(); + data_in=new_mach->GetDataIn(); // TODO + grad_in=new_mach->GetGradIn(); + do_alloc(); + } + else { + if (new_mach->GetIdim() != idim) + ErrorN("input dimension of new average machine does not match (%d), should be %d",new_mach->GetIdim(),idim); + if (new_mach->GetOdim() != idim) + ErrorN("output dimension of new average machine does not match (%d), should be %d",new_mach->GetOdim(),idim); + if (bsize!=new_mach->GetBsize()) { + ErrorN("bunch size of new average machine does not match (%d), should be %d",new_mach->GetBsize(),bsize); + } + machs.push_back(new_mach); + + // connect TODO + new_mach->SetDataIn(data_in); // all machines have same input + new_mach->SetGradOut(NULL); // TODO + + // no new allocation is needed since idim and odim don't change + } + + activ_forw.push_back(true); + activ_backw.push_back(true); +} + +/* + * delete last machine from the set + */ + +Mach *MachAvr::MachDel() +{ + if (machs.empty()) { + Error("impossible to delete element from average machine: is already empty"); + } + + Mach *del_mach=machs.back(); + machs.pop_back(); + + if (machs.empty()) { + idim=odim=bsize=0; + data_in=data_out=grad_in=grad_out=NULL; + } + + activ_forw.pop_back(); + activ_backw.pop_back(); + + return del_mach; +} + +//----------------------------------------------- +// File input +//----------------------------------------------- + +void MachAvr::ReadData(istream &inpf, size_t s, int bs) +{ + MachCombined::ReadData(inpf, s, bs); + + idim = machs[0]->GetIdim(); + bsize = machs[0]->GetBsize(); + odim = machs[0]->GetOdim(); + do_alloc(); + + // connect first to the outside world + MachAvr::SetDataIn(data_in); // TODO: check + // TODO: grad_in=machs[0]->GetGradIn(); + + // connect last machine to the outside world + //data_out= TODO + //grad_out= +} + +// +// Tools +// + +void 
MachAvr::Info(bool detailed, char *txt) +{ + if (detailed) { + cout << "Information on multiple average machine" << endl; + MachCombined::Info(detailed,txt); + } + else { + printf("%sMultiple average machine [%u] %d- .. -%d, bs=%d, passes=%lu/%lu", txt, (uint) machs.size(), idim, odim, bsize, nb_forw, nb_backw); + tm.disp(", "); + tbackw.disp(" + back: "); + printf("\n"); + char ntxt[512]; + sprintf(ntxt,"%s ", txt); + for (unsigned int i=0; iInfo(detailed, ntxt); + } + printf("%stotal number of parameters: %lu (%d MBytes)\n", txt, GetNbParams(), (int) (GetNbParams()*sizeof(REAL)/1048576)); +} + +/* + * Forward pass + */ + +void MachAvr::Forw(int eff_bsize, bool in_train) +{ + if (machs.empty()) + Error("called Forw() for an empty multiple average machine"); + + tm.start(); + for (size_t i=0; iForw(eff_bsize,in_train); + } + + // take elementwise max +#ifdef BLAS_CUDA + //TODO +#else + vector moptr; // pointers on the output of the machines + REAL *optr=data_out; // create maximized output + for (size_t i=0; iGetDataOut()); + + // TODO: vectorize and consider deactivated machines in an efficient WAY + for (int b=0; bmax) { + max=moptr[i][b]; + winner[b]=i; // remember index i + } + } + *optr++=max; + } +#endif + // TODO nb_forw += (eff_bsize<=0) ? bsize : eff_bsize; + tm.stop(); +} + +void MachAvr::Backw(const float lrate, const float wdecay, int eff_bsize) +{ + if (machs.empty()) + Error("called Backw() for an empty average machine"); + + debugMachOutp("MachAvr Grad",grad_out,idim,odim,eff_bsize); + tbackw.start(); + + for (int i=machs.size()-1; i>=0; i--) { + if (activ_backw[i]) machs[i]->Backw(lrate,wdecay,eff_bsize); + } + nb_backw += (eff_bsize<=0) ? 
bsize : eff_bsize; + + tbackw.stop(); + debugMachInp("MachAvr Grad",grad_in,idim,odim,eff_bsize); +} diff --git a/MachAvr.h b/MachAvr.h new file mode 100644 index 0000000..d8e6621 --- /dev/null +++ b/MachAvr.h @@ -0,0 +1,69 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + * + * This machines implements a set of INDEPENDENT machines which outputs are + * combined (max, average, etc according to the subclasses). The machines must have the + * same input and output dimension. 
The order of the forward and backward + * passes are not defined and may be in parallel on multiple CPUs or GPUs + * + * memory management: + * - data_in same pointer for all machines + * - data_out allocated (to calculate the max) + * - grad_in allocated (sum of Machine's grad_in) + * - grad_out points to following machine + * we also allocate internal storage to set some gradients + * of the individual machines to zero (this is faster than + * selective vector-wise backprop) + */ + +#ifndef _MachAvr_h +#define _MachAvr_h + +using namespace std; +#include + +#include "MachCombined.h" + +class MachAvr : public MachCombined +{ +private: + void do_alloc(); +protected: + virtual void ReadData(istream&, size_t, int=0); // read binary data + MachAvr(const MachAvr &); // create a copy of the machine (without submachines) +public: + MachAvr(); // create initial sequence with no machine + virtual ~MachAvr(); + virtual MachAvr *Clone(); // create a copy of the machine and all submachines + virtual int GetMType() {return file_header_mtype_avr;}; // get type of machine + // redfine connecting functions + virtual void SetDataIn(REAL*); // set pointer of input data + virtual void SetGradOut(REAL*); // set pointer of output gradient + // add and remove machines + virtual void MachAdd(Mach*); // add new machine after the existing ones + virtual Mach *MachDel(); + // standard functions + virtual void Info(bool=false, char *txt=(char*)""); // display (detailed) information on machine + virtual void Forw(int=0, bool=false); // calculate outputs for current inputs + virtual void Backw(const float lrate, const float wdecay, int=0); // calculate gradients at input for current gradients at output +}; + +#endif diff --git a/MachCombined.cpp b/MachCombined.cpp new file mode 100644 index 0000000..338481f --- /dev/null +++ b/MachCombined.cpp @@ -0,0 +1,254 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and 
large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + * + */ + +using namespace std; +#include + +#include "Tools.h" +#include "MachCombined.h" + + +void MachCombined::do_alloc() +{ +#ifdef BLAS_CUDA + Gpu::SetConfig(gpu_conf); + if (data_out) cublasFree(data_out); + if (winner) cublasFree(winner); + if (grad_in) cublasFree(grad_in); + + data_out = Gpu::Alloc(odim*bsize, "output data of a combined machine"); + winner = Gpu::Alloc(odim*bsize, "winner of a combined machine"); + grad_in = Gpu::Alloc(idim*bsize, "input gradient of a combined machine"); + +#else + if (data_out) delete [] data_out; + if (winner) delete [] winner; + if (grad_in) delete [] grad_in; + data_out = (odim*bsize>0) ? new REAL[odim*bsize] : NULL; + winner = (odim*bsize>0) ? new REAL[odim*bsize] : NULL; + grad_in = (idim*bsize>0) ? 
new REAL[idim*bsize] : NULL; +#endif +} + + +/* + * constructor + */ + +MachCombined::MachCombined() + : MachMulti(), winner(NULL) +{ +} + +/* + * copy constructor + * create a copy of the machine without submachines + */ + +MachCombined::MachCombined(const MachCombined &m) + : MachMulti(m), winner(NULL) +{ +} + +/* + * destructor + */ + +MachCombined::~MachCombined() +{ + // data_out and grad_in will be deleted by the desctuctor of Mach +} + +/* + * create a copy of the machine and all submachines + */ + +MachCombined *MachCombined::Clone() +{ + MachCombined *m = new MachCombined(*this); + if (m != NULL) + m->CloneSubmachs(*this); + return m; +} + +/* + * set pointer of input data + * all machines point to the same input + */ + +void MachCombined::SetDataIn(REAL *data) +{ + data_in=data; + for (vector::iterator mit=machs.begin(); mitSetDataIn(data); +} + +// set pointer of output gradient +void MachCombined::SetGradOut(REAL *data) +{ + grad_out=data; + if (machs.size() > 0) machs.back()->SetGradOut(data); +} + +/* + * add a machine to the set + */ + +void MachCombined::MachAdd(Mach *new_mach) +{ + if (machs.empty()) { + machs.push_back(new_mach); + // think about freeing memory + idim=new_mach->GetIdim(); + bsize=new_mach->GetBsize(); + data_in=new_mach->GetDataIn(); // TODO + grad_in=new_mach->GetGradIn(); + do_alloc(); + } + else { + if (new_mach->GetIdim() != idim) + ErrorN("input dimension of new combined machine does not match (%d), should be %d",new_mach->GetIdim(),idim); + if (new_mach->GetOdim() != idim) + ErrorN("output dimension of new combined machine does not match (%d), should be %d",new_mach->GetOdim(),idim); + if (bsize!=new_mach->GetBsize()) { + ErrorN("bunch size of new combined machine does not match (%d), should be %d",new_mach->GetBsize(),bsize); + } + machs.push_back(new_mach); + + // connect TODO + new_mach->SetDataIn(data_in); // all machines have same input + new_mach->SetGradOut(NULL); // TODO + + // no new allocation is needed since idim 
and odim don't change + } + + activ_forw.push_back(true); + activ_backw.push_back(true); +} + +/* + * delete last machine from the set + */ + +Mach *MachCombined::MachDel() +{ + if (machs.empty()) { + Error("impossible to delete element from combined machine: is already empty"); + } + + Mach *del_mach=machs.back(); + machs.pop_back(); + + if (machs.empty()) { + idim=odim=bsize=0; + data_in=data_out=grad_in=grad_out=NULL; + } + + activ_forw.pop_back(); + activ_backw.pop_back(); + + return del_mach; +} + +//----------------------------------------------- +// File input +//----------------------------------------------- + +void MachCombined::ReadData(istream &inpf, size_t s, int bs) +{ + MachMulti::ReadData(inpf, s, bs); + + idim = machs[0]->GetIdim(); + bsize = machs[0]->GetBsize(); + odim = machs[0]->GetOdim(); + do_alloc(); + + // connect first to the outside world + MachCombined::SetDataIn(data_in); // TODO: check + // TODO: grad_in=machs[0]->GetGradIn(); + + // connect last machine to the outside world + //data_out= TODO + //grad_out= +} + +// +// Tools +// + +void MachCombined::Info(bool detailed, char *txt) +{ + if (detailed) { + cout << "Information on multiple combined machine" << endl; + MachMulti::Info(detailed,txt); + } + else { + printf("%sMultiple combined machine [%u] %d- .. -%d, bs=%d, passes=%lu/%lu", txt, (uint) machs.size(), idim, odim, bsize, nb_forw, nb_backw); + tm.disp(", "); + tbackw.disp(" + back: "); + printf("\n"); + char ntxt[512]; + sprintf(ntxt,"%s ", txt); + for (unsigned int i=0; iInfo(detailed, ntxt); + } + printf("%stotal number of parameters: %lu (%d MBytes)\n", txt, GetNbParams(), (int) (GetNbParams()*sizeof(REAL)/1048576)); +} + +/* + * Forward pass + */ + +void MachCombined::Forw(int eff_bsize, bool in_train) +{ + if (machs.empty()) + Error("called Forw() for an empty multiple combined machine"); + + tm.start(); + for (size_t i=0; iForw(eff_bsize,in_train); + } + nb_forw += (eff_bsize<=0) ? 
bsize : eff_bsize; + + // we perform no operation to combine the multiple outputs into one + // THIS MUST BE DONE IN A SPEZIALIZED SUBCLASS + tm.stop(); +} + +void MachCombined::Backw(const float lrate, const float wdecay, int eff_bsize) +{ + if (machs.empty()) + Error("called Backw() for an empty combined machine"); + + debugMachOutp("MachCombined Grad",grad_out,idim,odim,eff_bsize); + tbackw.start(); + + for (int i=machs.size()-1; i>=0; i--) { + if (!activ_backw[i]) Error("MachCombined::Backw(): deactivation of combined machines is not supported\n"); + if (activ_backw[i]) machs[i]->Backw(lrate,wdecay,eff_bsize); + } + nb_backw += (eff_bsize<=0) ? bsize : eff_bsize; + + tbackw.stop(); + debugMachInp("MachCombined Grad",grad_in,idim,odim,eff_bsize); +} diff --git a/MachCombined.h b/MachCombined.h new file mode 100644 index 0000000..6b1577c --- /dev/null +++ b/MachCombined.h @@ -0,0 +1,72 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + * + * This machines implements a set of INDEPENDENT machines which outputs are + * combined (max, average, etc according to the subclasses). 
The machines must have the + * same input and output dimension. The order of the forward and backward + * passes are not defined and may be in parallel on multiple CPUs or GPUs + * + * memory management: + * - data_in same pointer for all machines + * - data_out allocated (to calculate the max) + * - grad_in allocated (sum of Machine's grad_in) + * - grad_out points to following machine + * we also allocate internal storage to set some gradients + * of the individual machines to zero (this is faster than + * selective vector-wise backprop) + */ + +#ifndef _MachCombined_h +#define _MachCombined_h + +using namespace std; +#include + +#include "MachMulti.h" + +class MachCombined : public MachMulti +{ +private: + void do_alloc(); // perform allocation of dynamic data structures +protected: + Timer tbackw; + REAL *winner; // remember the winner of the output operation, odim*bsize + vector grad_out_modif; // modified output gradients for each machine in function of the winner + virtual void ReadData(istream&, size_t, int=0); // read binary data + MachCombined(const MachCombined &); // create a copy of the machine (without submachines) +public: + MachCombined(); // create initial sequence with no machine + virtual ~MachCombined(); + virtual MachCombined *Clone(); // create a copy of the machine and all submachines + virtual int GetMType() {return file_header_mtype_combined;}; // get type of machine + // redfine connecting functions + virtual void SetDataIn(REAL*); // set pointer of input data + virtual void SetGradOut(REAL*); // set pointer of output gradient + // add and remove machines + virtual void MachAdd(Mach*); // add new machine after the existing ones + virtual Mach *MachDel(); + // standard functions + virtual void Info(bool=false, char *txt=(char*)""); // display (detailed) information on machine + virtual void Forw(int=0, bool=false); // calculate outputs for current inputs + virtual void Backw(const float lrate, const float wdecay, int=0); // calculate 
gradients at input for current gradients at output +}; + +#endif diff --git a/MachConfig.cpp b/MachConfig.cpp new file mode 100644 index 0000000..2083f1f --- /dev/null +++ b/MachConfig.cpp @@ -0,0 +1,1330 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +#include +#include +#include +#include +#include "MachAvr.h" +#include "MachConfig.h" +#include "MachJoin.h" +#include "MachLinRectif.h" +//#include "MachMax.h" // experimental +#include "MachPar.h" +#include "MachSeq.h" +#include "MachSig.h" +#include "MachSoftmax.h" +#include "MachSoftmaxStable.h" +#include "MachSoftmaxClass.h" +#include "MachSplit.h" +#include "MachSplit1.h" +//#include "MachStab.h" +//#include "MachStacked.h" +//#include "MachTabSh.h" +#include "MachTanh.h" +#include "Tools.h" +#include "MachCopy.h" + +namespace bpo = boost::program_options; + +/** + * creates a machine configuration reader + * @param bNeedConfFile true if configuration file is required on command line, false otherwise + * @param rInitBias general value for random initialization of the bias (0.1 by default) + */ +MachConfig::MachConfig 
(bool bNeedConfFile, REAL rInitBias) : + bSelectedOptions(false), + bHelpRequest(false), + bNeedConfFile(bNeedConfFile), + bReadMachOnly(false), + iRepeat(1), + rInitBias(rInitBias), + eErrorCode(MachConfig::NoError), + odCommandLine("Command line options"), + odSelectedConfig("Configuration options") +{ + /* set general options (in command line and configuration file) */ + + // general options in command line only + this->odCommandLine.add_options() + ("help" , "produce help message") + ("config-file,c" , bpo::value< std::vector >(), "configuration file (can be set without option name)") + ; + this->podCommandLine.add("config-file", -1); // command line may contain configuration file name without option name + + // general options in configuration file and selectable for command line + this->odGeneralConfig.add_options() + ("mach,m" , opt_sem::new_sem(), "file name of the machine") + ("src-word-list,s" , opt_sem::new_sem(), "word list of the source vocabulary") + ("tgt-word-list,w" , opt_sem::new_sem(), "word list of the vocabulary and counts (used to select the most frequent words)") + ("word-list,w" , opt_sem::new_sem(), "word list of the vocabulary and counts (used to select the most frequent words)") + ("input-file,i" , opt_sem::new_sem(), "file name of the input n-best list") + ("aux-file,a" , opt_sem::new_sem(), "file name of the auxiliary data") + ("output-file,o" , opt_sem::new_sem(), "file name of the output n-best list") + ("source-file,S" , opt_sem::new_sem(), "file name of the file with source sentences (needed for TM rescoring)") + ("phrase-table" , opt_sem::new_sem(), "rescore with a Moses on-disk phrase table") + ("phrase-table2" , opt_sem::new_sem(), "use a secondary Moses phrase table") + ("test-data,t" , opt_sem::new_sem(), "test data") + ("train-data,t" , opt_sem::new_sem(), "training data") + ("dev-data,d" , opt_sem::new_sem(), "development data (optional)") + ("lm,l" , opt_sem::new_sem(), "file name of the machine (only necessary when using 
short lists)") + ("output-probas" , opt_sem::new_sem(), "write sequence of log-probas to file (optional)") + ("cslm,c" , opt_sem::new_sem(), "rescore with a CSLM") + ("vocab,v" , opt_sem::new_sem(), "word-list to be used with the CSLM") + ("cstm,C" , opt_sem::new_sem(), "rescore with a CSTM") + ("vocab-source,b" , opt_sem::new_sem(), "source word-list to be used with the CSTM") + ("vocab-target,B" , opt_sem::new_sem(), "target word-list to be used with the CSTM") + ("weights,w" , opt_sem::new_sem(), "coefficients of the feature functions") + ("tm-scores,N" , opt_sem::new_sem()->default_value("4:0"), "specification of the TM scores to be used (default first 4)") + ("MachSeed,Mseed" , opt_sem ::new_sem()->default_value(0),"Machine seed for random weights init (default: do not set the seed)") + ("lrate,L" , opt_sem::new_sem()->default_value("Decay beg=5e-3 mult=7e-8 stop=0"), "learning rate applied: type (Decay AdaGrad Divide DivideAndRecover), initial value, multiplier and learning stop value") + ("inn,I" , opt_sem ::new_sem( )->default_value( 0 ), "number of hypothesis to read per n-best (default all)") + ("outn,O" , opt_sem ::new_sem( )->default_value( 0 ), "number of hypothesis to write per n-best (default all)") + ("offs,a" , opt_sem ::new_sem( )->default_value( 0 ), "add offset to n-best ID (useful for separately generated n-bests)") + ("aux-dim,n" , opt_sem ::new_sem( )->default_value( 0 ), "dimension of auxiliary data") + ("num-scores,n" , opt_sem ::new_sem( )->default_value( 5 ), "number of scores in phrase table") + ("ctxt-in,c" , opt_sem ::new_sem( )->default_value( 7 ), "input context size") + ("ctxt-out,C" , opt_sem ::new_sem( )->default_value( 7 ), "output context size") + ("curr-iter,C" , opt_sem ::new_sem( )->default_value( 0 ), "current iteration when continuing training of a neural network") + ("last-iter,I" , opt_sem ::new_sem( )->default_value( 10 ), "last iteration of neural network") + ("order" , opt_sem ::new_sem( )->default_value( 4 ), "order 
of the LM to apply on the test data (must match CSLM, but not necessarily back-off LM)") + ("mode,M" , opt_sem ::new_sem( )->default_value( 3 ), "mode of the data (1=IGN_BOS 2=IGN_UNK 4=IGN_UNK_ALL, 8=IGN_EOS)") + ("lm-pos,p" , opt_sem ::new_sem( )->default_value( 0 ), "position of LM score (1..n, 0 means to append it)") + ("tm-pos,P" , opt_sem ::new_sem( )->default_value( 0 ), "position of the TM scores, up to 4 values") + ("target-pos,T" , opt_sem ::new_sem( )->default_value( -1 ), "position of the predicted word in the n-gram, default: last one") + ("buf-size,b" , opt_sem ::new_sem( )->default_value(16384 ), "buffer size") + ("block-size,B" , opt_sem ::new_sem(&this->iBlockSize )->default_value( 128 ), "block size for faster training") + ("drop-out,O" , opt_sem::new_sem(&this->rPercDropOut )->default_value( 0.0), "percentage of neurons to be used for drop-out [0-1] (set by default to 0 to turn it off)") + ("random-init-project,r", opt_sem::new_sem(&this->rInitProjLayer)->default_value( 0.1), "value for random initialization of the projection layer") + ("random-init-weights,R", opt_sem::new_sem(&this->rInitWeights )->default_value( 0.1), "value for random initialization of the weights") + ("clip-weights,w" , opt_sem::new_sem(&this->rClipWeights )->default_value( 0 ), "value for clipping weights (no clipping by default)") + ("clip-gradients-weights,g",opt_sem::new_sem(&this->rClipGradWeights)->default_value(0 ), "value for clipping gradients on weights (no clipping by default)") + ("clip-gradients-bias,G", opt_sem::new_sem(&this->rClipGradBias )->default_value( 0 ), "value for clipping gradients on biases (no clipping by default)") + ("weight-decay,W" , opt_sem::new_sem( )->default_value( 3E-05), "coefficient of weight decay") + ("backward-tm,V" , opt_sem::new_sem()->zero_tokens(), "use an inverse back-ward translation model") + ("renormal,R" , opt_sem::new_sem()->zero_tokens(), "renormalize all probabilities, slow for large short-lists") + ("recalc,r" , 
opt_sem::new_sem()->zero_tokens(), "recalculate global scores") + ("sort,s" , opt_sem::new_sem()->zero_tokens(), "sort n-best list according to the global scores") + ("lexical,h" , opt_sem::new_sem()->zero_tokens(), "report number of lexically different hypothesis") + ("server,X" , opt_sem::new_sem()->zero_tokens(), "run in server mode listening to a named pipe to get weights for new solution extraction") + ("unstable-sort,U" , opt_sem::new_sem()->zero_tokens(), "use unstable sort (compatility mode with older version of the CSLM toolkit)") + ("use-word-class,u" , opt_sem::new_sem()->zero_tokens(), "use word class to structure the output layer") + ("dump-activities,A" , opt_sem >::new_sem(), "specify layer and filename to dump the activity for each n-gram (eg \"3:layer3.txt\")") +#ifdef BLAS_CUDA + ("cuda-device,D" , opt_sem >::new_sem(), "select CUDA device (eg \"0:2\" for devices 0 and 2)") + ("cuda-dev-num,N" , opt_sem::new_sem()->default_value(1), "number of CUDA devices to be used") +#endif + ; + + + /* set machine names */ + + // machine names are defined in configuration file options to be recognized as valid options + this->odMachineTypes.add_options() + ("machine.Mach" , bpo::value >()) + ("machine.Tab" , bpo::value >()) + ("machine.Linear" , bpo::value >()) + ("machine.LinRectif" , bpo::value >()) + ("machine.Sig" , bpo::value >()) + ("machine.Tanh" , bpo::value >()) + ("machine.Softmax" , bpo::value >()) + ("machine.SoftmaxStable", bpo::value >()) + ("machine.SoftmaxClass" , bpo::value >()) + ("machine.Multi" , bpo::value >()) + ("machine.Sequential" , bpo::value >()) + ("machine.Parallel" , bpo::value >()) + ("machine.Split" , bpo::value >()) + ("machine.Split1" , bpo::value >()) + ("machine.Join" , bpo::value >()) + ("machine.Combined" , bpo::value >()) + ("machine.Avr" , bpo::value >()) + ("machine.Copy" , bpo::value >()) + ; + this->odGeneralConfig.add(this->odMachineTypes); + + + /* set dimension constant names */ + + char sDimVar[10]; + for (char c 
= 1 ; 20 >= c ; c++) { + sprintf(sDimVar, "DIM%d", c); + this->odGeneralConfig.add_options()(sDimVar, bpo::value()); + } + + /* set machine specific options */ + + // machine options for many machine types except multiple machines + this->odMachineConf.add_options() + ("input-dim" , bpo::value ()->required(), "input dimension") + ("output-dim" , bpo::value ()->required(), "output dimension") + ("nb-forward" , bpo::value ()->default_value(0), "forward number") + ("nb-backward" , bpo::value ()->default_value(0), "backward number") + ("update" , bpo::value(), "update parameters during backward (default true)") + ("lrate-coeff" , bpo::value(), "layer specific coefficient of the learning rate (default 1.0)") + ("share-id" , bpo::value ()->default_value(-1), "All machines sharing the same share-id will share their weights (default is all machines share their weights)") + ; + + // machine options for all machine types (including multiple machines) + this->odMachMultiConf.add_options() + ("drop-out" , bpo::value(), "percentage of neurons to be used for drop-out [0-1], set to 0 to turn it off") + ("block-size" , bpo::value (), "block size for faster training") + ("init-from-file" , bpo::value(), "name of file containing all machine data") + ("name" , bpo::value(), "name of machine (used internally)") + ("clone" , bpo::value(), "replace current machine by a copy of previous machine with given name (sharing the parameters)") + ; + this->odMachineConf.add(this->odMachMultiConf); + + // machine options for multiple machine types ONLY + this->odMachMultiConf.add_options() + ("repeat" , bpo::value()->default_value(1), "repeat the inner machines N times") + ; + + // machine options for linear machines (base class MachLin) + this->odMachLinConf.add_options() + ("const-init-weights" , bpo::value(), "constant value for initialization of the weights") + ("ident-init-weights" , bpo::value(), "initialization of the weights by identity transformation") + ("fani-init-weights" , 
bpo::value(), "random initialization of the weights by function of fan-in") + ("fanio-init-weights" , bpo::value(), "random initialization of the weights by function of fan-in and fan-out") + ("random-init-weights" , bpo::value(), "value for random initialization of the weights (method used by default with general value)") + ("const-init-bias" , bpo::value(), "constant value for initialization of the bias") + ("random-init-bias" , bpo::value(), "value for random initialization of the bias (method used by default with general value)") + ("clip-weights" , bpo::value(), "value for clipping weights (used by default with general value)") + ("clip-gradients-weights",bpo::value(), "value for clipping gradients on weights (used by default with general value)") + ("clip-gradients-bias" , bpo::value(), "value for clipping gradients on biases (used by default with general value)") + ; + this->odMachLinConf.add(this->odMachineConf); + + // machine options for table lookup machines (base class MachTab) + this->odMachTabConf.add_options() + ("const-init-project" , bpo::value(), "constant value for initialization of the projection layer") + ("random-init-project" , bpo::value(), "value for random initialization of the projection layer (method used by default with general value)") + ; + this->odMachTabConf.add(this->odMachineConf); + + +} + +/** + * parses options from command line and configuration file + * @param iArgCount number of command line arguments + * @param sArgTable table of command line arguments + * @return false in case of error or help request, true otherwise + * @note error code is set if an error occurred + */ +bool MachConfig::parse_options (int iArgCount, char *sArgTable[]) +{ + this->vmGeneralOptions.clear(); + + // program name + if (iArgCount > 0) { + this->sProgName = sArgTable[0]; + size_t stEndPath = this->sProgName.find_last_of("/\\"); + if (stEndPath != std::string::npos) + this->sProgName.erase(0, stEndPath + 1); + } + else + this->sProgName.clear(); + 
+ // set option list used by the application + bpo::options_description odUsedOptions; + odUsedOptions.add(this->odCommandLine); + odUsedOptions.add(this->odSelectedConfig); + + // parse command line + try { + bpo::store(bpo::command_line_parser(iArgCount, sArgTable).options(odUsedOptions).positional(this->podCommandLine).run(), this->vmGeneralOptions); + + // verify help option + this->bHelpRequest = (this->vmGeneralOptions.count("help") > 0); + if (this->bHelpRequest) + return false; + + // get configuration file name + std::vector vs; + std::string sConfFileOpt("config-file"); + if (this->vmGeneralOptions.count(sConfFileOpt) > 0) + vs = this->vmGeneralOptions[sConfFileOpt].as< std::vector >(); + switch (vs.size()) { + case 1: + this->sConfFile = vs.front(); + break; + case 0: + this->sConfFile.clear(); + if (this->bNeedConfFile) { + // error: configuration file is required + throw bpo::required_option(sConfFileOpt); + } + else { + // don't parse configuration file, so notify command line parsing + bpo::notify(this->vmGeneralOptions); + return true; + } + break; + default: + bpo::multiple_occurrences mo; + mo.set_option_name(sConfFileOpt); + throw mo; + break; + } + + } catch (bpo::error &e) { + // error handling + this->eErrorCode = MachConfig::CmdLineParsingError; + this->ossErrorInfo.str(e.what()); + return false; + } + + // open configuration file + if (!this->open_file()) + return false; + + try { + // parse configuration file and parse command line one more time (to be sure to use selected options with the good attributes) + bpo::store(bpo::parse_config_file(this->ifsConf, this->odGeneralConfig), this->vmGeneralOptions); + bpo::store(bpo::command_line_parser(iArgCount, sArgTable).options(odUsedOptions).positional(this->podCommandLine).run(), this->vmGeneralOptions); + bpo::notify(this->vmGeneralOptions); + } catch (bpo::error &e) { + // error handling + this->eErrorCode = MachConfig::ConfigParsingError; + this->ossErrorInfo.str(e.what()); + return false; + 
} + + // remove unused information (machine structure which will be read without boost) + const std::vector >& vodMachOpt = this->odMachineTypes.options(); + std::vector >::const_iterator iEnd = vodMachOpt.end(); + for (std::vector >::const_iterator iO = vodMachOpt.begin() ; iO != iEnd ; iO++) { + bpo::option_description *pod = iO->get(); + if (pod != NULL) + this->vmGeneralOptions.erase(pod->long_name()); + } + + return true; +} + +/** + * prints help message on standard output + */ +void MachConfig::print_help () const +{ + std::cout << + "Usage: " << this->sProgName << " [options]" << std::endl << + " " << this->sProgName << " configuration_file_name [options]" << std::endl << + std::endl << this->odCommandLine << std::endl; + if (this->bSelectedOptions) + std::cout << this->odSelectedConfig << std::endl; +} + +/** + * reads machine structure from configuration file + * @return new machine object, or NULL in case of error + * @note error code is set if an error occurred + */ +Mach *MachConfig::get_machine () +{ + // open configuration file + if (!this->open_file()) + return NULL; + + // search for "machine" group + std::string sRead; + char sMachGroup[] = "[machine]"; + do { + this->ifsConf >> sRead; + std::ios_base::iostate iost = this->ifsConf.rdstate(); + if (iost) { + // error handling + if (iost & std::ios_base::eofbit) + this->eErrorCode = MachConfig::NoMachineGroup; + else + this->eErrorCode = MachConfig::ProbSearchMachGroup; + return NULL; + } + } while (sRead != sMachGroup); + + Mach::SetFileId(file_header_version); //Loic: needed to create old machines with new code + + // read machine structure + this->bReadMachOnly = false; + this->eErrorCode = MachConfig::NoError; + Mach *pNextMach = NULL; + this->read_next_machine(pNextMach, this->iBlockSize); + if ((this->eErrorCode != MachConfig::NoError) && (pNextMach != NULL)) { + delete pNextMach; + pNextMach = NULL; + } + this->mMachNameMap.clear(); + return pNextMach; +} + +/** + * get last error + * @return 
error string + */ +std::string MachConfig::get_error_string() const +{ + std::string sError; + + // get string + switch (this->eErrorCode) { + case MachConfig::NoError: + return std::string(); + break; + case MachConfig::CmdLineParsingError: + sError = "command line error: "; + sError += this->ossErrorInfo.str(); + return sError; + break; + case MachConfig::ProbOpenConfigFile: + sError = "can't open configuration file \""; + sError += this->sConfFile; + sError += '\"'; + return sError; + break; + case MachConfig::ConfigParsingError: + sError = "configuration error: "; + sError += this->ossErrorInfo.str(); + return sError; + break; + case MachConfig::NoMachineGroup: + return "no [machine] group in configuration file"; + break; + case MachConfig::ProbSearchMachGroup: + return "internal error while searching [machine] group"; + break; + case MachConfig::MachDescrIncomplete: + return "machine description is not complete"; + break; + case MachConfig::ProbReadMachName: + return "internal error while reading machine type name"; + break; + case MachConfig::UnknownMachType: + sError = "unknown machine type \""; + break; + case MachConfig::UnknownMachName: + sError = "unknown machine name \""; + break; + case MachConfig::UnknownMachCode: + sError = "unknown machine code "; + sError += this->ossErrorInfo.str(); + return sError; + break; + case MachConfig::MachWithoutEqualChar: + sError = "no equal character after machine name in \""; + break; + case MachConfig::ProbReadMachParams: + sError = "internal error while reading machine parameters in \""; + break; + case MachConfig::MachParamsParsingError: + sError = "machine parameters error in \""; + sError += this->ossErrorInfo.str(); + return sError; + break; + case MachConfig::ProbOpenMachineFile: + sError = "can't open machine data file \""; + break; + case MachConfig::ProbAllocMachine: + sError = "can't allocate machine \""; + break; + default: + std::ostringstream oss; + oss << "unknown error " << this->eErrorCode; + return 
oss.str(); + break; + }; + + // append machine type + sError += this->ossErrorInfo.str(); + sError += '\"'; + + return sError; +} + +/** + * get file name of the machine (or void string if not set) + * @note if mach option value is "%CONF", file name will be same as configuration file (without extension ".conf") followed by extension ".mach" + */ +std::string MachConfig::get_mach () const +{ + const boost::program_options::variable_value &vvM = this->vmGeneralOptions["mach"]; + if (vvM.empty()) + // mach option not set + return std::string(); + else { + const std::string &sMachOpt = vvM.as(); + if ((sMachOpt == "%CONF") && !this->sConfFile.empty()) { + size_t stConfFileLen = this->sConfFile.length(); + + std::string sConfExt(".conf"); + size_t stConfExtLen = sConfExt.length(); + + // verify config-file extension + if ( ( stConfFileLen >= stConfExtLen ) + && (this->sConfFile.compare(stConfFileLen - stConfExtLen, stConfExtLen, sConfExt) == 0) ) + stConfFileLen -= stConfExtLen; + + // return mach value as config-file value with new extension + std::string sMachVal(this->sConfFile, 0, stConfFileLen); + sMachVal.append(".mach"); + return sMachVal; + } + else + // return mach value as set + return sMachOpt; + } +} + +#ifdef BLAS_CUDA +/** + * get CUDA devices + * @returns list of indexes (eg ":0:2" for devices 0 and 2) or number of devices + */ +std::string MachConfig::get_cuda_devices () const +{ + std::string sCudaDev; + if (this->vmGeneralOptions.count("cuda-device") > 0) { + // concatenate all device selections (for backward compatibility) + std::vector vsInput = this->vmGeneralOptions["cuda-device"].as >(); + for (std::vector::const_iterator vsci = vsInput.begin() ; vsci != vsInput.end() ; vsci++) + (sCudaDev += ':') += *vsci; + } + else { + // get number of devices + std::ostringstream oss; + oss << this->vmGeneralOptions["cuda-dev-num"].as(); + sCudaDev = oss.str(); + } + return sCudaDev; +} +#endif + +/** + * open configuration file + * @return false in case of 
error, true otherwise + */ +bool MachConfig::open_file () +{ + this->ifsConf.close(); + this->ifsConf.clear(); + + this->ifsConf.open(this->sConfFile.c_str(), std::ios_base::in); + if (this->ifsConf.fail()) { + this->eErrorCode = MachConfig::ProbOpenConfigFile; + return false; + } + else { + this->ifsConf.clear(); + return true; + } +} + +/** + * reads next machine block from configuration file + * @param pNewMach set to new machine object pointer, or NULL if 'end' mark is read (and possibly in case of error) + * @param iBlockSize block size for faster training + * @return true if 'end' mark is read, false otherwise + * @note error code is set if an error occurred + */ +bool MachConfig::read_next_machine (Mach *&pNewMach, int iBlockSize) +{ + // read machine type name + std::string sMachType; + const char *sMachType_cstr; + do { + this->ossErrorInfo.str(sMachType); + this->ifsConf >> sMachType; + std::ios_base::iostate iost = this->ifsConf.rdstate(); + if (iost) { + // error handling + if (iost & std::ios_base::eofbit) + this->eErrorCode = MachConfig::MachDescrIncomplete; + else + this->eErrorCode = MachConfig::ProbReadMachName; + this->ossErrorInfo << sMachType; + pNewMach = NULL; + return false; + } + sMachType_cstr = sMachType.c_str(); + + // discard comments / read 'end' mark + if ('#' == sMachType_cstr[0]) { + if (strcasecmp(sMachType_cstr, "#End") == 0) { + pNewMach = NULL; + return true; + } + else { + std::stringbuf sb; + this->ifsConf.get(sb); + this->ifsConf.clear(); + sMachType_cstr = NULL; + } + } + } while (NULL == sMachType_cstr); + + // verify if name contains equal sign + size_t stEqualPos = sMachType.find('=', 1); + if (stEqualPos != std::string::npos) { + this->ifsConf.seekg(stEqualPos - sMachType.length(), std::ios_base::cur); + this->ifsConf.clear(); + sMachType.resize(stEqualPos); + } + this->ossErrorInfo << sMachType; + + // get machine type + int iMachType; + bool bMachLin = false; + bool bMachMulti = false; + bool bMachTab = false; + if 
(strcasecmp(sMachType_cstr, "Mach") == 0) { + iMachType = file_header_mtype_base; + } + else if (strcasecmp(sMachType_cstr, "Tab") == 0) { + iMachType = file_header_mtype_tab; + bMachTab = true; + } + /*else if (strcasecmp(sMachType_cstr, "Tabsh") == 0) { + iMachType = file_header_mtype_tabsh; + bMachTab = true; + }*/ + else if (strcasecmp(sMachType_cstr, "Linear") == 0) { + iMachType = file_header_mtype_lin; + bMachLin = true; + } + else if (strcasecmp(sMachType_cstr, "Copy") == 0) { + iMachType = file_header_mtype_copy; + } + else if (strcasecmp(sMachType_cstr, "Sig") == 0) { + iMachType = file_header_mtype_sig; + bMachLin = true; + } + else if (strcasecmp(sMachType_cstr, "Tanh") == 0) { + iMachType = file_header_mtype_tanh; + bMachLin = true; + } + else if (strcasecmp(sMachType_cstr, "Softmax") == 0) { + iMachType = file_header_mtype_softmax; + bMachLin = true; + } + /*else if (strcasecmp(sMachType_cstr, "Stab") == 0) { + iMachType = file_header_mtype_stab; + bMachLin = true; + }*/ + else if (strcasecmp(sMachType_cstr, "SoftmaxClass") == 0) { + iMachType = file_header_mtype_softmax_class; + bMachLin = true; + } + else if (strcasecmp(sMachType_cstr, "SoftmaxStable") == 0) { + iMachType = file_header_mtype_softmax_stable; + bMachLin = true; + } + else if (strcasecmp(sMachType_cstr, "LinRectif") == 0) { + iMachType = file_header_mtype_lin_rectif; + bMachLin = true; + } + else { + bMachMulti = true; + if (strcasecmp(sMachType_cstr, "Multi") == 0) + iMachType = file_header_mtype_multi; + else if (strcasecmp(sMachType_cstr, "Sequential") == 0) + iMachType = file_header_mtype_mseq; + else if (strcasecmp(sMachType_cstr, "Split1") == 0) + iMachType = file_header_mtype_msplit1; + else if (strcasecmp(sMachType_cstr, "Parallel") == 0) + iMachType = file_header_mtype_mpar; + else if (strcasecmp(sMachType_cstr, "Split") == 0) + iMachType = file_header_mtype_msplit; + else if (strcasecmp(sMachType_cstr, "Combined") == 0) + iMachType = file_header_mtype_combined; + /*else if 
(strcasecmp(sMachType_cstr, "Max") == 0) // under development + iMachType = file_header_mtype_max;*/ // under development + else if (strcasecmp(sMachType_cstr, "Avr") == 0) + iMachType = file_header_mtype_avr; + /*else if (strcasecmp(sMachType_cstr, "Stacked") == 0) // under development + iMachType = file_header_mtype_mstack; */ // under development + else if (strcasecmp(sMachType_cstr, "Join") == 0) + iMachType = file_header_mtype_mjoin; + else { + // error handling + this->eErrorCode = MachConfig::UnknownMachType; + pNewMach = NULL; + return false; + } + } + + // create machine + if (bMachMulti) + pNewMach = this->read_multi_machine (iMachType, iBlockSize); + else + pNewMach = this->read_simple_machine(iMachType, iBlockSize, bMachLin, bMachTab); + return false; +} + +/** + * creates a multiple machine, reads his parameters and reads submachine blocks + * @param iMachType type of multiple machine + * @param iBlockSize block size for faster training + * @return new machine object (may be NULL in case of error) + * @note error code is set if an error occurred + */ +Mach *MachConfig::read_multi_machine (int iMachType, int iBlockSize) +{ + Mach *pNewMach = NULL; + MachMulti *pMachMulti = NULL; + bool bNoCloneOrInit = true; + + // read machine parameters + bpo::variables_map vmMachParams; + if (!this->read_machine_parameters(this->odMachMultiConf, vmMachParams)) + return NULL; + + // get current block size (get current machine block size if defined, or block size in parameter) + const boost::program_options::variable_value &vvBS = vmMachParams["block-size"]; + int iCurBlockSize = (vvBS.empty() ? iBlockSize : vvBS.as()); + + // get current repeat content (get current repeat value if defined) + const boost::program_options::variable_value &vvRPT = vmMachParams["repeat"]; + int iCurRepeat = (vvRPT.empty() ? 
iRepeat : vvRPT.as()); + + // verify if machine structure must be read without creating new object + if (!this->bReadMachOnly) { + if (bNoCloneOrInit) { + // verify if machine is copied from other one + const boost::program_options::variable_value &vvC = vmMachParams["clone"]; + if (!vvC.empty()) { + std::string sOtherName = vvC.as(); + if (this->mMachNameMap.count(sOtherName) > 0) { + pNewMach = this->mMachNameMap[sOtherName]->Clone(); + sOtherName.clear(); + } + if (pNewMach == NULL) { + // error handling + if (sOtherName.empty()) + this->eErrorCode = MachConfig::ProbAllocMachine; + else { + this->ossErrorInfo.str(sOtherName); + this->eErrorCode = MachConfig::UnknownMachName; + } + return NULL; + } + bNoCloneOrInit = false; + } + } + if (bNoCloneOrInit) { + // verify if machine is read from a file + const boost::program_options::variable_value &vvIFF = vmMachParams["init-from-file"]; + if (!vvIFF.empty()) { + pNewMach = this->read_machine_from_file(vvIFF.as(), iCurBlockSize, vmMachParams); + if (pNewMach == NULL) + // error handling + return NULL; + bNoCloneOrInit = false; + } + } + if (bNoCloneOrInit) { + // instantiate multi machine corresponding to given type + switch (iMachType) { + case file_header_mtype_multi: + pMachMulti = new MachMulti; + break; + case file_header_mtype_mseq: + pMachMulti = new MachSeq; + break; + case file_header_mtype_msplit1: + pMachMulti = new MachSplit1; + break; + case file_header_mtype_mpar: + pMachMulti = new MachPar; + break; + case file_header_mtype_msplit: + pMachMulti = new MachSplit; + break; + case file_header_mtype_combined: + pMachMulti = new MachCombined; + break; + /*case file_header_mtype_max: // under development + pMachMulti = new MachMax; + break;*/ + case file_header_mtype_avr: + pMachMulti = new MachAvr; + break; + /*case file_header_mtype_mstack: // under development + pMachMulti = new MachStacked; + break; */ + case file_header_mtype_mjoin: + pMachMulti = new MachJoin; + break; + default: + this->eErrorCode = 
MachConfig::UnknownMachCode; + this->ossErrorInfo.str(std::string()); + this->ossErrorInfo << iMachType; + return NULL; + break; + } + if (pMachMulti == NULL) { + // error handling + this->eErrorCode = MachConfig::ProbAllocMachine; + return NULL; + } + pNewMach = pMachMulti; + + // apply drop-out parameter (current machine drop-out value if defined, or general value) + const boost::program_options::variable_value &vvDO = vmMachParams["drop-out"]; + pNewMach->SetDropOut(vvDO.empty() ? this->rPercDropOut : vvDO.as()); + + // store name of machine if defined + const boost::program_options::variable_value &vvN = vmMachParams["name"]; + if (!vvN.empty()) + this->mMachNameMap[vvN.as()] = pNewMach; + } + else + this->bReadMachOnly = true; + } + + // read submachines +#ifdef BLAS_CUDA + size_t stMachConf = ((pMachMulti != NULL) ? pMachMulti->GetGpuConfig() : 0); + bool bChangeDev = ((Gpu::GetDeviceCount() > 1) && (pMachMulti != NULL) && ( + (iMachType == file_header_mtype_msplit) + || (iMachType == file_header_mtype_mjoin ) + )); +#endif + do { +#ifdef BLAS_CUDA + if (bChangeDev) + Gpu::NewConfig(); +#endif + Mach *pSubMach = NULL; + if (this->read_next_machine(pSubMach, iCurBlockSize)) + break; + else if (pSubMach != NULL) { + // handle errors + if (this->eErrorCode != MachConfig::NoError) { + delete pSubMach; + break; + } + + // add new submachine to multi machine + if (pMachMulti != NULL) { + pMachMulti->MachAdd(pSubMach); +#ifdef BLAS_CUDA + Gpu::SetConfig(pSubMach->GetGpuConfig()); +#endif + } + } + } while (this->eErrorCode == MachConfig::NoError); +#ifdef BLAS_CUDA + Gpu::SetConfig(stMachConf); // reset to multi machine GPU +#endif + + if(iCurRepeat > 1){ + int nb = pMachMulti->MachGetNb(); + cout << " - repeating these " << nb << " machine(s) " << iCurRepeat << " times" << endl; + for(int i=0; iMachGet(j)->Clone(); + pMachMulti->MachAdd(pClonedMach); + } + } + } + + if (!bNoCloneOrInit) + this->bReadMachOnly = false; + return pNewMach; +} + +/** + * creates a 
simple machine and reads his parameters + * @param iMachType type of simple machine + * @param iBlockSize block size for faster training + * @param bMachLin true if the machine is a linear machine, default false otherwise + * @param bMachTab true if the machine is a table lookup machine, default false otherwise + * @return new machine object (may be NULL in case of error) + * @note error code is set if an error occurred + */ +Mach *MachConfig::read_simple_machine (int iMachType, int iBlockSize, bool bMachLin, bool bMachTab) +{ + Mach *pNewMach = NULL; + bool bNoCloneOrInit = true; + int iShareId=-1; + + // read machine parameters + bpo::variables_map vmMachParams; + if (!this->read_machine_parameters (bMachLin ? this->odMachLinConf : (bMachTab ? this->odMachTabConf : this->odMachineConf), vmMachParams)) + return NULL; + + // verify if machine structure must be read without creating new object + if (this->bReadMachOnly) + return NULL; + + // get current block size (get current machine block size if defined, or block size in parameter) + const boost::program_options::variable_value &vvBS = vmMachParams["block-size"]; + int iCurBlockSize = (vvBS.empty() ? 
iBlockSize : vvBS.as()); + + if (bNoCloneOrInit) { + // verify if machine is copied from other one + const boost::program_options::variable_value &vvC = vmMachParams["clone"]; + if (!vvC.empty()) { + std::string sOtherName = vvC.as(); + if (this->mMachNameMap.count(sOtherName) > 0) { + pNewMach = this->mMachNameMap[sOtherName]->Clone(); + sOtherName.clear(); + } + if (pNewMach == NULL) { + // error handling + if (sOtherName.empty()) + this->eErrorCode = MachConfig::ProbAllocMachine; + else { + this->ossErrorInfo.str(sOtherName); + this->eErrorCode = MachConfig::UnknownMachName; + } + } + bNoCloneOrInit = false; + } + } + if (bNoCloneOrInit) { + // verify if machine is read from a file + const boost::program_options::variable_value &vvIFF = vmMachParams["init-from-file"]; + if (!vvIFF.empty()) { + pNewMach = this->read_machine_from_file(vvIFF.as(), iCurBlockSize, vmMachParams); + bNoCloneOrInit = false; + } + } + if (bNoCloneOrInit) { + // get dimension values + int iInputDim = vmMachParams[ "input-dim"].as(); + int iOutputDim = vmMachParams["output-dim"].as(); + + // get forward and backward numbers + int iNbForward = vmMachParams["nb-forward" ].as(); + int iNbBackward = vmMachParams["nb-backward"].as(); + + bool bNewShareId = false; // apply general parameters only if machine with new share-id or no-share (-1) + // instantiate simple machine corresponding to given type + MachLin *pMachLin = NULL; + MachCopy *pMachCopy = NULL; + MachTab *pMachTab = NULL; + + iShareId = vmMachParams["share-id"].as(); + if(iShareId != -1 && prSharedMachines[iShareId] != NULL) { + //TODO: should we check the machine type also? + if(prSharedMachines[iShareId]->GetMType() != iMachType){ + cerr << "WARNING: machines sharing weights have not the same type, check the config file!" 
<< endl; + } + if(iMachType == file_header_mtype_tab){ + if (prSharedMachines[iShareId]->GetIdim()!=1 || iOutputDim != prSharedMachines[iShareId]->GetOdim()){ + Error("MachTab sharing weights have not the same input/output size, check the config file!"); + } + } + else if(iInputDim != prSharedMachines[iShareId]->GetIdim() || iOutputDim != prSharedMachines[iShareId]->GetOdim()){ + cerr << "mach[" << iShareId << "]->idim=" << prSharedMachines[iShareId]->GetIdim() << " idim=" << iInputDim << endl; + cerr << "mach[" << iShareId << "]->odim=" << prSharedMachines[iShareId]->GetOdim() << " odim=" << iOutputDim << endl; + Error("Machines sharing weights have not the same input/output size, check the config file!"); + } + cout << "Cloning previous machine with share-id " << iShareId << endl; + pNewMach = prSharedMachines[iShareId]->Clone(); + if(iMachType == file_header_mtype_lin) pMachLin = (MachLin*) pNewMach; + else if(iMachType == file_header_mtype_tab) pMachTab = (MachTab*) pNewMach; + } else if(iShareId == -1 && prSharedMachines[iShareId] != NULL && iMachType == file_header_mtype_tab) { + // special case for MachTab + // All MachTab share their weights by default. 
This is for compatibility with previously built system + // cout << "Create MachTab with share-id " << iShareId << " -> cloning existing machine with that share-id" << endl; + if(iInputDim != prSharedMachines[iShareId]->GetIdim() || iOutputDim != prSharedMachines[iShareId]->GetOdim()){ + Error("Machines sharing weights have not the same input/output size, check the config file!"); + } + pNewMach = pMachTab = ((MachTab*)prSharedMachines[iShareId])->Clone(); + } else { + if(iShareId==-1) cout << "Creating new machine with no share-id" << endl; + else cout << "Creating new machine with share-id " << iShareId << endl; + switch (iMachType) { + case file_header_mtype_base: + pNewMach = new Mach(iInputDim, iOutputDim, iCurBlockSize, iNbForward, iNbBackward); + break; + case file_header_mtype_tab: + pNewMach = pMachTab = new MachTab(iInputDim, iOutputDim, iCurBlockSize, iNbForward, iNbBackward, iShareId); + break; + case file_header_mtype_lin: + pNewMach = pMachLin = new MachLin(iInputDim, iOutputDim, iCurBlockSize, iNbForward, iNbBackward, iShareId); + break; + case file_header_mtype_copy: + pNewMach = pMachCopy = new MachCopy(iInputDim, iOutputDim, iCurBlockSize, iNbForward, iNbBackward); + break; + case file_header_mtype_lin_rectif: + pNewMach = pMachLin = new MachLinRectif(iInputDim, iOutputDim, iCurBlockSize, iNbForward, iNbBackward, iShareId); + break; + case file_header_mtype_sig: + pNewMach = pMachLin = new MachSig(iInputDim, iOutputDim, iCurBlockSize, iNbForward, iNbBackward, iShareId); + break; + case file_header_mtype_tanh: + pNewMach = pMachLin = new MachTanh(iInputDim, iOutputDim, iCurBlockSize, iNbForward, iNbBackward, iShareId); + break; + case file_header_mtype_softmax: + pNewMach = pMachLin = new MachSoftmax(iInputDim, iOutputDim, iCurBlockSize, iNbForward, iNbBackward, iShareId); + break; + /*case file_header_mtype_stab: + pNewMach = pMachLin = MachStab(iInputDim, iOutputDim, iCurBlockSize, iNbForward, iNbBackward); + break;*/ + case 
file_header_mtype_softmax_stable: + pNewMach = pMachLin = new MachSoftmaxStable(iInputDim, iOutputDim, iCurBlockSize, iNbForward, iNbBackward, iShareId); + break; + case file_header_mtype_softmax_class: + pNewMach = pMachLin = new MachSoftmaxClass(iInputDim, iOutputDim, iCurBlockSize, iNbForward, iNbBackward, iShareId); + break; + default: + this->eErrorCode = MachConfig::UnknownMachCode; + this->ossErrorInfo.str(std::string()); + this->ossErrorInfo << iMachType; + return NULL; + break; + } + if(iShareId != -1){ + prSharedMachines[iShareId] = pNewMach; + } + bNewShareId = true; + } + + if (pNewMach == NULL) { + // error handling + this->eErrorCode = MachConfig::ProbAllocMachine; + return NULL; + } + + // apply update parameter if defined + const boost::program_options::variable_value &vvU = vmMachParams["update"]; + if (!vvU.empty()) + pNewMach->SetUpdataParams(vvU.as()); + + // apply lrate-coeff parameter if defined + const boost::program_options::variable_value &vvLRC = vmMachParams["lrate-coeff"]; + if (!vvLRC.empty()) + pNewMach->SetLrateCoeff(vvLRC.as()); + + // apply drop-out parameter (current machine drop-out value if defined, or general value) + const boost::program_options::variable_value &vvDO = vmMachParams["drop-out"]; + pNewMach->SetDropOut(vvDO.empty() ? 
this->rPercDropOut : vvDO.as()); + + // store name of machine if defined + const boost::program_options::variable_value &vvN = vmMachParams["name"]; + if (!vvN.empty()) + this->mMachNameMap[vvN.as()] = pNewMach; + + // initialize MachLin + if (pMachLin != NULL) + this->apply_machine_parameters(pMachLin, vmMachParams, bNewShareId); + + // initialize MachTab + if (pMachTab != NULL) + this->apply_machine_parameters(pMachTab, vmMachParams, bNewShareId); + } + + return pNewMach; +} + +/** + * reads machine parameters and fills it in given map + * @param odMachineConf available options for the machine + * @param vmMachParams map filled with parameters read + * @return false in case of error, true otherwise + */ +bool MachConfig::read_machine_parameters (const bpo::options_description &odMachineConf, bpo::variables_map &vmMachParams) +{ + // read equal character + char cEqual = ' '; + this->ifsConf >> cEqual; + bool bNoEqualChar = (cEqual != '='); + + // read until end of line + std::stringbuf sbParamsLine; + this->ifsConf.get(sbParamsLine); + + // handle errors + if (this->ifsConf.bad() || bNoEqualChar) { + if (bNoEqualChar) + this->eErrorCode = MachConfig::MachWithoutEqualChar; + else + this->eErrorCode = MachConfig::ProbReadMachParams; + this->ossErrorInfo << ' ' << cEqual << sbParamsLine.str(); + return false; + } + this->ifsConf.clear(); + + // read abbreviated dimensions (ex: " 128 X 256 ", "DIM0xDIM1") + std::istringstream issParamsLine(sbParamsLine.str()); + std::vector vDims; + vDims.resize(2); + issParamsLine >> vDims[0]; + std::size_t stPos = vDims[0].find_first_of("xX"); + char cCross; + if (std::string::npos == stPos) + issParamsLine >> cCross >> vDims[1]; + else { + cCross = vDims[0][stPos++]; + if ('\0' == vDims[0][stPos]) + issParamsLine >> vDims[1]; + else + vDims[1] = vDims[0].substr(stPos); + vDims[0].erase(stPos - 1); + } + + // replace dimension constants by their values + for (std::vector::iterator it = vDims.begin() ; it != vDims.end() ; it++) { + 
const boost::program_options::variable_value &vv = this->vmGeneralOptions[*it]; + if (!vv.empty()) + try { + std::ostringstream oss; + oss << vv.as(); + (*it) = oss.str(); + } catch (boost::bad_any_cast&) {} + } + + // verify dimensions + bpo::parsed_options poDims(&odMachineConf); + if ((!issParamsLine.fail()) && (('x' == cCross) || ('X' == cCross))) { + // dimensions available + poDims.options.push_back(bpo::option(std::string( "input-dim"), std::vector(1, vDims[0]))); + poDims.options.push_back(bpo::option(std::string("output-dim"), std::vector(1, vDims[1]))); + } + else { + // no abbreviated dimensions + issParamsLine.clear(); + issParamsLine.seekg(0); + } + + // read other machine parameters + try { + std::stringbuf sbOtherParams; + issParamsLine.get(sbOtherParams); + bpo::store(poDims, vmMachParams); + bpo::store( + bpo::command_line_parser(std::vector(1, sbOtherParams.str())). + extra_style_parser(MachConfig::parse_mach_params).options(odMachineConf).run(), vmMachParams); + bpo::notify(vmMachParams); + } + catch (bpo::error &e) { + // error handling + this->eErrorCode = MachConfig::MachParamsParsingError; + this->ossErrorInfo << " =" << sbParamsLine.str() << "\": " << e.what(); + return false; + } + + return true; +} + +/** + * parses machine parameters + * @param vsTokens vector of tokens + * @return vector of options + * @note throws exception of class boost::program_options::error in case of error + */ +std::vector MachConfig::parse_mach_params (const std::vector &vsTokens) +{ + std::vector voParsed; + + // put tokens in stream + std::stringstream ssTokens; + std::vector::const_iterator iEnd = vsTokens.end(); + for (std::vector::const_iterator iT = vsTokens.begin() ; iT != iEnd ; iT++) + ssTokens << *iT << ' '; + + // read parameters + ParseParametersLine(ssTokens, voParsed); + + // handle errors + if (ssTokens.bad()) + throw bpo::error("internal stream error"); + + return voParsed; +} + +/** + * creates a machine by reading his data from file + * @param 
sFileName machine file name + * @param iBlockSize block size for faster training + * @param vmMachParams map of parameters read + * @return new machine object or NULL in case of error + * @note error code is set if an error occurred + */ +Mach *MachConfig::read_machine_from_file(const std::string &sFileName, int iBlockSize, const bpo::variables_map &vmMachParams) +{ + std::ifstream ifs; + this->ossErrorInfo.str(sFileName); + + // open file + ifs.open(sFileName.c_str(), std::ios_base::in); + if (ifs.fail()) { + // error handling + this->eErrorCode = MachConfig::ProbOpenMachineFile; + return NULL; + } + + // read file + Mach *pNewMach = Mach::Read(ifs, iBlockSize); + if (pNewMach == NULL) { + // error handling + this->eErrorCode = MachConfig::ProbAllocMachine; + return NULL; + } + + // apply machine forward and backward parameters (set to 0 if not defined) + const boost::program_options::variable_value &vvNF = vmMachParams["nb-forward" ]; + const boost::program_options::variable_value &vvNB = vmMachParams["nb-backward"]; + pNewMach->SetNbEx(vvNF.empty() ? 0 : vvNF.as(), + vvNB.empty() ? 
0 : vvNB.as() ); + + // apply update parameter if defined + const boost::program_options::variable_value &vvU = vmMachParams["update"]; + if (!vvU.empty()) + pNewMach->SetUpdataParams(vvU.as()); + + // apply machine drop-out parameter if defined + const boost::program_options::variable_value &vvDO = vmMachParams["drop-out"]; + if (!vvDO.empty()) + pNewMach->SetDropOut(vvDO.as()); + + // initialize MachLin + MachLin *pMachLin = dynamic_cast(pNewMach); + if (pMachLin != NULL) { + this->apply_machine_parameters(pMachLin, vmMachParams); + return pNewMach; + } + + // initialize MachTab + MachTab *pMachTab = dynamic_cast(pNewMach); + if (pMachTab != NULL) { + this->apply_machine_parameters(pMachTab, vmMachParams); + return pNewMach; + } + + return pNewMach; +} + +/** + * applies parameters to given linear machine + * @note block size parameter is not applied here + * @param pMachLin pointer to linear machine object + * @param vmMachParams map of parameters + * @param bApplyGenVal true to apply general values to parameters as needed, default false otherwise + */ +void MachConfig::apply_machine_parameters(MachLin *pMachLin, const bpo::variables_map &vmMachParams, bool bApplyGenVal) const +{ + if (pMachLin != NULL) { + bool bWeigthsNotInit = bApplyGenVal; + bool bBiasNotInit = bApplyGenVal; + + // constant value for initialization of the weights + const boost::program_options::variable_value &vvCIW = vmMachParams["const-init-weights"]; + if (!vvCIW.empty()) { + pMachLin->WeightsConst(vvCIW.as()); + bWeigthsNotInit = false; + } + + // initialization of the weights by identity transformation + const boost::program_options::variable_value &vvIIW = vmMachParams["ident-init-weights"]; + if (!vvIIW.empty()) { + pMachLin->WeightsID(vvIIW.as()); + bWeigthsNotInit = false; + } + + // random initialization of the weights by function of fan-in + const boost::program_options::variable_value &vvFIIW = vmMachParams["fani-init-weights"]; + if (!vvFIIW.empty()) { + 
pMachLin->WeightsRandomFanI(vvFIIW.as()); + bWeigthsNotInit = false; + } + + // random initialization of the weights by function of fan-in and fan-out + const boost::program_options::variable_value &vvFIOIW = vmMachParams["fanio-init-weights"]; + if (!vvFIOIW.empty()) { + pMachLin->WeightsRandomFanIO(vvFIOIW.as()); + bWeigthsNotInit = false; + } + + // value for random initialization of the weights + const boost::program_options::variable_value &vvRIW = vmMachParams["random-init-weights"]; + bool bCurRandInitWeights = !vvRIW.empty(); + if (bCurRandInitWeights || bWeigthsNotInit) { // if no init-weights option is used, a general value is applied + pMachLin->WeightsRandom(bCurRandInitWeights ? vvRIW.as() : this->rInitWeights); + } + + // constant value for initialization of the bias + const boost::program_options::variable_value &vvCIB = vmMachParams["const-init-bias"]; + if (!vvCIB.empty()) { + pMachLin->BiasConst(vvCIB.as()); + bBiasNotInit = false; + } + + // value for random initialization of the bias + const boost::program_options::variable_value &vvRIB = vmMachParams["random-init-bias"]; + bool bCurRandInitBias = !vvRIB.empty(); + if (bCurRandInitBias || bBiasNotInit) { // if no init-bias option is used, a general value is applied + pMachLin->BiasRandom(bCurRandInitBias ? vvRIB.as() : this->rInitBias); + } + + // value for clipping weights + const boost::program_options::variable_value &vvCW = vmMachParams["clip-weights"]; + bool bCurClipWeights = !vvCW.empty(); + if (bCurClipWeights || bApplyGenVal) { // if the option is not used, the general value is applied + pMachLin->SetClipW(bCurClipWeights ? 
vvCW.as() : this->rClipWeights); + } + + // value for clipping gradients on weights + const boost::program_options::variable_value &vvCGW = vmMachParams["clip-gradients-weights"]; + bool bCurClipGradWeights = !vvCGW.empty(); + if (bCurClipGradWeights || bApplyGenVal) { // if the option is not used, the general value is applied + pMachLin->SetClipGradW(bCurClipGradWeights ? vvCGW.as() : this->rClipGradWeights); + } + + // value for clipping gradients on biases + const boost::program_options::variable_value &vvCGB = vmMachParams["clip-gradients-bias"]; + bool bCurClipGradBias = !vvCGB.empty(); + if (bCurClipGradBias || bApplyGenVal) { // if the option is not used, the general value is applied + pMachLin->SetClipGradB(bCurClipGradBias ? vvCGB.as() : this->rClipGradBias); + } + } +} + +/** + * applies parameters to given table lookup machine + * @note block size parameter is not applied here + * @param pMachTab pointer to table lookup machine object + * @param vmMachParams map of parameters + * @param bApplyGenVal true to apply general values to parameters as needed, default false otherwise + */ +void MachConfig::apply_machine_parameters(MachTab *pMachTab, const bpo::variables_map &vmMachParams, bool bApplyGenVal) const +{ + if (pMachTab != NULL) { + bool bTableNotInit = bApplyGenVal; + + // constant value for initialization of the projection layer + const boost::program_options::variable_value &vvCIP = vmMachParams["const-init-project"]; + if (!vvCIP.empty()) { + pMachTab->TableConst(vvCIP.as()); + bTableNotInit = false; + } + + // value for random initialization of the projection layer + const boost::program_options::variable_value &vvRIP = vmMachParams["random-init-project"]; + bool bCurRandInitProj = !vvRIP.empty(); + if (bCurRandInitProj || bTableNotInit) { // if no init-project option is used, a general value is applied + pMachTab->TableRandom(bCurRandInitProj ? 
vvRIP.as() : this->rInitProjLayer); + } + } +} diff --git a/MachConfig.h b/MachConfig.h new file mode 100644 index 0000000..f2b857c --- /dev/null +++ b/MachConfig.h @@ -0,0 +1,898 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +#ifndef _MachConfig_h +#define _MachConfig_h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "MachMulti.h" +#include "MachLin.h" +#include "MachTab.h" + + + +template +class opt_sem; + +/** + * reads machine configuration from command line and configuration file + * @note a configuration file contains miscellaneous parameters and a group "[machine]" which describes machine structure + */ +class MachConfig +{ +public: + + /** + * creates a machine configuration reader + * @param bNeedConfFile true if configuration file is required on command line, false otherwise + * @param rInitBias general value for random initialization of the bias (0.1 by default) + */ + MachConfig (bool bNeedConfFile, REAL rInitBias = 0.1); + + /** + * selects a general option which can be used in command 
line + * @param sName long name of the option eventually followed by a comma and the letter used as shortcut ("long_name" or "long_name,s") + * @param bRequired true if the option value must occur, false otherwise + * @param sDescription explanation of the option, or default NULL to use default explanation + * @return reference to '*this' object + * @note if given option name is not found or if type T is not the same as option type, a new option will still be created + */ + template + inline MachConfig& sel_cmdline_option (const char *sName, bool bRequired, const char *sDescription = NULL) + { + return this->sel_cmdline_option(sName, bRequired, NULL, std::string(), sDescription); + } + + /** + * selects a general option which can be used in command line + * @param sName long name of the option eventually followed by a comma and the letter used as shortcut ("long_name" or "long_name,s") + * @param tDefaultValue default value which will be used if none is explicitly specified (the type 'T' should provide operator<< for std::ostream) + * @param sDescription explanation of the option, or default NULL to use default explanation + * @return reference to '*this' object + * @note if given option name is not found or if type T is not the same as option type, a new option will still be created + */ + template + inline MachConfig& sel_cmdline_option_def (const char *sName, const T &tDefaultValue, const char *sDescription = NULL) + { + std::ostringstream oss; + oss << tDefaultValue; + return this->sel_cmdline_option(sName, false, &tDefaultValue, oss.str(), sDescription); + } + + /** + * parses options from command line and configuration file + * @param iArgCount number of command line arguments + * @param sArgTable table of command line arguments + * @return false in case of error or help request, true otherwise + * @note error code is set if an error occurred + */ + bool parse_options (int iArgCount, char *sArgTable[]); + + /** + * verifies if user requests for help + * 
@return true if help is requested + */ + inline bool help_request () const { return this->bHelpRequest; } + + /** + * prints help message on standard output + */ + void print_help () const; + + /** + * checks if a parsing error occurred in command line or configuration file (general options) + * @return true if a parsing error occurred + */ + inline bool parsing_error () const { return ((this->eErrorCode == MachConfig::CmdLineParsingError) || (this->eErrorCode == MachConfig::ConfigParsingError)); } + + /** + * reads machine structure from configuration file + * @return new machine object, or NULL in case of error + * @note error code is set if an error occurred + */ + Mach *get_machine (); + + /** + * get last error + * @return error string + */ + std::string get_error_string() const; + + /** + * get file name of the machine (or void string if not set) + * @note if mach option value is "%CONF", file name will be same as configuration file (without extension ".conf") followed by extension ".mach" + */ + std::string get_mach () const; + + /** + * get word list of the source vocabulary (or void string if not set) + */ + inline std::string get_src_word_list () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["src-word-list"]; + return (vv.empty() ? std::string() : vv.as()); + } + + /** + * get word list of the vocabulary and counts (or void string if not set) + */ + inline std::string get_tgt_word_list () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["tgt-word-list"]; + return (vv.empty() ? std::string() : vv.as()); + } + + /** + * get word list of the vocabulary and counts (or void string if not set) + */ + inline std::string get_word_list () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["word-list"]; + return (vv.empty() ? 
std::string() : vv.as()); + } + + /** + * get file name of the input n-best list (or void string if not set) + */ + inline std::string get_input_file () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["input-file"]; + return (vv.empty() ? std::string() : vv.as()); + } + + /** + * get file name of the auxiliary data (or void string if not set) + */ + inline std::string get_aux_file () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["aux-file"]; + return (vv.empty() ? std::string() : vv.as()); + } + + /** + * get file name of the output n-best list (or void string if not set) + */ + inline std::string get_output_file () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["output-file"]; + return (vv.empty() ? std::string() : vv.as()); + } + + /** + * get file name of the file with source sentences (or void string if not set) + */ + inline std::string get_source_file () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["source-file"]; + return (vv.empty() ? std::string() : vv.as()); + } + + /** + * get Moses on-disk phrase table (or void string if not set) + */ + inline std::string get_phrase_table () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["phrase-table"]; + return (vv.empty() ? std::string() : vv.as()); + } + + /** + * get secondary Moses phrase table (or void string if not set) + */ + inline std::string get_phrase_table2 () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["phrase-table2"]; + return (vv.empty() ? std::string() : vv.as()); + } + + /** + * get test data (or void string if not set) + */ + inline std::string get_test_data () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["test-data"]; + return (vv.empty() ? 
std::string() : vv.as()); + } + + /** + * get training data (or void string if not set) + */ + inline std::string get_train_data () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["train-data"]; + return (vv.empty() ? std::string() : vv.as()); + } + + /** + * get development data (or void string if not set) + */ + inline std::string get_dev_data () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["dev-data"]; + return (vv.empty() ? std::string() : vv.as()); + } + + /** + * get file name of the machine when using short lists (or void string if not set) + */ + inline std::string get_lm () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["lm"]; + return (vv.empty() ? std::string() : vv.as()); + } + + /** + * get file name of written log-probas (or void string if not set) + */ + inline std::string get_output_probas () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["output-probas"]; + return (vv.empty() ? std::string() : vv.as()); + } + + /** + * get CSLM (or void string if not set) + */ + inline std::string get_cslm () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["cslm"]; + return (vv.empty() ? std::string() : vv.as()); + } + + /** + * get word-list to be used with the CSLM (or void string if not set) + */ + inline std::string get_vocab () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["vocab"]; + return (vv.empty() ? std::string() : vv.as()); + } + + /** + * get CSTM (or void string if not set) + */ + inline std::string get_cstm () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["cstm"]; + return (vv.empty() ? 
std::string() : vv.as()); + } + + /** + * get source word-list to be used with the CSTM (or void string if not set) + */ + inline std::string get_vocab_source () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["vocab-source"]; + return (vv.empty() ? std::string() : vv.as()); + } + + /** + * get target word-list to be used with the CSTM (or void string if not set) + */ + inline std::string get_vocab_target () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["vocab-target"]; + return (vv.empty() ? std::string() : vv.as()); + } + + /** + * get coefficients of the feature functions (or void string if not set) + */ + inline std::string get_weights () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["weights"]; + return (vv.empty() ? std::string() : vv.as()); + } + + /** + * get specification of the TM scores to be used + */ + inline std::string get_tm_scores () const { return this->vmGeneralOptions["tm-scores"].as(); } + + /** + * get learning rate parameters + */ + inline std::string get_lrate () const { return this->vmGeneralOptions["lrate"].as(); } + + /** + * * get MachSeed : seed value for weights random init + */ + inline int get_MachSeed () const { return this->vmGeneralOptions["MachSeed"].as();} + + /** + * get number of hypothesis to read per n-best + */ + inline int get_inn () const { return this->vmGeneralOptions["inn"].as(); } + + /** + * get number of hypothesis to write per n-best + */ + inline int get_outn () const { return this->vmGeneralOptions["outn"].as(); } + + /** + * get offset to add to n-best ID + */ + inline int get_offs () const { return this->vmGeneralOptions["offs"].as(); } + + /** + * get the dimension of auxiliary data + */ + inline int get_aux_dim () const { return this->vmGeneralOptions["aux-dim"].as(); } + + /** + * get number of scores in phrase table + */ + inline int get_num_scores () const { return 
this->vmGeneralOptions["num-scores"].as(); } + + /** + * get input context size + */ + inline int get_ctxt_in () const { return this->vmGeneralOptions["ctxt-in"].as(); } + + /** + * get output context size + */ + inline int get_ctxt_out () const { return this->vmGeneralOptions["ctxt-out"].as(); } + + /** + * get current iteration when continuing training of a neural network + */ + inline int get_curr_iter () const { return this->vmGeneralOptions["curr-iter"].as(); } + + /** + * get last iteration of neural network + */ + inline int get_last_iter () const { return this->vmGeneralOptions["last-iter"].as(); } + + /** + * get order of the LM to apply on the test data + */ + inline int get_order () const { return this->vmGeneralOptions["order"].as(); } + + /** + * get mode of the data + */ + inline int get_mode () const { return this->vmGeneralOptions["mode"].as(); } + + /** + * get position of LM score + */ + inline int get_lm_pos () const { return this->vmGeneralOptions["lm-pos"].as(); } + + /** + * get position of the TM scores + */ + inline int get_tm_pos () const { return this->vmGeneralOptions["tm-pos"].as(); } + + /** + * get position of target words + */ + inline int get_tg_pos () const { return this->vmGeneralOptions["target-pos"].as(); } + + /** + * get buffer size + */ + inline int get_buf_size () const { return this->vmGeneralOptions["buf-size"].as(); } + + /** + * get block size for faster training + */ + inline int get_block_size () const { return this->iBlockSize; } + + /** + * get percentage of drop-out + */ + inline REAL get_drop_out () const { return this->rPercDropOut; } + + /** + * get value for random initialization of the projection layer + */ + inline REAL get_random_init_project () const { return this->rInitProjLayer; } + + /** + * get value for random initialization of the weights + */ + inline REAL get_random_init_weights () const { return this->rInitWeights; } + + /** + * get value for random initialization of the bias + */ + inline REAL 
get_random_init_bias () const { return this->rInitBias; } + + /** + * get value for clipping weights + */ + inline REAL get_clip_weights () const { return this->rClipWeights; } + + /** + * get value for clipping gradients on weights + */ + inline REAL get_clip_gradients_weights () const { return this->rClipGradWeights; } + + /** + * get value for clipping gradients on biases + */ + inline REAL get_clip_gradients_bias () const { return this->rClipGradBias; } + + /** + * get coefficient of weight decay + */ + inline REAL get_weight_decay () const { return this->vmGeneralOptions["weight-decay"].as(); } + + /** + * get state of inverse back-ward translation model use + */ + inline bool get_backward_tm () const { return (this->vmGeneralOptions.count("backward-tm") > 0); } + + /** + * get state of probabilities renormalization + */ + inline bool get_renormal () const { return (this->vmGeneralOptions.count("renormal") > 0); } + + /** + * get state of global scores recalculation + */ + inline bool get_recalc () const { return (this->vmGeneralOptions.count("recalc") > 0); } + + /** + * get state of n-best list sorting according to the global scores + */ + inline bool get_sort () const { return (this->vmGeneralOptions.count("sort") > 0); } + + /** + * get state of lexically different hypothesis reporting + */ + inline bool get_lexical () const { return (this->vmGeneralOptions.count("lexical") > 0); } + + /** + * get state of server mode listening + */ + inline bool get_server () const { return (this->vmGeneralOptions.count("server") > 0); } + + /** + * get state of stable sorting + */ + inline bool get_unstable_sort () const { return (this->vmGeneralOptions.count("unstable-sort") > 0); } + + /** + * get state of using word classes in the output layer + */ + inline bool get_use_word_class () const { return (this->vmGeneralOptions.count("use-word-class") > 0); } + + /** + * get state of using factors + */ + inline bool get_use_factors () const { return 
(this->vmGeneralOptions.count("use-factors") > 0); } + + /** + * get layer specification to dump activities when processing n-grams + */ + inline std::string get_layerfile () const + { + const boost::program_options::variable_value &vv = this->vmGeneralOptions["dump-activities"]; + return (vv.empty() ? std::string() : vv.as()); + } + +#ifdef BLAS_CUDA + /** + * get CUDA devices + * @returns list of indexes (eg ":0:2" for devices 0 and 2) or number of devices + */ + std::string get_cuda_devices () const; +#endif + + +private: + + /** + * error code type + */ + enum ErrorCode { + NoError = 0, + CmdLineParsingError, + ProbOpenConfigFile, + ConfigParsingError, + NoMachineGroup, + ProbSearchMachGroup, + MachDescrIncomplete, + ProbReadMachName, + UnknownMachType, + UnknownMachName, + UnknownMachCode, + MachWithoutEqualChar, + ProbReadMachParams, + MachParamsParsingError, + ProbOpenMachineFile, + ProbAllocMachine + }; + + bool bSelectedOptions; ///< some options are selected + bool bHelpRequest; ///< user requests for help + bool bNeedConfFile; ///< configuration file is required on command line + bool bReadMachOnly; ///< read machine structure without creating new object + int iBlockSize; ///< general block size for faster training + int iRepeat; ///< repeat sub-machines + REAL rPercDropOut; ///< general percentage of drop-out + REAL rInitProjLayer; ///< general value for random initialization of the projection layer + REAL rInitWeights; ///< general value for random initialization of the weights + REAL rInitBias; ///< general value for random initialization of the bias + REAL rClipWeights; ///< general value for clipping weights + REAL rClipGradWeights; ///< general value for clipping gradients on weights + REAL rClipGradBias; ///< general value for clipping gradients on biases + std::string sProgName; ///< program name + std::string sConfFile; ///< configuration file name + std::ifstream ifsConf; ///< configuration file stream + std::ostringstream ossErrorInfo; ///< 
error information for get_error_string method + MachConfig::ErrorCode eErrorCode; ///< error code + boost::program_options::options_description odCommandLine; ///< options for command line only + boost::program_options::options_description odGeneralConfig; ///< general options for configuration file + boost::program_options::options_description odSelectedConfig; ///< general options selected for command line + boost::program_options::options_description odMachineTypes; ///< available machine type names + boost::program_options::options_description odMachineConf; ///< options for a general machine + boost::program_options::options_description odMachMultiConf; ///< options for a multi machine + boost::program_options::options_description odMachLinConf; ///< options for a linear machine + boost::program_options::options_description odMachTabConf; ///< options for a table lookup machine + boost::program_options::positional_options_description podCommandLine; ///< options without name + boost::program_options::variables_map vmGeneralOptions; ///< map of general options + std::map mMachNameMap; ///< map of machine names + + std::map prSharedMachines; // to store Mach pointers for sharing using clone() function + + /** + * open configuration file + * @return false in case of error, true otherwise + */ + bool open_file (); + + /** + * reads next machine block from configuration file + * @param pNewMach set to new machine object pointer, or NULL if 'end' mark is read (and possibly in case of error) + * @param iBlockSize block size for faster training + * @return true if 'end' mark is read, false otherwise + * @note error code is set if an error occurred + */ + bool read_next_machine (Mach *&pNewMach, int iBlockSize); + + /** + * creates a multiple machine, reads his parameters and reads submachine blocks + * @param iMachType type of multiple machine + * @param iBlockSize block size for faster training + * @return new machine object (may be NULL in case of error) + * @note 
error code is set if an error occurred + */ + Mach *read_multi_machine (int iMachType, int iBlockSize); + + /** + * creates a simple machine and reads his parameters + * @param iMachType type of simple machine + * @param iBlockSize block size for faster training + * @param bMachLin true if the machine is a linear machine, default false otherwise + * @param bMachTab true if the machine is a table lookup machine, default false otherwise + * @return new machine object (may be NULL in case of error) + * @note error code is set if an error occurred + */ + Mach *read_simple_machine (int iMachType, int iBlockSize, bool bMachLin = false, bool bMachTab = false); + + /** + * reads machine parameters and fills it in given map + * @param odMachineConf available options for the machine + * @param vmMachParams map filled with parameters read + * @return false in case of error, true otherwise + */ + bool read_machine_parameters (const boost::program_options::options_description &odMachineConf, boost::program_options::variables_map &vmMachParams); + + /** + * parses machine parameters + * @param vsTokens vector of tokens + * @return vector of options + * @throw boost::program_options::error object in case of error + */ + static std::vector parse_mach_params (const std::vector &vsTokens); + + /** + * creates a machine by reading his data from file + * @param sFileName machine file name + * @param iBlockSize block size for faster training + * @param vmMachParams map of parameters read + * @return new machine object or NULL in case of error + * @note error code is set if an error occurred + */ + Mach *read_machine_from_file(const std::string &sFileName, int iBlockSize, const boost::program_options::variables_map &vmMachParams); + + /** + * applies parameters to given linear machine + * @note block size parameter is not applied here + * @param pMachLin pointer to linear machine object + * @param vmMachParams map of parameters + * @param bApplyGenVal true to apply general values to 
parameters as needed, default false otherwise + */ + void apply_machine_parameters(MachLin *pMachLin, const boost::program_options::variables_map &vmMachParams, bool bApplyGenVal = false) const; + + /** + * applies parameters to given table lookup machine + * @note block size parameter is not applied here + * @param pMachTab pointer to table lookup machine object + * @param vmMachParams map of parameters + * @param bApplyGenVal true to apply general values to parameters as needed, default false otherwise + */ + void apply_machine_parameters(MachTab *pMachTab, const boost::program_options::variables_map &vmMachParams, bool bApplyGenVal = false) const; + + /** + * selects a general option which can be used in command line + * @param sName long name of the option eventually followed by a comma and the letter used as shortcut ("long_name" or "long_name,s") + * @param bRequired true if the option value must occur, false otherwise + * @param ptDefaultValue pointer to default value which will be used if none is explicitly specified, or NULL if there is no default value + * @param sTextualValue textual representation of default value + * @param sDescription explanation of the option, or default NULL to use default explanation + * @return reference to '*this' object + * @note if given option name is not found or if type T is not the same as option type, a new option will still be created + */ + template + MachConfig& sel_cmdline_option (const char *sName, bool bRequired, const T *ptDefaultValue, const std::string &sTextualValue, const char *sDescription = NULL) + { + if (sName != NULL) { + boost::program_options::typed_value *ptvtNewSemantic = NULL; + const char *sNewDescription = ""; + + // search for comma in option name + const char * sShortPart = sName; + while(((*sShortPart) != ',') && ((*sShortPart) != '\0')) sShortPart++; + + // get option information + const boost::program_options::option_description *podOption = this->odGeneralConfig.find_nothrow(std::string(sName, 
sShortPart - sName), false); + if (podOption != NULL) { + // get copy of semantic + const opt_sem *postSemantic = dynamic_cast*>(podOption->semantic().get()); + if (postSemantic != NULL) + ptvtNewSemantic = postSemantic->parent_copy(); + + // get description + if (sDescription == NULL) + sNewDescription = podOption->description().c_str(); + } + + // create new semantic if none were found + if (ptvtNewSemantic == NULL) + ptvtNewSemantic = boost::program_options::value(); + + // modify semantic + if (ptvtNewSemantic != NULL) { + if (ptDefaultValue != NULL) + ptvtNewSemantic->default_value(*ptDefaultValue, sTextualValue); + if (bRequired) + ptvtNewSemantic->required(); + } + + // add new option to command line options + this->odSelectedConfig.add_options() ( sName, ptvtNewSemantic, + (sDescription != NULL) ? sDescription : sNewDescription ); + this->bSelectedOptions = true; + } + return *this; + } +}; + + +/** + * handles semantic of a specific option type + * (give copy function to boost::program_options::typed_value class) + * @see boost::program_options::typed_value + */ +template +class opt_sem : public boost::program_options::typed_value +{ +public: + /** + * creates new option semantic + * @see boost::program_options::value(T*) + * @param ptStoreTo pointer to value which will contain the value when it's known (default NULL) + * @return pointer to new object (to be destroyed) + */ + static inline opt_sem *new_sem(T* ptStoreTo = NULL) + { + return new opt_sem(ptStoreTo); + } + + /** + * constructs option semantic + * @see boost::program_options::typed_value(T*) + * @param ptStoreTo pointer to value which will contain the value when it's known (can be NULL) + */ + opt_sem(T *ptStoreTo) : + boost::program_options::typed_value(ptStoreTo), ptStoreTo(ptStoreTo), + bDefaultValue(false), bImplicitValue(false), bNotifier(false), + bComposing(false), bMultitoken(false), bZeroTokens(false), bRequired(false) + {} + + /** + * specifies default value, which will be used if 
none is explicitly specified + * @see boost::program_options::typed_value::default_value(const T&) + * @param tValue default value (the type 'T' should provide operator<< for std::ostream) + * @return pointer to this object + */ + opt_sem *default_value(const T &tValue) + { + this->tDefaultValue = tValue; + this->bDefaultValue = true; + std::ostringstream oss; + oss << tValue; + this->sDefaultValueText = oss.str(); + boost::program_options::typed_value::default_value(tValue, this->sDefaultValueText); + return this; + } + + /** + * specifies default value, which will be used if none is explicitly specified + * @see boost::program_options::typed_value::default_value(const T&,const std::string&) + * @param tValue default value + * @param sTextual textual representation of default value + * @return pointer to this object + */ + opt_sem *default_value(const T &tValue, const std::string &sTextual) + { + this->tDefaultValue = tValue; + this->bDefaultValue = true; + this->sDefaultValueText = sTextual; + boost::program_options::typed_value::default_value(tValue, sTextual); + return this; + } + + /** + * specifies an implicit value, which will be used if the option is given, but without an adjacent value + * @see boost::program_options::typed_value::implicit_value(const T&) + * @param tValue implicit value (the type 'T' should provide operator<< for std::ostream) + * @return pointer to this object + */ + opt_sem *implicit_value(const T &tValue) + { + this->tImplicitValue = tValue; + this->bImplicitValue = true; + std::ostringstream oss; + oss << tValue; + this->sImplicitValueText = oss.str(); + boost::program_options::typed_value::implicit_value(tValue, this->sImplicitValueText); + return this; + } + + /** + * specifies an implicit value, which will be used if the option is given, but without an adjacent value + * @see boost::program_options::typed_value::implicit_value(const T&,const std::string&) + * @param tValue implicit value + * @param sTextual textual representation 
of implicit value + * @return pointer to this object + */ + opt_sem *implicit_value(const T &tValue, const std::string &sTextual) + { + this->tImplicitValue = tValue; + this->bImplicitValue = true; + this->sImplicitValueText = sTextual; + boost::program_options::typed_value::implicit_value(tValue, sTextual); + return this; + } + + /** + * specifies a function to be called when the final value is determined + * @see boost::program_options::typed_value::notifier(boost::function1) + * @param f1vt function called + * @return pointer to this object + */ + opt_sem *notifier(boost::function1 f1vt) + { + this->f1vtNotifier = f1vt; + this->bNotifier = true; + boost::program_options::typed_value::notifier(f1vt); + return this; + } + + /** + * specifies that the value is composing + * @see boost::program_options::typed_value::composing() + * @return pointer to this object + */ + opt_sem *composing() + { + this->bComposing = true; + boost::program_options::typed_value::composing(); + return this; + } + + /** + * specifies that the value can span multiple tokens + * @see boost::program_options::typed_value::multitoken() + * @return pointer to this object + */ + opt_sem *multitoken() + { + this->bMultitoken = true; + boost::program_options::typed_value::multitoken(); + return this; + } + + /** + * specifies that no tokens may be provided as the value of this option + * @see boost::program_options::typed_value::zero_tokens() + * @return pointer to this object + */ + opt_sem *zero_tokens() + { + this->bZeroTokens = true; + boost::program_options::typed_value::zero_tokens(); + return this; + } + + /** + * specifies that the value must occur + * @see boost::program_options::typed_value::required() + * @return pointer to this object + */ + opt_sem *required() + { + this->bRequired = true; + boost::program_options::typed_value::required(); + return this; + } + + /** + * copies option semantic + * @return pointer to new parent object (to be destroyed) + * @see 
boost::program_options::value(T*) + */ + boost::program_options::typed_value *parent_copy() const + { + boost::program_options::typed_value *ptvt = boost::program_options::value(this->ptStoreTo); + if (this->bDefaultValue) + ptvt->default_value(this->tDefaultValue, this->sDefaultValueText); + if (this->bImplicitValue) + ptvt->implicit_value(this->tImplicitValue, this->sImplicitValueText); + if (this->bNotifier) + ptvt->notifier(this->f1vtNotifier); + if (this->bComposing) + ptvt->composing(); + if (this->bMultitoken) + ptvt->multitoken(); + if (this->bZeroTokens) + ptvt->zero_tokens(); + if (this->bRequired) + ptvt->required(); + return ptvt; + } + +private: + T* ptStoreTo; + T tDefaultValue, tImplicitValue; + std::string sDefaultValueText, sImplicitValueText; + bool bDefaultValue, bImplicitValue, bNotifier; + bool bComposing, bMultitoken, bZeroTokens, bRequired; + boost::function1 f1vtNotifier; +}; + +#endif diff --git a/MachCopy.cpp b/MachCopy.cpp new file mode 100644 index 0000000..54c3675 --- /dev/null +++ b/MachCopy.cpp @@ -0,0 +1,129 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +using namespace std; +#include + +#include "Tools.h" +#include "MachCopy.h" +#ifdef CUDA +# include "Gpu.cuh" +#endif + +MachCopy::MachCopy(const int p_idim, const int p_odim, const int p_bsize, const ulong p_nbfw, const ulong p_nbbw) + : Mach(p_idim, p_odim, p_bsize, p_nbfw, p_nbbw) +{ +#ifdef BLAS_CUDA +#else + + if (odim != idim) { + Error ("The input size should be equal the output size for copy machine"); + } +#endif +} + +MachCopy::MachCopy(const MachCopy &m) + : Mach(m) +{ +} + +/******************************************* + * + ********************************************/ + +void MachCopy::Info(bool detailed, char *txt) +{ + if (detailed) { + cout << "Information on copy machine" << endl; + Mach::Info(detailed,txt); + } + else { + printf("%sMachCopy %d-%d, bs=%d, passes=%lu/%lu", txt, idim, odim, bsize, nb_forw, nb_backw); +#ifdef BLAS_CUDA + printf(", on GPU %d", Gpu::GetCudaDevice(Gpu::GetDevice(gpu_conf))); +#endif + tm.disp(", "); + tm.newline(); + +#ifdef BLAS_CUDA +#else +#endif + } +} + +//----------------------------------------------- +// File input +//----------------------------------------------- + + +void MachCopy::ReadData(istream &inpf, size_t s, int bs) +{ + if (0 != s) + ErrorN("data block of copy machine has %zu elements (0 were expected)", s); + Mach::ReadData(inpf, 0, bs); +} + + +//----------------------------------------------- +// Training +//----------------------------------------------- + +void MachCopy::Forw(int eff_bsize, bool in_train) +{ + + tm.start(); + + if (!data_in) + Error("MachCopy::Forw(): input data is not set"); + if (eff_bsize<=0) eff_bsize=bsize; + + debugMachInp("MachCopy",data_in,idim,odim,eff_bsize); + +#ifdef BLAS_CUDA + Gpu::MemcpyAsync(data_out, data_in, eff_bsize * odim * 
sizeof(REAL), cudaMemcpyDeviceToDevice); +#else + memcpy(data_out, data_in, eff_bsize * odim * sizeof(REAL)); +#endif + nb_forw += eff_bsize; + + tm.stop(); + debugMachOutp("MachCopy",data_out,idim,odim,eff_bsize); +} + + +void MachCopy::Backw(const float lrate, const float wdecay, int eff_bsize) +{ + if (eff_bsize<=0) eff_bsize=bsize; + if (!grad_out) + Error("MachCopy::Backw(): output gradient is not set"); + + debugMachOutp("MachCopy Grad",grad_out,idim,odim,eff_bsize); + tm.start(); +#ifdef BLAS_CUDA + Gpu::MemcpyAsync(grad_in, grad_out, eff_bsize * odim * sizeof(REAL), cudaMemcpyDeviceToDevice); +#else + memcpy(grad_in, grad_out, eff_bsize * odim * sizeof(REAL)); +#endif + + nb_backw += eff_bsize; + + tm.stop(); + debugMachInp("MachCopy Grad",grad_in,idim,odim,eff_bsize); +} diff --git a/MachCopy.h b/MachCopy.h new file mode 100644 index 0000000..2622c65 --- /dev/null +++ b/MachCopy.h @@ -0,0 +1,46 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + * + * copy machine: output = input + */ + +#ifndef _MachCopy_h +#define _MachCopy_h + +#include "Mach.h" + +class MachCopy : public Mach +{ +protected: + virtual void ReadData(istream&, size_t, int=0); // read binary data + MachCopy(const MachCopy &); // create a copy of the machine, sharing the parameters +public: + MachCopy(const int=0, const int=0, const int=128, const ulong=0, const ulong=0); + virtual ~MachCopy() {} + virtual MachCopy *Clone() {return new MachCopy(*this);} // create a copy of the machine, sharing the parameters + virtual int GetMType() {return file_header_mtype_copy;}; // get type of machine + virtual void Info(bool=false, char *txt=(char*)""); // display (detailed) information on machine + virtual void Forw(int=0, bool=false); // calculate outputs for current inputs + // backprop gradients from output to input and update all weights + virtual void Backw (const float lrate, const float wdecay, int=0); +}; + +#endif diff --git a/MachJoin.cpp b/MachJoin.cpp new file mode 100644 index 0000000..3490b44 --- /dev/null +++ b/MachJoin.cpp @@ -0,0 +1,530 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +using namespace std; +#include + +#include "Tools.h" +#include "MachJoin.h" + +#ifdef BLAS_CUDA +#include "Gpu.cuh" +#endif + +/* + * we allocate a global input gradient but it is not used + */ +void MachJoin::do_alloc(bool alloc_data_out) +{ +#ifdef BLAS_CUDA + Gpu::SetConfig(gpu_conf); + if (alloc_data_out) { + if (data_out) cublasFree(data_out); + data_out = Gpu::Alloc(odim*bsize, "output data of join machine"); + } + if (grad_in) cublasFree(grad_in); + grad_in = Gpu::Alloc(idim*bsize, "input gradient of join machine"); + + if (NULL == gpu_dev_data_out) + gpu_dev_data_out = Gpu::Alloc(odim*bsize*sizeof(REAL), "MachJoin::Forw tmp for AXPY"); + + // If more than 1 device is used, allocate (on the main device) a buffer + // large enough to contain one input minibatch for any of the sub-machines, + // before it is copied to the sub-machine's device. + if (sub_input_tmp) + cudaFree(sub_input_tmp); + if (Gpu::GetDeviceCount() > 1) { + Gpu::SetConfig(gpu_conf); + // use the max of machine's idim, so it can be used for any of the machines + int max_idim = 0; + for (uint m=0; mGetIdim(); + if (m_idim > max_idim) { + max_idim = m_idim; + } + } + Gpu::CheckError("before alloc sub_input_tmp"); + sub_input_tmp = Gpu::Alloc(max_idim*bsize, "tmp buffer for input data"); + } + else { + sub_input_tmp = NULL; + } + +#else + if (alloc_data_out) { + if (data_out) delete [] data_out; + data_out = (odim*bsize>0) ? new REAL[odim*bsize] : NULL; + // Allocate a buffer that will contain the output gradient passed to + // each sub-machine. This is needed because the sub-machine's call + // to Backw() can destroy the content of their grad_out buffer, + // so we have to pass a copy. 
+ grad_out_copy = (odim*bsize>0) ? new REAL[odim*bsize] : NULL; + } + if (grad_in) delete [] grad_in; + grad_in = (idim*bsize>0) ? new REAL[idim*bsize] : NULL; +#endif +} + +void MachJoin::do_delete() +{ +#ifdef BLAS_CUDA + if (grad_in) cublasFree(grad_in); + + // free local copies of grad_out + for (vector::iterator it = machs.begin(); it!=machs.end(); ++it) { + cudaFree((*it)->GetDataIn()); + (*it)->SetDataIn(NULL); + cudaFree((*it)->GetGradOut()); + (*it)->SetGradOut(NULL); + } + + // free local data_out buffer + if (NULL != gpu_dev_data_out) { + cudaFree(gpu_dev_data_out); + gpu_dev_data_out = NULL; + } + data_out = NULL; + if (sub_input_tmp) { + cudaFree(sub_input_tmp); + sub_input_tmp = NULL; + } +#else + if (grad_in) delete [] grad_in; + + // free grad_out_copy + if (grad_out_copy) + { + delete [] grad_out_copy; + grad_out_copy = NULL; + } +#endif + grad_in = NULL; +} + + +MachJoin::MachJoin() + : MachMulti() +{ +#ifdef BLAS_CUDA + gpu_dev_data_out = NULL; + sub_input_tmp = NULL; +#endif +} + +MachJoin::MachJoin(const MachJoin &m) + : MachMulti(m) +{ +#ifdef BLAS_CUDA + gpu_dev_data_out = NULL; + sub_input_tmp = NULL; +#endif +} + +MachJoin::~MachJoin() +{ + do_delete(); +} + +MachJoin *MachJoin::Clone() +{ + MachJoin *m = new MachJoin(*this); + if (m != NULL) + m->CloneSubmachs(*this); + return m; +} + +void MachJoin::MachAdd(Mach *new_mach) +{ + if (machs.empty()) { + machs.push_back(new_mach); + // think about freeing memory + idim=new_mach->GetIdim(); + odim=new_mach->GetOdim(); + bsize=new_mach->GetBsize(); + data_in=NULL; // will be set by MachJoin::SetDataIn() + grad_in = NULL; + grad_out = NULL; + } + else { + if (bsize!=new_mach->GetBsize()) + Error("bunch size of new join machine does not match"); + if (odim!=new_mach->GetOdim()) + Error("output dimension of new join machine does not match"); + machs.push_back(new_mach); + + // resize input gradient + idim += new_mach->GetIdim(); + } + do_alloc(machs.size() == 1); +#ifdef BLAS_CUDA + 
Gpu::SetConfig(new_mach->GetGpuConfig()); + // Always allocate input buffer, as data_in does not have the right layout + new_mach->SetDataIn(Gpu::Alloc(new_mach->GetIdim()*bsize, "input data of joined submachine")); + // Always allocate buffer for a local copy of grad_out, as it may be + // overwritten when calling Back(). We need one such copy for each machine + // on the GPU, so we can do all copies at the beginning of Back(), and avoid + // forcing synchronization. + new_mach->SetGradOut(Gpu::Alloc(odim*bsize, "copy of grad_out for a submachine of MachJoin")); + Gpu::SetConfig(gpu_conf); +#else + new_mach->SetDataIn(new REAL[new_mach->GetIdim()*bsize]); + new_mach->SetGradOut(NULL); // will be set before first Backw() +#endif + + activ_forw.push_back(true); + activ_backw.push_back(true); +} + +Mach *MachJoin::MachDel() +{ + if (machs.empty()) { + Error("impossible to delete element from join machine: is already empty"); + } + + Mach *del_mach=machs.back(); + machs.pop_back(); + + if (machs.empty()) { + idim=odim=bsize=0; + do_delete(); + data_in = NULL; + } + else { + // resize input + idim -= del_mach->GetIdim(); + } + + activ_forw.pop_back(); + activ_backw.pop_back(); + + // free local data_in buffer of submachine + REAL* loc_data_in = del_mach->GetDataIn(); + if (NULL != loc_data_in) { +#ifdef BLAS_CUDA + cudaFree(loc_data_in); +#else + delete [] loc_data_in; +#endif + } + +#ifdef BLAS_CUDA + // free local copy of grad_out + cudaFree(del_mach->GetGradOut()); +#endif + + del_mach->SetDataIn(NULL); + del_mach->SetGradOut(NULL); + return del_mach; +} + +// set pointer of input data +void MachJoin::SetDataIn(REAL *data) +{ + // Simply set the pointer. The data will be copied in Forw(). + data_in=data; +} + +// set pointer of output gradient +void MachJoin::SetGradOut(REAL *data) +{ + grad_out=data; + + // Do not make the sub-machines' grad_out point to this->grad_out, + // as calling their Backw() method can overwrite the content of + // their grad_out. 
Instead, we will use: + // - grad_out_copy if the submachine is on CPU + // - pre-allocated memory already in the submachine's grad_out if on GPU +#ifdef BLAS_CUDA + // Everything is already allocated. +#else + for (unsigned int m=0; mSetGradOut(grad_out_copy); +#endif +} + + +//----------------------------------------------- +// File output +//----------------------------------------------- + +void MachJoin::ReadData(istream &inpf, size_t s, int bs) +{ +#ifdef BLAS_CUDA + if (s!=machs.size()) + ErrorN("data block of join machine has %zu machines (%zu were expected)", s, machs.size()); + + idim=0; + for (vector::iterator it = machs.begin(); it!=machs.end(); ++it) { + Gpu::NewConfig(); + (*it) = Mach::Read(inpf, bs); + idim += (*it)->GetIdim(); + } +#else + MachMulti::ReadData(inpf, s, bs); + + // get dimensions + idim=0; + for (uint m=0; mGetIdim(); +#endif + odim = machs[0]->GetOdim(); + bsize = machs[0]->GetBsize(); + + // allocate memory + do_delete(); + do_alloc(true); + + for (uint m=0; mGetGpuConfig()); + machs[m]->SetDataIn(Gpu::Alloc(machs[m]->GetIdim()*bsize, "input data of joined submachine")); + machs[m]->SetGradOut(Gpu::Alloc(odim*bsize, "copy of grad_out for a submachine of MachJoin")); +#else + machs[m]->SetDataIn(new REAL[machs[m]->GetIdim()*bsize]); + machs[m]->SetGradOut(NULL); // will be set in MachJoin::SetGradOut() +#endif + } +#ifdef BLAS_CUDA + Gpu::SetConfig(gpu_conf); +#endif +} + +// +// Tools +// + +void MachJoin::Info(bool detailed, char *txt) +{ + if (detailed) { + cout << "Information on join machine" << endl; + MachMulti::Info(detailed); + } + else { + printf("%sJoin machine %d-%d, bs=%d, passes=%lu/%lu", txt, idim, odim, bsize, nb_forw, nb_backw); + tm.disp(", "); + printf("\n"); + char ntxt[512]; + sprintf(ntxt,"%s ", txt); + for (unsigned int i=0; iInfo(detailed, ntxt); + } + printf("%stotal number of parameters: %lu (%d MBytes)\n", txt, GetNbParams(), (int) (GetNbParams()*sizeof(REAL)/1048576)); +} + +// forward pass for all 
machines and average output into cumulated output +void MachJoin::Forw(int eff_bsize, bool in_train) +{ + if (machs.empty()) + Error("called Forw() for an empty join machine"); + + debugMachInp("MachJoin",data_in,idim,odim,eff_bsize); + + tm.start(); + if (eff_bsize<=0) eff_bsize=bsize; + int nb_activ=0; + + // The memory layout of data_in is NOT suited for the individual machines, + // as they need one contiguous block of memory, without strides between + // the rows. + // Mem layout of "data_in": + // part1, part2, ..., partN, # 1st example + // part1, part2, ..., partN, # 2nd example + // ..., + // part1, part2, ..., partN # eff_bsize-th example + // + // where "partI" is a vector representing the part of an example + // that goes into machine I. + // + // Mem layout needed by the first sub-machine: + // part1, # 1st example + // part1, # 2nd example + // ..., + // part1 # eff_bsize-th example + // + // So we need to copy the data into the input memory buffer of the N + // sub-machines, which is contiguous and already allocated (see MachAdd). 
+ REAL *iptr=data_in; + +#ifdef BLAS_CUDA + Gpu::StreamSynchronize(); +#endif + for (unsigned int m=0; mGetIdim(); + if (activ_forw[m]) { +#ifdef BLAS_CUDA + // Use Gpu::Memcpy2DAsync, which does strided copies in just one call + Gpu::SetConfig(machs[m]->GetGpuConfig()); + Gpu::Memcpy2DAsync(machs[m]->GetDataIn(), m_idim*sizeof(REAL), + iptr, idim*sizeof(REAL), + m_idim*sizeof(REAL), eff_bsize, + cudaMemcpyDeviceToDevice); +#else + // On CPU, calling memcpy in a loop is fast enough + for (int i=0; iGetDataIn() + i*m_idim, + iptr + i*idim, m_idim*sizeof(REAL)); + } +#endif + } + iptr += m_idim; + } + + REAL normf = 1.0f; + int size = odim*eff_bsize; + int inc1 = 1; +#ifdef BLAS_CUDA + // Forward all machines + for (unsigned int m=0; mForw(eff_bsize,in_train); + Gpu::CheckError("MachJoin::Forw after sub-mach->Forw()"); + } + else { + } + } + // Transfer everything to master GPU and accumulate in data_out + // We will use gpu_dev_data_out for buffer. + size_t cur_dev = Gpu::GetDevice(gpu_conf); + REAL* buf_out; + bool first_act = true; + for (unsigned int m=0; mGetGpuConfig(); + Gpu::SetConfig(mach_conf); + if (Gpu::GetDevice(mach_conf) == cur_dev) + buf_out = machs[m]->GetDataOut(); + else { + buf_out = gpu_dev_data_out; + Gpu::MemcpyAsync(gpu_dev_data_out, machs[m]->GetDataOut(), + size*sizeof(REAL), cudaMemcpyDeviceToDevice); + Gpu::StreamSynchronize(); + Gpu::SetConfig(gpu_conf); + } + if (first_act) { + first_act = false; + Gpu::MemcpyAsync(data_out, buf_out, size*sizeof(REAL), cudaMemcpyDeviceToDevice); + } + else { + AXPY(size, normf, buf_out, inc1, + data_out, inc1); + Gpu::CheckError("MachJoin::Forw after accumulation AXPY"); + } + Gpu::StreamSynchronize(); + } + } + Gpu::SetConfig(gpu_conf); +#else + for (unsigned int m=1; mForw(eff_bsize,in_train); + AXPY(&size, &normf, machs[m]->GetDataOut(), &inc1, data_out, &inc1); + } + else { + } + } +#endif + + // normalize by number of active machines + // TODO: make that an option + if (nb_activ>0) { + REAL normf = 
1.0 / (REAL) nb_activ; +#ifdef BLAS_CUDA + SCAL(size, normf, data_out, inc1); +#else + SCAL(&size, &normf, data_out, &inc1); +#endif + } + + nb_forw += eff_bsize; + + tm.stop(); + debugMachOutp("MachJoin",data_out,idim,odim,eff_bsize); +} + + + // backward pass for all machines + // everything is already chained correctly + // WARNING: the gradient wrt the input (grad_in) is NOT forwarded to the + // layer below. This only works if MachJoin is the FIRST layer, directly + // above the input. +void MachJoin::Backw(const float lrate, const float wdecay, int eff_bsize) +{ + if (machs.empty()) + Error("called Backw() for an empty join machine"); + if (eff_bsize<=0) eff_bsize=bsize; + + tm.start(); + + +#ifdef BLAS_CUDA + // copy grad_out to each submachine's local buffer first + Gpu::StreamSynchronize(); + for (unsigned int m=0; mGetGpuConfig()); + Gpu::MemcpyAsync(machs[m]->GetGradOut(), grad_out, odim*eff_bsize*sizeof(REAL), + cudaMemcpyDeviceToDevice); + } + } +#endif + + for (unsigned int m=0; mGetGradOut() before copy", machs[m]->GetGradOut(), idim, odim, eff_bsize); + // copy the current output gradient to machs[m]->GetGradOut(), + // so that each sub-machine can work on a brand new copy of grad_out, + // without risking overwriting it. + // For CPU Machines grad_out_copy will be used, and re-initialized here. + // For GPU machines, all copies were done in advance in the loop above. 
+ memcpy(machs[m]->GetGradOut(), grad_out, odim*eff_bsize*sizeof(REAL)); + + debugMachOutp("MachJoin::Backw: machs[m]->GetGradOut() after copy", machs[m]->GetGradOut(), idim, odim, eff_bsize); +#endif + + machs[m]->Backw(lrate,wdecay,eff_bsize); + +#ifndef BLAS_CUDA + debugMachOutp("MachJoin::Backw: machs[m]->GetGradOut() after machs[m]->Backw()", machs[m]->GetGradOut(), idim, odim, eff_bsize); +#endif + } + else { + } + } +#ifdef BLAS_CUDA + // synchronize to all streams + for (unsigned int m=0; mGetGpuConfig()); + Gpu::StreamSynchronize(); + } + } + Gpu::SetConfig(gpu_conf); // reset to master GPU +#endif + nb_backw += eff_bsize; + tm.stop(); +} + diff --git a/MachJoin.h b/MachJoin.h new file mode 100644 index 0000000..dde8248 --- /dev/null +++ b/MachJoin.h @@ -0,0 +1,66 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + * + * Join machine: + * - combines several machines into one layer + * - the output dimensions must be identical, the input dimensions may differ + */ + +#ifndef _MachJoin_h +#define _MachJoin_h + +using namespace std; +#include + +#include "MachMulti.h" + +class MachJoin : public MachMulti +{ +private: +#ifdef BLAS_CUDA + REAL* gpu_dev_data_out; // local copy of output buffer + REAL* sub_input_tmp; // Temporarily hold the input of a sub-machine before transfer to the right device +#else + REAL* grad_out_copy; // copy of output gradients, that is passed to the sub-machines' Backw() +#endif + void do_alloc(bool); // perform allocation of dynamic data structures + void do_delete(); // delete data structures +protected: + virtual void ReadData(istream&, size_t, int=0); // read binary data + MachJoin(const MachJoin &); // create a copy of the machine (without submachines) +public: + MachJoin(); // create initial sequence with no machine + virtual ~MachJoin(); + virtual MachJoin *Clone(); // create a copy of the machine and all submachines + virtual int GetMType() {return file_header_mtype_mjoin;}; // get type of machine + // redfine connecting functions + virtual void SetDataIn(REAL*); // set pointer of input data + virtual void SetGradOut(REAL*); // set pointer of output gradient + // add and remove machines + virtual void MachAdd(Mach*); // add new machine after the existing ones + virtual Mach *MachDel(); + // standard functions + virtual void Info(bool=false, char *txt=(char*)""); // display (detailed) information on machine + virtual void Forw(int=0, bool=false); // calculate outputs for current inputs + virtual void Backw(const float lrate, const float wdecay, int=0); // calculate gradients at input for current gradients at output 
+}; + +#endif diff --git a/MachLin.cpp b/MachLin.cpp new file mode 100644 index 0000000..d8e6883 --- /dev/null +++ b/MachLin.cpp @@ -0,0 +1,649 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +using namespace std; +#include +#include + +#include "Tools.h" +#include "MachLin.h" +#include "Blas.h" +#ifdef CUDA +# include "Gpu.cuh" +#endif + +void MachLin::do_alloc() +{ + if(!bExternal){ +#ifdef BLAS_CUDA + b = Gpu::Alloc(odim, "bias of linear machine"); + w = Gpu::Alloc(idim*odim, "weights of linear machine"); +#else + if (odim>0) { + b = new REAL[odim]; + if (!b) Error ("can't allocate memory for bias of linear machine"); + } + else b=NULL; + if (idim*odim>0) { + w = new REAL[idim*odim]; + if (!w) Error ("can't allocate memory for weights of linear machine"); + } + else w=NULL; +#endif + } +} + +MachLin::MachLin(const int p_idim, const int p_odim, const int p_bsize, const ulong p_nbfw, const ulong p_nbbw, const int shareid, const bool xdata) + : Mach(p_idim, p_odim, p_bsize, p_nbfw, p_nbbw), Shareable(xdata, shareid), bw_shared(NULL), bw_mutex(NULL) +{ +#ifdef BLAS_CUDA 
+#endif + do_alloc(); + // initialize clipping + clip_w = clip_gradw = clip_gradb = 0; + + // biases and weights sharing + bw_mutex = new pthread_mutex_t; + if (bw_mutex != NULL) { + pthread_mutex_init(bw_mutex, NULL); + int *new_bw_shared = new int; + if (new_bw_shared != NULL) { + (*new_bw_shared) = 0; + bw_shared = new_bw_shared; + } + } +} + +MachLin::MachLin(const MachLin &m) + : Mach(m), Shareable(true, -1), b(NULL), w(NULL), bw_shared(NULL), bw_mutex(NULL) +{ + iShareId = m.iShareId; + int inc_bw_shared = 0; + if (m.bw_mutex != NULL) { + pthread_mutex_lock(m.bw_mutex); + inc_bw_shared = ((m.bw_shared != NULL) ? (*m.bw_shared) + 1 : 0); + if (inc_bw_shared > 0) { + (*m.bw_shared) = inc_bw_shared; + + // share the weights and biases + b = m.b; + w = m.w; + bw_shared = m.bw_shared; + bw_mutex = m.bw_mutex; + } + pthread_mutex_unlock(m.bw_mutex); + } + if (inc_bw_shared <= 0) + Error ("can't share memory for bias and weights of linear machine"); +} + +/******************************************* + * + ********************************************/ + +MachLin::~MachLin() +{ + +#ifdef BLAS_CUDA +#else +#if 0 + printf("W:\n"); + for (int od=0;od 0) { + (*bw_shared)--; + pthread_mutex_unlock(bw_mutex); + return; + } + else { + delete bw_shared; + bw_shared = NULL; + } + } + } + +#ifdef BLAS_CUDA + if (b) cublasFree(b); + if (w) cublasFree(w); +#else + if (b) delete [] b; + if (w) delete [] w; +#endif + b = w = NULL; + + // destroy mutex + if (bw_mutex != NULL) { + pthread_mutex_t *old_bw_mutex = bw_mutex; + bw_mutex = NULL; + pthread_mutex_unlock(old_bw_mutex); + pthread_mutex_destroy(old_bw_mutex); + delete old_bw_mutex; + } +} + +void MachLin::BiasConst(const REAL val) +{ +#ifdef BLAS_CUDA + Gpu::SetConfig(gpu_conf); + nppsSet_32f(val, b, odim); +#else + for (int i=0; iodim) + for (int x=0; xodim) + for (int x=0; x= file_header_version4) + printf("%sMachLin %c%c[%d]-%d, bs=%d, passes=%lu/%lu", txt, bExternal?'s':'p', iShareId!=-1?iShareId+'0':'-', idim, odim, 
bsize, nb_forw, nb_backw); + else + printf("%sMachLin %d-%d, bs=%d, passes=%lu/%lu", txt, idim, odim, bsize, nb_forw, nb_backw); + + if (lr_coeff != 1.0) printf(", lrate-coeff=%.2f", lr_coeff); + +#ifdef BLAS_CUDA + printf(", on GPU %d", Gpu::GetCudaDevice(Gpu::GetDevice(gpu_conf))); +#endif + tm.disp(", "); + printf(", weights=%p, bias=%p", w, b); //DEBUG + tm.newline(); +#ifdef BLAS_CUDA +#else +#endif + } +} + +bool MachLin::CopyParams(Mach* mach) +{ + MachLin* machlin = static_cast(mach); + if (Mach::CopyParams(mach)) { + this->nb_params = machlin->nb_params; + this->clip_w = machlin->clip_w; + this->clip_gradw = machlin->clip_gradw; + this->clip_gradb = machlin->clip_gradb; +#ifdef BLAS_CUDA + Gpu::MemcpyAsync(this->b, machlin->b, odim * sizeof(REAL), cudaMemcpyDeviceToDevice); + Gpu::MemcpyAsync(this->w, machlin->w, idim * odim * sizeof(REAL), cudaMemcpyDeviceToDevice); +#else + memcpy(this->b, machlin->b, odim * sizeof(REAL)); + memcpy(this->w, machlin->w, idim * odim * sizeof(REAL)); +#endif + if(Mach::fileid >= file_header_version4) { + this->bExternal = machlin->bExternal; + this->iShareId = machlin->iShareId; + } + return true; + } + else + return false; +} + +//----------------------------------------------- +// File output +//----------------------------------------------- + +void MachLin::WriteParams(ostream &of) +{ + Mach::WriteParams(of); + if(Mach::fileid >= file_header_version4) { + //fprintf(stderr, "MachLin::WriteParams - bExternal=%d iShareId=%d\n", (int) bExternal, iShareId); + of.write((char*) &bExternal, sizeof(int)); + of.write((char*) &iShareId, sizeof(int)); + } +} + +void MachLin::WriteData(ostream &outf) { + int i=0, s=sizeof(REAL); + if (bExternal) { + //fprintf(stderr, " MachLin with external address to file share-id=%d\n", iShareId); + outf.write((char*) &i, sizeof(int)); + outf.write((char*) &s, sizeof(int)); + } + else { + //fprintf(stderr, " MachLin with its own data : share-id=%d, size=%d (idim=%d, odim=%d)\n", iShareId, 
odim*idim+odim, idim, odim); + int s=odim*idim + odim; + outf.write((char*) &s,sizeof(int)); + s=sizeof(REAL); + outf.write((char*) &s,sizeof(int)); + +#ifdef BLAS_CUDA + Gpu::SetConfig(gpu_conf); + REAL *local_mem=new REAL[odim*idim]; + cublasGetVector(odim*idim,CUDA_SIZE,w,1,local_mem,1); + Gpu::CheckError("transfer of weight matrix from GPU memory"); + outf.write((char*)local_mem,odim*idim*sizeof(REAL)); + delete [] local_mem; + + local_mem=new REAL[odim]; + cublasGetVector(odim,CUDA_SIZE,b,1,local_mem,1); + Gpu::CheckError("transfer of bias vector from GPU memory"); + outf.write((char*)local_mem,odim*sizeof(REAL)); + delete [] local_mem; +#else + outf.write((char*) w,odim*idim*sizeof(REAL)); + outf.write((char*) b,odim*sizeof(REAL)); +#endif + } +} + +//----------------------------------------------- +// File input +//----------------------------------------------- + +void MachLin::ReadParams(istream &inpf, bool with_alloc) +{ + + Mach::ReadParams(inpf, false); + //This should be done for file_version 3 or greater ! 
+ if(Mach::fileid >= file_header_version4){ + inpf.read((char*) &bExternal, sizeof(int)); +// fprintf(stderr, " - bExternal=%d", (int) bExternal); + + inpf.read((char*) &iShareId, sizeof(int)); + // fprintf(stderr, " - share-id=%d\n", (int) iShareId); + } + //fprintf(stderr, "\n"); + do_alloc(); +} + +void MachLin::ReadData(istream &inpf, size_t s, int bs) +{ + size_t se=odim*idim + odim; + + if (bExternal) { + if (s>0) { + ErrorN("MachLin: internal error in file, linear machine has external address, but %u elements of data are provided\n",(uint)s); + } + return; // address will be filled in Mach::Read + } + else if (s!=se) { + ErrorN("data block of linear machine has %zu elements (%zu were expected)", s, se); + } + + Mach::ReadData(inpf, 0, bs); + + // read parameters + // TODO: error checks +#ifdef BLAS_CUDA + Gpu::SetConfig(gpu_conf); + REAL *local_mem=new REAL[odim*idim]; + inpf.read((char*)local_mem,odim*idim*sizeof(REAL)); + for (int i=0;i times into result matrix + Gpu::CopyVectorToMatrix(data_out, b, eff_bsize, odim); + call_gemm(data_out, w, data_in, 1.0, odim, eff_bsize, idim); +#else + for (int e=0; e0 && !in_train) { + REAL scale=1.0-drop_out; + Gpu::CublasSscal(s,scale,data_out,1); + } + } + +#else // of BLAS_CUDA + + if (in_train) { + // perform drop-out during training: set randomly neurones to zero + REAL *rptr=drop_out_rand; + REAL *optr=data_out; + // TODO: may be it is faster to create a mask to be multiplied with a element-wise product + for (int i=0; i0 && !in_train) { + REAL scale=1.0-drop_out; + SCAL(&s,&scale,data_out,&inc1); + } + } +#endif +} + + +//----------------------------------------------- +// Backprop +//----------------------------------------------- + +void MachLin::Backw(const float lrate, const float wdecay, int eff_bsize) +{ + static REAL real1=1.0, real0=0.0; + static char transN='N', transT='T'; + REAL lrate_bs = lr_coeff * lrate / sqrt(GetBsize()); // scale by block size ! 
+ REAL epsilon = 1.0 + lrate_bs * wdecay; + + if (eff_bsize<=0) eff_bsize=bsize; + if (!grad_out) + Error("MachLin::Backw(): output gradient is not set"); + + debugMachOutp("MachLin Grad",grad_out,idim,odim,eff_bsize); + tm.start(); + +#if defined(BLAS_ATLAS) || defined(BLAS_INTEL_MKL) + // perform drop-out, set selected output gradients to zero + if (drop_out>0.0) { + REAL *rptr=drop_out_rand; + REAL *gptr=grad_out; + for (int i=0; i0) { + Gpu::DropOut(odim*eff_bsize, grad_out, drop_out_rand, drop_out); + } + + if (update) { + Gpu::BatchedAXPY(odim,lrate_bs,grad_out,1,b,1,eff_bsize); + } + // backprop gradient: grad_in = w' * grad_out + // idim x bsize = (odim x idim)' * odim x bsize + GEMM (transT, transN, idim, eff_bsize, odim, + real1, w, odim, grad_out, odim, + real0, grad_in, idim); + if (update) { + // update weights including weight decay + // w = lrate *grad_out * data_in^T + epsilon * w + GEMM (transN, transT, odim, idim, eff_bsize, + lrate_bs, grad_out, odim, data_in, idim, + epsilon, w, odim); + } +# else + Error("you must compile with BLAS_ATLAS, BLAS_INTEL_MKL or BLAS_CUDA"); +# endif +#endif + nb_backw += eff_bsize; + + tm.stop(); + debugMachInp("MachLin Grad",grad_in,idim,odim,eff_bsize); +} + +void MachLin::Debug() +{ +#ifdef BLAS_CUDA + Error("MachLin::Debug(): not implemented for CUDA\n"); +#else + for (int o=0; o +#include "Mach.h" +#include "Shareable.h" + +class MachLin : public Mach, public Shareable +{ +private: + void do_alloc(); // perform allocation of dynamic data structures +protected: + int nb_params; // number of params for max-out + // CUDA: the following two variables refer to device memory + REAL *b; // biases + REAL *w; // weights, stored in BLAS format, e.g. COLUMN major ! + REAL clip_w; // absolute value for clipping weights, default=0, i.e. no clipping + REAL clip_gradw; // absolute value for clipping gradients on weights, default=0, i.e. 
no clipping + REAL clip_gradb; // absolute value for clipping gradients on biaises, default=0, i.e. no clipping + int *bw_shared; // number of objects sharing biases and weights + pthread_mutex_t *bw_mutex; // mutex used to share biases and weights + virtual void ReadParams(istream&, bool=true); // read all params + virtual void ReadData(istream&, size_t, int=0); // read binary data + virtual void WriteParams(ostream&); // write all params + virtual void WriteData(ostream&); // write binary data + MachLin(const MachLin &); // create a copy of the machine, sharing the parameters +public: + //MachLin(const int=0, const int=0, const int=128, const ulong=0, const ulong=0); + MachLin(const int p_idim=0, const int p_odim=0, const int p_bsize=128, const ulong p_nbfw=0, const ulong p_nbbw=0, const int shareid=-1, const bool xdata=false); + //MachLin(const int=0, const int=0, const int=128, const ulong=0, const ulong=0, const int=-1, const bool=false); + virtual ~MachLin(); + virtual MachLin *Clone() {return new MachLin(*this);} // create a copy of the machine, sharing the parameters + virtual ulong GetNbParams() {return idim*odim+odim;} // return the nbr of allocated parameters + virtual int GetMType() {return file_header_mtype_lin;}; // get type of machine + // set values for clipping + virtual void SetClipW(REAL v) {clip_w=v;}; + virtual void SetClipGradW(REAL v) {clip_gradw=v;}; + virtual void SetClipGradB(REAL v) {clip_gradb=v;}; + // network initialisation + virtual void BiasConst(const REAL val); // init biases with constant values + virtual void BiasRandom(const REAL range); // random init of biases in [-range, range] + virtual void WeightsConst(const REAL val); // init weights with constant values + virtual void WeightsID(const REAL =1.0); // init weights to identity transformation + virtual void WeightsRandom(const REAL range); // random init of weights in [-range, range] + virtual void WeightsRandomFanI(const REAL range=sqrt(6.0)); // random init of weights in 
fct of fan-in + virtual void WeightsRandomFanIO(const REAL range=sqrt(6.0)); // random init of weights in fct of fan-in and fan-out + virtual void Info(bool=false, char *txt=(char*)""); // display (detailed) information on machine + virtual bool CopyParams(Mach*); // copy parameters from another machine + virtual void Forw(int=0, bool=false); // calculate outputs for current inputs + virtual void ForwDropout(int=0, bool=false); // new function to apply dropout in training forward pass, must be called AFTER output function + // backprop gradients from output to input and update all weights + virtual void Backw (const float lrate, const float wdecay, int=0); + virtual void Debug (); +}; + +#endif diff --git a/MachLinRectif.cpp b/MachLinRectif.cpp new file mode 100644 index 0000000..cb7258c --- /dev/null +++ b/MachLinRectif.cpp @@ -0,0 +1,130 @@ +/* + * This file is part of the continuous space language and translation model toolkit + * for statistical machine translation and large vocabulary speech recognition. + * + * Copyright 2015, Holger Schwenk, LIUM, University of Le Mans, France + * + * The CSLM toolkit is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version 3 as + * published by the Free Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * + */ + +using namespace std; +#include +#include + +#include "Tools.h" +#include "MachLinRectif.h" +#include "Blas.h" + +#ifdef BLAS_CUDA +#include "Gpu.cuh" +#endif + +MachLinRectif::MachLinRectif(const int p_idim, const int p_odim, const int p_bsize, const ulong p_nbfw, const ulong p_nbbw, const int shareid, const bool xdata) + : MachLin(p_idim, p_odim, p_bsize, p_nbfw, p_nbbw, shareid, xdata) +{ +} + +MachLinRectif::MachLinRectif(const MachLinRectif &m) + : MachLin(m) +{ +} + +MachLinRectif::~MachLinRectif() +{ +} + + +//----------------------------------------------- +// Tools +//----------------------------------------------- + +void MachLinRectif::Info(bool detailed, char *txt) +{ + if (detailed) { + cout << "Information on linear rectifier machine" << endl; + MachLin::Info(detailed,txt); + } + else { + if (drop_out>0) + printf("%sMachLinRectif %d-%d, bs=%d, drop-out=%4.2f, passes=%lu/%lu", txt, idim, odim, bsize, drop_out, nb_forw, nb_backw); + else + printf("%sMachLinRectif %d-%d, bs=%d, passes=%lu/%lu", txt, idim, odim, bsize, nb_forw, nb_backw); + if (lr_coeff != 1.0) printf(", lrate-coeff=%.2f", lr_coeff); +#ifdef BLAS_CUDA + printf(", on GPU %d", Gpu::GetCudaDevice(Gpu::GetDevice(gpu_conf))); +#endif + tm.disp(", "); + tmh.disp(" + recif: "); + printf("\n"); + } +} + +//----------------------------------------------- +// Training +//----------------------------------------------- + +void MachLinRectif::Forw(int eff_bsize, bool in_train) +{ + + if (eff_bsize<=0) eff_bsize=bsize; + MachLin::Forw(eff_bsize,in_train); + + tmh.start(); + + // apply linear rectifier on output +#ifdef BLAS_CUDA + Gpu::LinRectifForw(odim*eff_bsize, data_out); +#else + REAL *ptr=data_out; + for (int i=0; i