diff --git a/Lrate.cpp b/Lrate.cpp index 8848868..5e36b8e 100644 --- a/Lrate.cpp +++ b/Lrate.cpp @@ -222,9 +222,8 @@ bool LrateDivideAndRecover::UpdateLrateOnDev(REAL rErrDev, REAL rBestErrDev, con printf("error: %s\n", strerror(errno)); else { // reload previous best machine parameters - Mach::SetShareOffs(random()); // use a new shareOffs since we have one globale table + Mach::ResetSharedMachines(); // all previously shared machine are no more valid Mach* pPrevMach = Mach::Read(ifs); - Mach::SetShareOffs(0); // reset if ( (pMach->GetNbForw () >= pPrevMach->GetNbForw ()) && (pMach->GetNbBackw() >= pPrevMach->GetNbBackw()) && pMach->CopyParams(pPrevMach) ) diff --git a/Mach.cpp b/Mach.cpp index 4b103f8..3d318d0 100644 --- a/Mach.cpp +++ b/Mach.cpp @@ -44,8 +44,7 @@ using namespace std; vector signal_mach; int Mach::fileid=-1; -std::map prSharedMachines; // to store Mach pointers for sharing using clone() function -int shareOffs=0; // used to separate several machine loaded by multiple calls of Mach::Read() +std::map Mach::prSharedMachines; // to store Mach pointers for sharing using clone() function #ifdef BLAS_CUDA # include "Blas.h" @@ -195,6 +194,15 @@ Mach::~Mach() // File output //----------------------------------------------- +void Mach::WriteToFile(const char* fname){ + debug1("*** writing general machine to file '%s'\n",fname); + ofstream fs; + fs.open(fname,ios::binary); + CHECK_FILE(fs,fname); + Write(fs); + fs.close(); +} + void Mach::WriteParams(ostream &of) { debug0("*** write params of Mach\n"); // write machine specific params @@ -229,6 +237,15 @@ void Mach::Write(ostream &of) // File input //----------------------------------------------- +Mach *Mach::ReadFromFile(const char* fname, int bs){ + + ifstream ifs; + ifs.open(fname,ios::binary); + CHECK_FILE(ifs,fname); + Mach *m = Mach::Read(ifs, bs); + ifs.close(); + return m; +} void Mach::ReadParams(istream &inpf, bool with_alloc) { @@ -263,7 +280,6 @@ Mach *Mach::Read(istream &inpf, int bs) char header[file_header_size], h[file_header_size]; int v; - debug1("###### READ with shareOffs %d\n",shareOffs); inpf.read(header,file_header_size); if (sscanf(header,"%s %d",h,&v) != 2) { ErrorN("format of machine file not recognised: %s", header); @@ -340,31 +356,31 @@ Mach *Mach::Read(istream &inpf, int bs) // if version > 3 then check share-id if(Mach::fileid >= file_header_version3){ m->ReadData(inpf, s, bs); - int shID = shareOffs+mt->GetShareId(); - if(prSharedMachines[shID] == NULL){ + int shID = mt->GetShareId(); + if(Mach::GetSharedMachine(shID) == NULL){ //fprintf(stderr, " ... new primary MachTab with share-id %d\n", shID); - prSharedMachines[shID] = mt; + Mach::SetSharedMachine(shID, mt); if(mt->GetTabAdr() == NULL) { Error("Mach::Read: machine should have its weights allocated!\n"); } } else { //fprintf(stderr, " ... cloning secondary MachTab with share-id %d\n", shID); - m = prSharedMachines[shID]->Clone(); + m = (Mach::GetSharedMachine(shID))->Clone(); } } else { // before file_header_version3, all MachTab in a MachPar share the weights - int shID = shareOffs + -1; - if(prSharedMachines[shID] == NULL ){ + int shID = -1; + if(Mach::GetSharedMachine(shID) == NULL ){ if(mt->bExternal==0) m->ReadData(inpf, s, bs); //read the data for the first MachTab else{ Error("The first MachTab should have its own data but is set to have external data\n"); } - debug2("Storing address (%p) of machine %d\n",mt->GetTabAdr(),m); - prSharedMachines[shID]=m; + debug2("Storing address (%p) of machine %p\n",mt->GetTabAdr(),m); + Mach::SetSharedMachine(-1, m); } else { - m = prSharedMachines[shID]->Clone(); - debug1(" cloning MachTab, address = %p\n", mt->GetTabAdr()); + m = Mach::GetSharedMachine(shID)->Clone(); + debug2(" cloning MachTab %p, address = %p\n", m, mt->GetTabAdr()); //fprintf(stderr, " cloning MachTab, address = %p\n", mt->GetTabAdr()); } } @@ -372,19 +388,19 @@ Mach *Mach::Read(istream &inpf, int bs) else if(Mach::fileid >= file_header_version4 && Mach::canShare(mtype)) { //fprintf(stderr, "Shareable machine mtype = %d\n", mtype); Shareable* sharem = dynamic_cast(m); - int shID = shareOffs + sharem->GetShareId(); + int shID = sharem->GetShareId(); //fprintf(stderr, "Shareable: external=%d share-id=%d\n", sharem->HasExternalData(), sharem->GetShareId()); if(sharem->HasExternalData()){ - if(prSharedMachines[shID] != NULL){ + if(Mach::GetSharedMachine(shID) != NULL){ //fprintf(stderr, " ... secondary machine with share-id %d -> cloning primary machine\n", sharem->GetShareId()); - m = (MachLin*)prSharedMachines[shID]->Clone(); + m = ((MachLin*)Mach::GetSharedMachine(shID))->Clone(); } else { ErrorN("Found a secondary machine with shareid=%d, but the primary machine is not yet created\n", sharem->GetShareId()); } } else { - if(sharem->GetShareId() != shareOffs + -1){ + if(sharem->GetShareId() != -1){ //fprintf(stderr, " ... new primary machine with share-id %d\n", sharem->GetShareId()); - prSharedMachines[shID] = m; + Mach::SetSharedMachine(shID, m); } //else { fprintf(stderr, " ... new primary machine with no sharing\n"); } m->ReadData(inpf, s, bs); @@ -433,12 +449,20 @@ bool Mach::CopyParams(Mach* mach) && (mach->idim == this->idim ) && (mach->odim == this->odim ) && (mach->bsize == this->bsize) ) { + + this->nb_forw = mach->nb_forw; this->nb_backw = mach->nb_backw; this->update = mach->update; return true; } else + { + if(NULL == mach) { cerr << "Mach::CopyParams: mach is NULL" << endl; } + if(mach->idim == this->idim) { cerr << "Mach::CopyParams: idim differs" << endl; } + if(mach->odim == this->odim ) { cerr << "Mach::CopyParams: odim differs" << endl; } + if(mach->bsize == this->bsize) { cerr << "Mach::CopyParams: bsize differs" << endl; } + } return false; } diff --git a/Mach.h b/Mach.h index a9928df..c83c2c8 100644 --- a/Mach.h +++ b/Mach.h @@ -25,6 +25,7 @@ #include #include +#include #include "Tools.h" #include "Blas.h" #include "Timer.h" @@ -62,13 +63,14 @@ #define file_header_mtype_max 32 #define file_header_mtype_avr 33 -extern int shareOffs; + class Mach { private: void do_alloc(); // perform allocation of dynamic data structures protected: static int fileid; + static std::map prSharedMachines; // to store Mach pointers for sharing using clone() function int idim, odim; // input and output dimension int bsize; // block size (nb of example used in parallel) ulong nb_forw; // nb of forward examples processed @@ -145,7 +147,9 @@ class Mach virtual bool CopyParams(Mach*); // copy parameters from another machine // FILE IO static Mach *Read(istream&, int=0); // read class from a stream + static Mach *ReadFromFile(const char*, int=0); // read class from a file, call Read void Write(ostream&); // write content of class to a stream + void WriteToFile(const char*); // write content of class to a stream // Training virtual void Forw(int=0, bool=false); // calculate outputs for current inputs // backprop gradients from output to input and update all weights @@ -153,12 +157,15 @@ class Mach static int GetFileId(){ return fileid;} static void SetFileId(int id){ fileid = id;} + static Mach* GetSharedMachine(int i){ return prSharedMachines[i]; } + static void SetSharedMachine(int i, Mach* m){ prSharedMachines[i]=m; } + static bool canShare(int mtype) { return (mtype != file_header_mtype_base && mtype != file_header_mtype_stab && mtype <= file_header_mtype_softmax_class); } - static void SetShareOffs(int val) { shareOffs = val; } + static void ResetSharedMachines() { prSharedMachines.clear(); } }; void GpuUnlock(); diff --git a/MachConfig.cpp b/MachConfig.cpp index 83b9bab..ae02b91 100644 --- a/MachConfig.cpp +++ b/MachConfig.cpp @@ -24,6 +24,7 @@ #include #include #include +#include "Mach.h" #include "MachAvr.h" #include "MachConfig.h" #include "MachJoin.h" @@ -44,6 +45,7 @@ #include "Tools.h" #include "MachCopy.h" + namespace bpo = boost::program_options; /** @@ -119,7 +121,7 @@ MachConfig::MachConfig (bool bNeedConfFile, REAL rInitBias) : ("clip-weights,w" , opt_sem::new_sem(&this->rClipWeights )->default_value( 0 ), "value for clipping weights (no clipping by default)") ("clip-gradients-weights,g",opt_sem::new_sem(&this->rClipGradWeights)->default_value(0 ), "value for clipping gradients on weights (no clipping by default)") ("clip-gradients-bias,G", opt_sem::new_sem(&this->rClipGradBias )->default_value( 0 ), "value for clipping gradients on biases (no clipping by default)") - ("weight-decay,W" , opt_sem::new_sem( )->default_value( 3E-05), "coefficient of weight decay") + ("weight-decay,W" , opt_sem::new_sem( )->default_value( 0.01), "coefficient of weight decay") ("backward-tm,V" , opt_sem::new_sem()->zero_tokens(), "use an inverse back-ward translation model") ("renormal,R" , opt_sem::new_sem()->zero_tokens(), "renormalize all probabilities, slow for large short-lists") ("recalc,r" , opt_sem::new_sem()->zero_tokens(), "recalculate global scores") @@ -925,33 +927,33 @@ Mach *MachConfig::read_simple_machine (int iMachType, int iBlockSize, bool bMach MachTab *pMachTab = NULL; iShareId = vmMachParams["share-id"].as(); - if(iShareId != -1 && prSharedMachines[iShareId] != NULL) { + if(iShareId != -1 && Mach::GetSharedMachine(iShareId) != NULL) { //TODO: should we check the machine type also? - if(prSharedMachines[iShareId]->GetMType() != iMachType){ + if(Mach::GetSharedMachine(iShareId)->GetMType() != iMachType){ Error("WARNING: machines sharing weights have not the same type, check the config file!"); } if(iMachType == file_header_mtype_tab){ - if (prSharedMachines[iShareId]->GetIdim()!=1 || iOutputDim != prSharedMachines[iShareId]->GetOdim()){ + if (Mach::GetSharedMachine(iShareId)->GetIdim()!=1 || iOutputDim != Mach::GetSharedMachine(iShareId)->GetOdim()){ Error("MachTab sharing weights have not the same input/output size, check the config file!"); } } - else if(iInputDim != prSharedMachines[iShareId]->GetIdim() || iOutputDim != prSharedMachines[iShareId]->GetOdim()){ - cerr << "mach[" << iShareId << "]->idim=" << prSharedMachines[iShareId]->GetIdim() << " idim=" << iInputDim << endl; - cerr << "mach[" << iShareId << "]->odim=" << prSharedMachines[iShareId]->GetOdim() << " odim=" << iOutputDim << endl; + else if(iInputDim != Mach::GetSharedMachine(iShareId)->GetIdim() || iOutputDim != Mach::GetSharedMachine(iShareId)->GetOdim()){ + cerr << "mach[" << iShareId << "]->idim=" << Mach::GetSharedMachine(iShareId)->GetIdim() << " idim=" << iInputDim << endl; + cerr << "mach[" << iShareId << "]->odim=" << Mach::GetSharedMachine(iShareId)->GetOdim() << " odim=" << iOutputDim << endl; Error("Machines sharing weights have not the same input/output size, check the config file!"); } //cout << "Cloning previous machine with share-id " << iShareId << endl; - pNewMach = prSharedMachines[iShareId]->Clone(); + pNewMach = Mach::GetSharedMachine(iShareId)->Clone(); if(iMachType == file_header_mtype_lin) pMachLin = (MachLin*) pNewMach; else if(iMachType == file_header_mtype_tab) pMachTab = (MachTab*) pNewMach; - } else if(iShareId == -1 && prSharedMachines[iShareId] != NULL && iMachType == file_header_mtype_tab) { + } else if(iShareId == -1 && Mach::GetSharedMachine(iShareId) != NULL && iMachType == file_header_mtype_tab) { // special case for MachTab // All MachTab share their weights by default. This is for compatibility with previously built system // cout << "Create MachTab with share-id " << iShareId << " -> cloning existing machine with that share-id" << endl; - if(iInputDim != prSharedMachines[iShareId]->GetIdim() || iOutputDim != prSharedMachines[iShareId]->GetOdim()){ + if(1 != Mach::GetSharedMachine(iShareId)->GetIdim() || iOutputDim != Mach::GetSharedMachine(iShareId)->GetOdim()){ Error("Machines sharing weights have not the same input/output size, check the config file!"); } - pNewMach = pMachTab = ((MachTab*)prSharedMachines[iShareId])->Clone(); + pNewMach = pMachTab = ((MachTab*)Mach::GetSharedMachine(iShareId))->Clone(); } else { //if(iShareId==-1) cout << "Creating new machine with no share-id" << endl; //else cout << "Creating new machine with share-id " << iShareId << endl; @@ -961,6 +963,7 @@ Mach *MachConfig::read_simple_machine (int iMachType, int iBlockSize, bool bMach break; case file_header_mtype_tab: pNewMach = pMachTab = new MachTab(iInputDim, iOutputDim, iCurBlockSize, iNbForward, iNbBackward, iShareId); + Mach::SetSharedMachine(iShareId, pNewMach); break; case file_header_mtype_lin: pNewMach = pMachLin = new MachLin(iInputDim, iOutputDim, iCurBlockSize, iNbForward, iNbBackward, iShareId); @@ -997,7 +1000,7 @@ Mach *MachConfig::read_simple_machine (int iMachType, int iBlockSize, bool bMach break; } if(iShareId != -1){ - prSharedMachines[iShareId] = pNewMach; + Mach::SetSharedMachine(iShareId, pNewMach); } bNewShareId = true; } diff --git a/MachConfig.h b/MachConfig.h index f2b857c..6450980 100644 --- a/MachConfig.h +++ b/MachConfig.h @@ -565,8 +565,6 @@ class MachConfig boost::program_options::variables_map vmGeneralOptions; ///< map of general options std::map mMachNameMap; ///< map of machine names - std::map prSharedMachines; // to store Mach pointers for sharing using clone() function - /** * open configuration file * @return false in case of error, true otherwise diff --git a/MachLin.cpp b/MachLin.cpp index 2bd20f5..03eca16 100644 --- a/MachLin.cpp +++ b/MachLin.cpp @@ -589,7 +589,7 @@ void MachLin::Backw(const float lrate, const float wdecay, int eff_bsize) static REAL real1=1.0, real0=0.0; static char transN='N', transT='T'; REAL lrate_bs = lr_coeff * lrate / sqrt(GetBsize()); // scale by block size ! - REAL epsilon = 1.0 + lrate_bs * wdecay; + REAL epsilon = 1.0 - lrate_bs * wdecay; if (eff_bsize<=0) eff_bsize=bsize; if (!grad_out) diff --git a/Makefile b/Makefile index a3f2625..d154341 100644 --- a/Makefile +++ b/Makefile @@ -46,6 +46,7 @@ NEXT_PATCH_VERSION = $(MAJOR).$(MINOR).$(shell expr $(PATCH) + 1)-b$(BUILD) .DEFAULT: all CUDA_ROOT ?= /opt/cuda CUDA ?= 0 +DEBUG ?= 0 # K20: sm_35 / M2090: sm_20 / GTX690: sm_30 / GTX580: sm_20 NVCC_FLAGS ?= -g -arch=sm_35 -use_fast_math @@ -171,7 +172,11 @@ OPT_FLAGS?=-mtune=native -march=native -O3 -Ofast # corei7: eg. Intel X5675, Core i7 with sse4_2, aes, pclmulqdq) # corei7-avx: eg. Intel E5-2670 which adds avx # corei7-avx-i: eg. Intel E5-2690v2 which adds avx -CFLAGS=${OPT_FLAGS} -Wall -g ${DB} ${BLAS} ${BOLM_FLAGS} ${MOSES_INC} ${MOSES_CFLAGS} +CFLAGS=${OPT_FLAGS} -Wall -g ${DB} ${BLAS} ${BOLM_FLAGS} ${MOSES_INC} ${MOSES_CFLAGS} +#-D DEBUG=1 +ifneq "$(DEBUG)" "0" + CFLAGS+=-DDEBUG=1 +endif OBJS:=$(SRCS:.cpp=.o) OBJS:=$(OBJS:.cu=.o) diff --git a/README b/README index b5a3fb7..cecb543 100644 --- a/README +++ b/README @@ -88,6 +88,12 @@ Prerequisites: Version history --------------- +July 14 2015 V4.01 + + - mainly corrected bug in weight decay (the sign was wrong) + This had probably little effect since the default value of 3e-5 was rather small + the new default value is 1e-2 and we have observed perplexity improvements in several tasks + Jun 28 2015 V4.0 - bug fixes: - deterministic sorting of the wordlist for short lists diff --git a/docs/RELEASE_NOTES b/docs/RELEASE_NOTES index 1ee4393..8a1830e 100644 --- a/docs/RELEASE_NOTES +++ b/docs/RELEASE_NOTES @@ -1,4 +1,10 @@ ***************************************************************************************** +RELEASE V4.01 July 14 2015 + + - mainly corrected bug in weight decay (the sign was wrong) + This had probably little effect since the default value of 3e-5 was rather small + the new default value is 1e-2 and we have observed perplexity improvements in several tasks + RELEASE V4.0, June 28 2015 - bug fixes: