Skip to content

Commit

Permalink
release 4.01, mainly fixed bug in weight decay
Browse files Browse the repository at this point in the history
  • Loading branch information
hschwenk committed Jul 14, 2015
1 parent 9134273 commit 8abca5e
Show file tree
Hide file tree
Showing 9 changed files with 86 additions and 38 deletions.
3 changes: 1 addition & 2 deletions Lrate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,8 @@ bool LrateDivideAndRecover::UpdateLrateOnDev(REAL rErrDev, REAL rBestErrDev, con
printf("error: %s\n", strerror(errno));
else {
// reload previous best machine parameters
Mach::SetShareOffs(random()); // use a new shareOffs since we have one globale table
Mach::ResetSharedMachines(); // all previously shared machine are no more valid
Mach* pPrevMach = Mach::Read(ifs);
Mach::SetShareOffs(0); // reset
if ( (pMach->GetNbForw () >= pPrevMach->GetNbForw ())
&& (pMach->GetNbBackw() >= pPrevMach->GetNbBackw())
&& pMach->CopyParams(pPrevMach) )
Expand Down
60 changes: 42 additions & 18 deletions Mach.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@ using namespace std;

vector<Mach*> signal_mach;
int Mach::fileid=-1;
std::map<int, Mach *> prSharedMachines; // to store Mach pointers for sharing using clone() function
int shareOffs=0; // used to separate several machine loaded by multiple calls of Mach::Read()
std::map<int, Mach *> Mach::prSharedMachines; // to store Mach pointers for sharing using clone() function

#ifdef BLAS_CUDA
# include "Blas.h"
Expand Down Expand Up @@ -195,6 +194,15 @@ Mach::~Mach()
// File output
//-----------------------------------------------

void Mach::WriteToFile(const char* fname){
debug1("*** writing general machine to file '%s'\n",fname);
ofstream fs;
fs.open(fname,ios::binary);
CHECK_FILE(fs,fname);
Write(fs);
fs.close();
}

void Mach::WriteParams(ostream &of) {
debug0("*** write params of Mach\n");
// write machine specific params
Expand Down Expand Up @@ -229,6 +237,15 @@ void Mach::Write(ostream &of)
// File input
//-----------------------------------------------

// Load a machine from the file `fname`, opened in binary mode, by delegating
// to Mach::Read(); `bs` is forwarded unchanged to Read().
// Returns the pointer produced by Mach::Read().
Mach *Mach::ReadFromFile(const char* fname, int bs)
{
  ifstream in(fname, ios::binary);
  CHECK_FILE(in, fname);

  Mach *loaded = Mach::Read(in, bs);
  in.close();
  return loaded;
}

void Mach::ReadParams(istream &inpf, bool with_alloc)
{
Expand Down Expand Up @@ -263,7 +280,6 @@ Mach *Mach::Read(istream &inpf, int bs)
char header[file_header_size], h[file_header_size];
int v;

debug1("###### READ with shareOffs %d\n",shareOffs);
inpf.read(header,file_header_size);
if (sscanf(header,"%s %d",h,&v) != 2) {
ErrorN("format of machine file not recognised: %s", header);
Expand Down Expand Up @@ -340,51 +356,51 @@ Mach *Mach::Read(istream &inpf, int bs)
// if version > 3 then check share-id
if(Mach::fileid >= file_header_version3){
m->ReadData(inpf, s, bs);
int shID = shareOffs+mt->GetShareId();
if(prSharedMachines[shID] == NULL){
int shID = mt->GetShareId();
if(Mach::GetSharedMachine(shID) == NULL){
//fprintf(stderr, " ... new primary MachTab with share-id %d\n", shID);
prSharedMachines[shID] = mt;
Mach::SetSharedMachine(shID, mt);
if(mt->GetTabAdr() == NULL) {
Error("Mach::Read: machine should have its weights allocated!\n");
}
} else {
//fprintf(stderr, " ... cloning secondary MachTab with share-id %d\n", shID);
m = prSharedMachines[shID]->Clone();
m = (Mach::GetSharedMachine(shID))->Clone();
}

} else { // before file_header_version3, all MachTab in a MachPar share the weights

int shID = shareOffs + -1;
if(prSharedMachines[shID] == NULL ){
int shID = -1;
if(Mach::GetSharedMachine(shID) == NULL ){
if(mt->bExternal==0) m->ReadData(inpf, s, bs); //read the data for the first MachTab
else{
Error("The first MachTab should have its own data but is set to have external data\n");
}
debug2("Storing address (%p) of machine %d\n",mt->GetTabAdr(),m);
prSharedMachines[shID]=m;
debug2("Storing address (%p) of machine %p\n",mt->GetTabAdr(),m);
Mach::SetSharedMachine(-1, m);
} else {
m = prSharedMachines[shID]->Clone();
debug1(" cloning MachTab, address = %p\n", mt->GetTabAdr());
m = Mach::GetSharedMachine(shID)->Clone();
debug2(" cloning MachTab %p, address = %p\n", m, mt->GetTabAdr());
//fprintf(stderr, " cloning MachTab, address = %p\n", mt->GetTabAdr());
}
}
}
else if(Mach::fileid >= file_header_version4 && Mach::canShare(mtype)) {
//fprintf(stderr, "Shareable machine mtype = %d\n", mtype);
Shareable* sharem = dynamic_cast<Shareable*>(m);
int shID = shareOffs + sharem->GetShareId();
int shID = sharem->GetShareId();
//fprintf(stderr, "Shareable: external=%d share-id=%d\n", sharem->HasExternalData(), sharem->GetShareId());
if(sharem->HasExternalData()){
if(prSharedMachines[shID] != NULL){
if(Mach::GetSharedMachine(shID) != NULL){
//fprintf(stderr, " ... secondary machine with share-id %d -> cloning primary machine\n", sharem->GetShareId());
m = (MachLin*)prSharedMachines[shID]->Clone();
m = ((MachLin*)Mach::GetSharedMachine(shID))->Clone();
} else {
ErrorN("Found a secondary machine with shareid=%d, but the primary machine is not yet created\n", sharem->GetShareId());
}
} else {
if(sharem->GetShareId() != shareOffs + -1){
if(sharem->GetShareId() != -1){
//fprintf(stderr, " ... new primary machine with share-id %d\n", sharem->GetShareId());
prSharedMachines[shID] = m;
Mach::SetSharedMachine(shID, m);
}
//else { fprintf(stderr, " ... new primary machine with no sharing\n"); }
m->ReadData(inpf, s, bs);
Expand Down Expand Up @@ -433,12 +449,20 @@ bool Mach::CopyParams(Mach* mach)
&& (mach->idim == this->idim )
&& (mach->odim == this->odim )
&& (mach->bsize == this->bsize) ) {


this->nb_forw = mach->nb_forw;
this->nb_backw = mach->nb_backw;
this->update = mach->update;
return true;
}
else
{
  // Report why the copy was refused.
  // The field comparisons are only performed when mach is non-NULL, since
  // dereferencing a NULL pointer here would crash instead of diagnosing, and
  // each message is printed only when the corresponding field really differs.
  if(NULL == mach) {
    cerr << "Mach::CopyParams: mach is NULL" << endl;
  }
  else {
    if(mach->idim != this->idim) { cerr << "Mach::CopyParams: idim differs" << endl; }
    if(mach->odim != this->odim ) { cerr << "Mach::CopyParams: odim differs" << endl; }
    if(mach->bsize != this->bsize) { cerr << "Mach::CopyParams: bsize differs" << endl; }
  }
}
return false;
}

Expand Down
11 changes: 9 additions & 2 deletions Mach.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#include <iostream>
#include <fstream>
#include <map>
#include "Tools.h"
#include "Blas.h"
#include "Timer.h"
Expand Down Expand Up @@ -62,13 +63,14 @@
#define file_header_mtype_max 32
#define file_header_mtype_avr 33

extern int shareOffs;

class Mach
{
private:
void do_alloc(); // perform allocation of dynamic data structures
protected:
static int fileid;
static std::map<int, Mach *> prSharedMachines; // to store Mach pointers for sharing using clone() function
int idim, odim; // input and output dimension
int bsize; // block size (nb of example used in parallel)
ulong nb_forw; // nb of forward examples processed
Expand Down Expand Up @@ -145,20 +147,25 @@ class Mach
virtual bool CopyParams(Mach*); // copy parameters from another machine
// FILE IO
static Mach *Read(istream&, int=0); // read class from a stream
static Mach *ReadFromFile(const char*, int=0); // read class from a file, call Read
void Write(ostream&); // write content of class to a stream
void WriteToFile(const char*); // write content of class to a file (opens the file in binary mode and calls Write)
// Training
virtual void Forw(int=0, bool=false); // calculate outputs for current inputs
// backprop gradients from output to input and update all weights
virtual void Backw (const float lrate, const float wdecay, int =0);

// Returns the class-wide fileid value (Mach::Read compares it against the file_header_version* constants).
static int GetFileId(){ return fileid;}
// Overwrites the class-wide fileid value with id.
static void SetFileId(int id){ fileid = id;}
// Returns the machine registered under share-id i, or NULL when no machine
// has been registered for that id.
// The lookup uses find() rather than operator[] so that a failed lookup does
// not insert an empty (NULL) entry into prSharedMachines.
static Mach* GetSharedMachine(int i){
  std::map<int, Mach *>::const_iterator it = prSharedMachines.find(i);
  return (it != prSharedMachines.end()) ? it->second : NULL;
}
// Registers m under share-id i in prSharedMachines, replacing any machine previously stored for that id.
static void SetSharedMachine(int i, Mach* m){ prSharedMachines[i]=m; }

// Tells whether a machine of type mtype may take part in sharing:
// every type other than base and stab whose id does not exceed
// file_header_mtype_softmax_class qualifies.
static bool canShare(int mtype) {
  if (mtype == file_header_mtype_base) return false;
  if (mtype == file_header_mtype_stab) return false;
  return mtype <= file_header_mtype_softmax_class;
}
// Assigns val to the global shareOffs variable.
static void SetShareOffs(int val) { shareOffs = val; }
// Empties prSharedMachines; only the map entries are removed, the machines they point to are not deleted here.
static void ResetSharedMachines() { prSharedMachines.clear(); }
};

void GpuUnlock();
Expand Down
27 changes: 15 additions & 12 deletions MachConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <boost/program_options/parsers.hpp>
#include <cstring>
#include <strings.h>
#include "Mach.h"
#include "MachAvr.h"
#include "MachConfig.h"
#include "MachJoin.h"
Expand All @@ -44,6 +45,7 @@
#include "Tools.h"
#include "MachCopy.h"


namespace bpo = boost::program_options;

/**
Expand Down Expand Up @@ -119,7 +121,7 @@ MachConfig::MachConfig (bool bNeedConfFile, REAL rInitBias) :
("clip-weights,w" , opt_sem<REAL>::new_sem(&this->rClipWeights )->default_value( 0 ), "value for clipping weights (no clipping by default)")
("clip-gradients-weights,g",opt_sem<REAL>::new_sem(&this->rClipGradWeights)->default_value(0 ), "value for clipping gradients on weights (no clipping by default)")
("clip-gradients-bias,G", opt_sem<REAL>::new_sem(&this->rClipGradBias )->default_value( 0 ), "value for clipping gradients on biases (no clipping by default)")
("weight-decay,W" , opt_sem<REAL>::new_sem( )->default_value( 3E-05), "coefficient of weight decay")
("weight-decay,W" , opt_sem<REAL>::new_sem( )->default_value( 0.01), "coefficient of weight decay")
("backward-tm,V" , opt_sem<bool>::new_sem()->zero_tokens(), "use an inverse back-ward translation model")
("renormal,R" , opt_sem<bool>::new_sem()->zero_tokens(), "renormalize all probabilities, slow for large short-lists")
("recalc,r" , opt_sem<bool>::new_sem()->zero_tokens(), "recalculate global scores")
Expand Down Expand Up @@ -925,33 +927,33 @@ Mach *MachConfig::read_simple_machine (int iMachType, int iBlockSize, bool bMach
MachTab *pMachTab = NULL;

iShareId = vmMachParams["share-id"].as<int>();
if(iShareId != -1 && prSharedMachines[iShareId] != NULL) {
if(iShareId != -1 && Mach::GetSharedMachine(iShareId) != NULL) {
//TODO: should we check the machine type also?
if(prSharedMachines[iShareId]->GetMType() != iMachType){
if(Mach::GetSharedMachine(iShareId)->GetMType() != iMachType){
Error("WARNING: machines sharing weights have not the same type, check the config file!");
}
if(iMachType == file_header_mtype_tab){
if (prSharedMachines[iShareId]->GetIdim()!=1 || iOutputDim != prSharedMachines[iShareId]->GetOdim()){
if (Mach::GetSharedMachine(iShareId)->GetIdim()!=1 || iOutputDim != Mach::GetSharedMachine(iShareId)->GetOdim()){
Error("MachTab sharing weights have not the same input/output size, check the config file!");
}
}
else if(iInputDim != prSharedMachines[iShareId]->GetIdim() || iOutputDim != prSharedMachines[iShareId]->GetOdim()){
cerr << "mach[" << iShareId << "]->idim=" << prSharedMachines[iShareId]->GetIdim() << " idim=" << iInputDim << endl;
cerr << "mach[" << iShareId << "]->odim=" << prSharedMachines[iShareId]->GetOdim() << " odim=" << iOutputDim << endl;
else if(iInputDim != Mach::GetSharedMachine(iShareId)->GetIdim() || iOutputDim != Mach::GetSharedMachine(iShareId)->GetOdim()){
cerr << "mach[" << iShareId << "]->idim=" << Mach::GetSharedMachine(iShareId)->GetIdim() << " idim=" << iInputDim << endl;
cerr << "mach[" << iShareId << "]->odim=" << Mach::GetSharedMachine(iShareId)->GetOdim() << " odim=" << iOutputDim << endl;
Error("Machines sharing weights have not the same input/output size, check the config file!");
}
//cout << "Cloning previous machine with share-id " << iShareId << endl;
pNewMach = prSharedMachines[iShareId]->Clone();
pNewMach = Mach::GetSharedMachine(iShareId)->Clone();
if(iMachType == file_header_mtype_lin) pMachLin = (MachLin*) pNewMach;
else if(iMachType == file_header_mtype_tab) pMachTab = (MachTab*) pNewMach;
} else if(iShareId == -1 && prSharedMachines[iShareId] != NULL && iMachType == file_header_mtype_tab) {
} else if(iShareId == -1 && Mach::GetSharedMachine(iShareId) != NULL && iMachType == file_header_mtype_tab) {
// special case for MachTab
// All MachTab share their weights by default. This is for compatibility with previously built system
// cout << "Create MachTab with share-id " << iShareId << " -> cloning existing machine with that share-id" << endl;
if(iInputDim != prSharedMachines[iShareId]->GetIdim() || iOutputDim != prSharedMachines[iShareId]->GetOdim()){
if(1 != Mach::GetSharedMachine(iShareId)->GetIdim() || iOutputDim != Mach::GetSharedMachine(iShareId)->GetOdim()){
Error("Machines sharing weights have not the same input/output size, check the config file!");
}
pNewMach = pMachTab = ((MachTab*)prSharedMachines[iShareId])->Clone();
pNewMach = pMachTab = ((MachTab*)Mach::GetSharedMachine(iShareId))->Clone();
} else {
//if(iShareId==-1) cout << "Creating new machine with no share-id" << endl;
//else cout << "Creating new machine with share-id " << iShareId << endl;
Expand All @@ -961,6 +963,7 @@ Mach *MachConfig::read_simple_machine (int iMachType, int iBlockSize, bool bMach
break;
case file_header_mtype_tab:
pNewMach = pMachTab = new MachTab(iInputDim, iOutputDim, iCurBlockSize, iNbForward, iNbBackward, iShareId);
Mach::SetSharedMachine(iShareId, pNewMach);
break;
case file_header_mtype_lin:
pNewMach = pMachLin = new MachLin(iInputDim, iOutputDim, iCurBlockSize, iNbForward, iNbBackward, iShareId);
Expand Down Expand Up @@ -997,7 +1000,7 @@ Mach *MachConfig::read_simple_machine (int iMachType, int iBlockSize, bool bMach
break;
}
if(iShareId != -1){
prSharedMachines[iShareId] = pNewMach;
Mach::SetSharedMachine(iShareId, pNewMach);
}
bNewShareId = true;
}
Expand Down
2 changes: 0 additions & 2 deletions MachConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -565,8 +565,6 @@ class MachConfig
boost::program_options::variables_map vmGeneralOptions; ///< map of general options
std::map<std::string,Mach*> mMachNameMap; ///< map of machine names

std::map<int, Mach *> prSharedMachines; // to store Mach pointers for sharing using clone() function

/**
* open configuration file
* @return false in case of error, true otherwise
Expand Down
2 changes: 1 addition & 1 deletion MachLin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@ void MachLin::Backw(const float lrate, const float wdecay, int eff_bsize)
static REAL real1=1.0, real0=0.0;
static char transN='N', transT='T';
REAL lrate_bs = lr_coeff * lrate / sqrt(GetBsize()); // scale by block size !
REAL epsilon = 1.0 + lrate_bs * wdecay;
REAL epsilon = 1.0 - lrate_bs * wdecay;

if (eff_bsize<=0) eff_bsize=bsize;
if (!grad_out)
Expand Down
7 changes: 6 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ NEXT_PATCH_VERSION = $(MAJOR).$(MINOR).$(shell expr $(PATCH) + 1)-b$(BUILD)
.DEFAULT: all
CUDA_ROOT ?= /opt/cuda
CUDA ?= 0
DEBUG ?= 0
# K20: sm_35 / M2090: sm_20 / GTX690: sm_30 / GTX580: sm_20
NVCC_FLAGS ?= -g -arch=sm_35 -use_fast_math

Expand Down Expand Up @@ -171,7 +172,11 @@ OPT_FLAGS?=-mtune=native -march=native -O3 -Ofast
# corei7: eg. Intel X5675, Core i7 with sse4_2, aes, pclmulqdq)
# corei7-avx: eg. Intel E5-2670 which adds avx
# corei7-avx-i: eg. Intel E5-2690v2 which adds avx
CFLAGS=${OPT_FLAGS} -Wall -g ${DB} ${BLAS} ${BOLM_FLAGS} ${MOSES_INC} ${MOSES_CFLAGS}
CFLAGS=${OPT_FLAGS} -Wall -g ${DB} ${BLAS} ${BOLM_FLAGS} ${MOSES_INC} ${MOSES_CFLAGS}
#-D DEBUG=1
ifneq "$(DEBUG)" "0"
CFLAGS+=-DDEBUG=1
endif

OBJS:=$(SRCS:.cpp=.o)
OBJS:=$(OBJS:.cu=.o)
Expand Down
6 changes: 6 additions & 0 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ Prerequisites:
Version history
---------------

July 14 2015 V4.01

- mainly corrected a bug in weight decay (the sign was wrong).
This probably had little effect, since the default value of 3e-5 was rather small.
The new default value is 1e-2, and we have observed perplexity improvements on several tasks.

Jun 28 2015 V4.0
- bug fixes:
- deterministic sorting of the wordlist for short lists
Expand Down
6 changes: 6 additions & 0 deletions docs/RELEASE_NOTES
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
*****************************************************************************************
RELEASE V4.01 July 14 2015

- mainly corrected a bug in weight decay (the sign was wrong).
This probably had little effect, since the default value of 3e-5 was rather small.
The new default value is 1e-2, and we have observed perplexity improvements on several tasks.

RELEASE V4.0, June 28 2015

- bug fixes:
Expand Down

0 comments on commit 8abca5e

Please sign in to comment.