Skip to content

Commit

Permalink
some minor updates, added debug() instructions
Browse files Browse the repository at this point in the history
  • Loading branch information
hschwenk committed Jul 9, 2015
1 parent 0aff529 commit cbcc372
Show file tree
Hide file tree
Showing 49 changed files with 496 additions and 16 deletions.
8 changes: 8 additions & 0 deletions BackoffLmKen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ int BackoffLmKen::GetSentenceIds(WordID *&wid, const string &sentence, bool bos,
nw++;
}
}
debug1(" parsing found %d words\n", nw);

// end sentence with EOS ?
if (eos) {
Expand All @@ -111,6 +112,7 @@ int BackoffLmKen::GetSentenceIds(WordID *&wid, const string &sentence, bool bos,
}

wid = &(wid_vect.front());
debug4("* split sent with %d words into %d-grams (bos=%d, eos=%d):\n", nw, ken_ngram->Order(), map_ken2wid[ken_vocab->BeginSentence()], map_ken2wid[ken_vocab->EndSentence()]);
return nw;
}

Expand Down Expand Up @@ -138,7 +140,9 @@ REAL BackoffLmKen::BoffLnPw(char **ctxt, char *w, int req_order)
for (int i = 0; i < (req_order - 1); i++) {
ken_ngram->Score(state, ken_vocab->Index(ctxt[i]), out_state);
state = out_state;
debug2(" - context position ken=%d, ken_idx=%d\n", i, ken_vocab->Index(ctxt[i]));
}
debug2(" - predict ken_idx=%d, log10P=%e\n", ken_vocab->Index(w), ken_ngram->Score(state, ken_vocab->Index(w), out_state));

// we need to convert from log_10 to ln
return M_LN10 * ken_ngram->Score(state, ken_vocab->Index(w), out_state);
Expand Down Expand Up @@ -168,7 +172,9 @@ REAL BackoffLmKen::BoffLnPid(REAL *ctxt, WordID predw, int req_order)
for (int i = 0; i < (req_order - 1); i++) {
ken_ngram->Score(state, map_cslm2ken[(WordID) ctxt[i]], out_state);
state = out_state;
debug2(" - context position ken=%d, ken_idx=%d\n", i, map_cslm2ken[(WordID) ctxt[i]]);
}
debug3(" - predict cslm_id=%d, ken_idx=%d, log10P=%e\n", predw, map_cslm2ken[predw], ken_ngram->Score(state, map_cslm2ken[predw], out_state));

// we need to convert from log_10 to ln
return M_LN10 * ken_ngram->Score(state, map_cslm2ken[predw], out_state);
Expand Down Expand Up @@ -198,7 +204,9 @@ REAL BackoffLmKen::BoffLnStd(WordID *ctxt, WordID predw, int req_order)
for (int i = 0; i < (req_order - 1); i++) {
ken_ngram->Score(state, ctxt[i], out_state);
state = out_state;
debug2(" - context position ken=%d, ken_idx=%d\n", i, ctxt[i]);
}
debug3(" - predict cslm_id=%d, ken_idx=%d, log10P=%e\n", predw, predw, ken_ngram->Score(state, predw, out_state));

// we need to convert from log_10 to ln
return M_LN10 * ken_ngram->Score(state, predw, out_state);
Expand Down
3 changes: 3 additions & 0 deletions BackoffLmSri.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ BackoffLmSri::BackoffLmSri(char *p_fname, int p_max_order, const WordList &wlist
WordList::const_iterator iter = wlist.Begin(), end = wlist.End();
for (size_t ci=0; iter!=end; iter++, ci++) {
VocabIndex vi = sri_vocab->getIndex(iter->word);
//debug3("'%s' bin=%d -> sri=%d\n", iter->word, ci, vi);
if (vi == Vocab_None) {
fprintf(stderr,"word %s not found at pos %zu\n", iter->word, ci );
}
Expand Down Expand Up @@ -156,6 +157,7 @@ int BackoffLmSri::GetSentenceIds(WordID *&wid, const string &sentence, bool bos,
strcpy(str,sentence.c_str()); // we need to copy since parseWords() modifies the string
nw = sri_vocab->parseWords(str, vstr, max_words - 1);
if (nw >= max_words-1) Error("too many words in one hypothesis\n");
debug1(" parsing found %d words\n", nw);

int b=0;
// start sentence with BOS ?
Expand All @@ -171,5 +173,6 @@ int BackoffLmSri::GetSentenceIds(WordID *&wid, const string &sentence, bool bos,
if (eos) wid_table[nw++]=sri_vocab->seIndex();

wid = wid_table;
debug4("* split sent with %d words into %d-grams (bos=%d, eos=%d):\n", nw, sri_order, sri_vocab->ssIndex(), sri_vocab->seIndex());
return nw;
}
2 changes: 2 additions & 0 deletions BackoffLmSri.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,10 @@ class BackoffLmSri : public BackoffLm {
for (i=0; i<req_order-1; i++) { // build context vector in REVERSE order
int j=req_order-i-2;
sri_context_idxs[i] = ctxt[j];
//debug4(" - context position cslm=%d -> sri=%d, sri_idx=%d word=%s\n", j, i, sri_context_idxs[i], sri_vocab->getWord(sri_context_idxs[i]) );
}
sri_context_idxs[i]=Vocab_None; // terminate, this is needed to specify the length of the context
//debug3(" - predict cslm_id=%d, sri_idx=%d word=%s\n", predw, predw, sri_vocab->getWord(predw) );

#ifdef DEBUG
printf(" - SRI %d-gram context: ",req_order);
Expand Down
1 change: 1 addition & 0 deletions Blas.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ inline void call_gemv (REAL *dest, REAL *matrix, REAL *source, REAL *bias,
// m x n


debug0("-mkl- call gemv\n");
#ifdef BLAS_CUDA
COPY(dim_dest,bias,inc,dest,inc); // TODO: verify
GEMV(trans, dim_dest, dim_src, fact, matrix, dim_dest, source, inc, fact, dest, inc);
Expand Down
10 changes: 10 additions & 0 deletions Data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ Data::Data(const char *p_fname, Data *other_data, bool use_class)
sr_wl_idx(-1), tg_wl_idx(-1), current_df(0), idx(-1),
mem_cdf(NULL), mem_inp(NULL), mem_trg(NULL), input(NULL), target(NULL), aux(NULL)
{
debug0("* constructor Data\n");
ReadFile(other_data, use_class);
}

Expand Down Expand Up @@ -354,6 +355,7 @@ void Data::ReadFile(Data *other_data, bool use_class)

Data::~Data()
{
debug0("* destructor Data\n");
if (preload) {
delete [] mem_cdf;
delete [] mem_inp;
Expand Down Expand Up @@ -454,12 +456,14 @@ void Data::Preload()
int odim1=(*itf)[0]->GetOdim();

while (++n < maxn) {
debug1("getting example %d\n",idx);
mem_cdf[idx] = cdf;

bool ok=false;
while (!ok) {
// advance synchronously all factors until ok
for (vector<DataFile*>::iterator it = (*itf).begin(); it!=(*itf).end(); ++it) {
debug1(" next factor %ld\n", it-(*itf).begin());
if (! (*it)->Next()) (*it)->Rewind(); // TODO: deadlock if file empty
}

Expand All @@ -469,12 +473,14 @@ void Data::Preload()
ok = (drand48() < (*itf)[0]->GetResamplCoef());
}

debug1(" %s\n", ok ? "keep" : "skip");
}

// copy all factors sequentially in memory
REAL *adr_inp=mem_inp+idx*idim;
REAL *adr_trg=mem_trg+idx*odim;
for (vector<DataFile*>::iterator it = (*itf).begin(); it!=(*itf).end(); ++it) {
debug2(" load factor %ld to address %p\n", it-(*itf).begin(), adr_inp);
memcpy(adr_inp, (*it)->input, idim1*sizeof(REAL));
adr_inp+=idim1;
if (odim1 > 0) {
Expand All @@ -494,6 +500,7 @@ void Data::Preload()
REAL m=0, *mptr;
for (e=0, mptr=mem_inp+i; e<idx; e++, mptr+=idim) m+=*mptr;
m = m/idx; // mean
debug2("mean[%d]=%f\n", i, m);
for (e=0, mptr=mem_inp+i; e<idx; e++, mptr+=idim) *mptr -= m;
}
}
Expand All @@ -506,6 +513,7 @@ void Data::Preload()
for (e=0, mptr=mem_inp+i; e<idx; e++, mptr+=idim) { m+=*mptr; m2+=*mptr * *mptr; }
m = m/idx; // mean
m2 = m2/idx - m; // var = 1/n sum_i x_i^2 - mu^2
debug3(" mean, var[%d]=%f %f\n", i, m, m2);
if (m2>0)
for (e=0, mptr=mem_inp+i; e<idx; e++, mptr+=idim)
*mptr = (*mptr - m) / m2;
Expand All @@ -530,6 +538,7 @@ void Data::Preload()

void Data::Rewind()
{
debug0("** Data::Rewind()\n");
if (preload) {
// clear all data, resample and shuffle again
Preload();
Expand All @@ -541,6 +550,7 @@ void Data::Rewind()
(*it)->Rewind();
}
idx = -1;
debug0("** Data::Rewind() done\n");
}

/**************************
Expand Down
4 changes: 4 additions & 0 deletions DataAscii.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const char* DATA_FILE_ASCII="DataAscii";
DataAscii::DataAscii(char *p_prefix, ifstream &ifs, int p_aux_dim, const string& p_aux_ext, int p_nb_SentSc, const string& p_SentSc_ext,int p_betweenSentCtxt , DataAscii *prev_df)
: DataFile::DataFile(p_prefix, ifs, p_aux_dim, p_aux_ext, p_nb_SentSc, p_SentSc_ext, p_betweenSentCtxt, prev_df)
{
debug0("** constructor DataAscii\n");

char full_fname[max_word_len]="";

Expand Down Expand Up @@ -70,6 +71,7 @@ DataAscii::DataAscii(char *p_prefix, ifstream &ifs, int p_aux_dim, const string&

DataAscii::~DataAscii()
{
debug0("** destructor DataAscii\n");
dfs.close();
if (idim>0) delete [] input;
if (odim>0) delete [] target_vect;
Expand All @@ -82,6 +84,7 @@ DataAscii::~DataAscii()

void DataAscii::Rewind()
{
debug0("*** DataAscii::Rewind()\n");
dfs.seekg(0, dfs.beg);
char buf[DATA_LINE_LEN];
dfs.getline(buf,DATA_LINE_LEN);
Expand All @@ -95,6 +98,7 @@ void DataAscii::Rewind()

bool DataAscii::Next()
{
// debug0("*** DataAscii::Next() "); cout<<idx<< ", fpos=" << dfs.tellg() << endl;
char line[DATA_LINE_LEN];
dfs.getline(line, DATA_LINE_LEN);
if (dfs.eof()) return false;
Expand Down
2 changes: 2 additions & 0 deletions DataAsciiClass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const char* DATA_FILE_ASCIICLASS="DataAsciiClass";
DataAsciiClass::DataAsciiClass(char *p_prefix, ifstream &ifs, int p_aux_dim, const string& p_aux_ext, int p_nb_SentSc, const string& p_SentSc_ext,int p_betweenSentCtxt, DataAsciiClass *prev_df)
: DataAscii::DataAscii(p_prefix, ifs, p_aux_dim, p_aux_ext, p_nb_SentSc, p_SentSc_ext, p_betweenSentCtxt, prev_df)
{
debug0("** constructor DataAsciiClass\n");

if (prev_df) {
tgt0=prev_df->tgt0;
Expand All @@ -50,6 +51,7 @@ DataAsciiClass::DataAsciiClass(char *p_prefix, ifstream &ifs, int p_aux_dim, con

bool DataAsciiClass::Next()
{
// debug0("*** DataAsciiClass::Next() "); cout<<idx<< ", fpos=" << dfs.tellg() << endl;
char line[DATA_LINE_LEN];
dfs.getline(line, DATA_LINE_LEN);
if (dfs.eof()) return false;
Expand Down
6 changes: 6 additions & 0 deletions DataFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ DataFile::DataFile(char *p_path_prefix, ifstream &ifs, int p_aux_dim, const stri
nb_SentSc(p_nb_SentSc), betweenSent_ctxt(p_betweenSentCtxt), SentSc_ext(p_SentSc_ext),
idx(-1), input(NULL), target_vect(NULL), aux(NULL), target_id(0)
{
debug0("** constructor DataFile\n");
char p_fname[DATA_LINE_LEN];

ifs >> p_fname;
Expand Down Expand Up @@ -69,6 +70,7 @@ DataFile::DataFile(char *p_path_prefix, char *p_fname, const float p_rcoeff)
: idim(0), odim(0), auxdim(0), nbex(0), resampl_coeff(p_rcoeff), path_prefix(p_path_prefix), fname(NULL),
idx(-1), input(NULL), target_vect(NULL), aux(NULL), target_id(0)
{
debug0("** constructor DataFile with fname\n");
if (NULL != p_fname)
fname = strdup(p_fname);

Expand All @@ -78,6 +80,7 @@ DataFile::DataFile(char *p_path_prefix, char *p_fname, const float p_rcoeff)

DataFile::~DataFile()
{
debug0("** destructor DataFile\n");
if (fname) free(fname);
if (aux_fs.is_open())
aux_fs.close();
Expand Down Expand Up @@ -153,6 +156,7 @@ int DataFile::Info(const char *txt)

// Base-class implementation of Rewind(): it performs no repositioning itself.
// It emits a debug trace and then calls Error() with a message stating that
// this method should be overridden.
// NOTE(review): presumably the concrete data-file classes supply the real
// rewind logic and Error() terminates the program -- confirm against the
// class declaration and the definition of Error().
void DataFile::Rewind()
{
// trace entry into the base-class version (debug0 takes only a format string)
debug0("*** DataFile::Rewind()\n");
// reaching this point means no subclass implementation was dispatched
Error("DataFile::Rewind() should be overriden");
}

Expand All @@ -174,6 +178,7 @@ bool DataFile::Next()

int DataFile::Resampl()
{
//debug0("*** DataFile::Resampl()\n");
bool ok=false;

while (!ok) {
Expand All @@ -184,6 +189,7 @@ int DataFile::Resampl()
//cout << " ok=" << ok << endl;
}

//debug0("*** DataFile::Resampl() end\n");
return idx;
}

6 changes: 6 additions & 0 deletions DataMnist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ uint DataMnist::read_iswap(int fd) {

// swap integer Big Endian -> little Endian
ps[0]=pi[3]; ps[1]=pi[2]; ps[2]=pi[1]; ps[3]=pi[0];
debug2("read=%4x, swap=%4x\n", i, s);

return s;
}
Expand All @@ -59,6 +60,7 @@ uint DataMnist::read_iswap(int fd) {
DataMnist::DataMnist(char *p_prefix, ifstream &ifs, int p_aux_dim, const string& p_aux_ext, int p_nb_SentSc, string& p_SentSc_ext,int p_betweenSentCtxt, DataMnist *prev_df)
: DataFile::DataFile(p_prefix, ifs, p_aux_dim, p_aux_ext, p_nb_SentSc, p_SentSc_ext, p_betweenSentCtxt, prev_df)
{
debug0("** constructor DataMnist\n");
char full_fname[max_word_len]="";

printf(" - %s: MNIST data ", fname); fflush(stdout);
Expand Down Expand Up @@ -125,6 +127,7 @@ DataMnist::DataMnist(char *p_prefix, ifstream &ifs, int p_aux_dim, const string&

DataMnist::~DataMnist()
{
debug0("** destructor DataMnist\n");
close(dfd);
close(lfd);
if (idim>0) { delete [] input; delete [] ubuf; }
Expand All @@ -139,6 +142,7 @@ DataMnist::~DataMnist()

void DataMnist::Rewind()
{
debug0("*** DataMnist::Rewind()\n");
lseek(dfd, 16, SEEK_SET);
lseek(lfd, 8, SEEK_SET);
if (aux_fs.is_open())
Expand All @@ -151,6 +155,7 @@ void DataMnist::Rewind()

bool DataMnist::Next()
{
// debug0("*** DataMnist::Next() "); cout<<idx<< " << endl;

// read next image
int t=idim*sizeof(unsigned char);
Expand Down Expand Up @@ -182,6 +187,7 @@ bool DataMnist::Next()
sprintf(msg, "no examples left in class file %s", cl_fname);
Error(msg);
}
debug1("class %d\n", ubuf[0]);
target_id = (int) ubuf[0];
if (target_id>=odim) {
ErrorN("example %lu has a target of %d, but we have only %d classes\n", idx+1, target_id, odim);
Expand Down
7 changes: 7 additions & 0 deletions DataNgramBin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ DataNgramBin::DataNgramBin(char *p_prefix, ifstream &ifs, int p_aux_dim, const s
: DataFile::DataFile(p_prefix, ifs, p_aux_dim, p_aux_ext, p_nb_SentSc, p_SentSc_ext,p_betweenSentCtxt, prev_df),
order(0), tgpos(0), eospos(0), mode(0), nbw(0), nbs(0), nbu(0), nbi(0)
{
debug0("*** constructor DataNgramBin\n");
// DataNgramBin <file_name> <resampl_coeff> <order> [<tgpos>] <mode>

// parse addtl params ->
Expand Down Expand Up @@ -197,6 +198,7 @@ DataNgramBin::DataNgramBin(char *p_fname, float p_rcoeff, int p_order)
: DataFile::DataFile(NULL, p_fname, p_rcoeff),
order(p_order), tgpos(p_order - 1), eospos(0), mode(3), nbw(0), nbs(0), nbu(0), nbi(0)
{
debug0("*** constructor DataNgramBin with fname\n");

do_constructor_work();
// skip counting for efficiency reasons
Expand All @@ -209,6 +211,7 @@ DataNgramBin::DataNgramBin(char *p_fname, float p_rcoeff, int p_order, int p_tgp
: DataFile::DataFile(NULL, p_fname, p_rcoeff),
order(p_order), tgpos(p_tgpos), mode(p_mode), nbw(0), nbs(0), nbu(0), nbi(0)
{
debug0("*** constructor DataNgramBin with fname\n");
if (tgpos<0 || tgpos>=order)
ErrorN("wrong value of target position: %d not in [0,%d]\n",tgpos,order-1);

Expand All @@ -221,6 +224,7 @@ DataNgramBin::DataNgramBin(char *p_fname, float p_rcoeff, int p_order, int p_tgp

DataNgramBin::~DataNgramBin()
{
debug0("*** destructor DataNgramBin\n");

close(fd);
if (idim>0) {
Expand All @@ -238,6 +242,7 @@ DataNgramBin::~DataNgramBin()
* */
bool DataNgramBin::Next()
{
//debug0("*** DataNgramBin::Next() \n");
bool ok=false;
string line_sc;
int i;
Expand Down Expand Up @@ -353,12 +358,14 @@ bool DataNgramBin::Next()

void DataNgramBin::Rewind()
{
debug0("*** DataNgramBin::Rewind()\n");
lseek(fd, header_len, SEEK_SET);
if (aux_fs.is_open())
aux_fs.seekg(0, aux_fs.beg);
if(SentSc_fs.is_open())
SentSc_fs.seekg(0, aux_fs.beg);
idx=-1;
debug0("*** DataNgramBin::Rewind() done\n");
// initialize read buffer
buf_n=0; buf_pos=-1;
eospos = 0;
Expand Down
Loading

0 comments on commit cbcc372

Please sign in to comment.