diff --git a/.classpath b/.classpath
index 2e6d8d18..5552ff34 100644
--- a/.classpath
+++ b/.classpath
@@ -18,25 +18,25 @@
-
+
-
+
-
+
-
+
-
-
+
+
@@ -96,11 +96,11 @@
-
-
+
+
-
-
+
+
@@ -112,12 +112,12 @@
-
+
-
+
diff --git a/src/java/integration/ivory/integration/wikipedia/SearchSequenceFiles.java b/src/java/integration/ivory/integration/wikipedia/SearchSequenceFiles.java
index 0bc603b9..f0d758cc 100644
--- a/src/java/integration/ivory/integration/wikipedia/SearchSequenceFiles.java
+++ b/src/java/integration/ivory/integration/wikipedia/SearchSequenceFiles.java
@@ -31,7 +31,7 @@
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
-import tl.lin.data.map.HMapSFW;
+import tl.lin.data.map.HMapStFW;
/**
* Read sequence files, output key-value pairs that match specified key.
@@ -59,14 +59,14 @@ public SearchSequenceFiles() {
}
static class MyMapperTerm extends MapReduceBase implements
- Mapper<IntWritable, HMapSFW, IntWritable, HMapSFW> {
+ Mapper<IntWritable, HMapStFW, IntWritable, HMapStFW> {
private String[] keys;
public void configure(JobConf job) {
keys = job.get("keys").split(",");
}
- public void map(IntWritable key, HMapSFW value, OutputCollector<IntWritable, HMapSFW> output,
+ public void map(IntWritable key, HMapStFW value, OutputCollector<IntWritable, HMapStFW> output,
Reporter reporter) throws IOException {
for (String compareKey : keys) {
int k = Integer.parseInt(compareKey);
@@ -131,8 +131,8 @@ public int run(String[] args) throws Exception {
if (valueClassName.contains("HMapSFW")) {
job.setMapperClass(MyMapperTerm.class);
- job.setMapOutputValueClass(HMapSFW.class);
- job.setOutputValueClass(HMapSFW.class);
+ job.setMapOutputValueClass(HMapStFW.class);
+ job.setOutputValueClass(HMapStFW.class);
} else {
job.setMapperClass(MyMapperInt.class);
job.setMapOutputValueClass(WeightedIntDocVector.class);
diff --git a/src/java/integration/ivory/integration/wikipedia/VerifyWikipediaProcessingCrosslingual.java b/src/java/integration/ivory/integration/wikipedia/VerifyWikipediaProcessingCrosslingual.java
index 3be8e759..9211b816 100644
--- a/src/java/integration/ivory/integration/wikipedia/VerifyWikipediaProcessingCrosslingual.java
+++ b/src/java/integration/ivory/integration/wikipedia/VerifyWikipediaProcessingCrosslingual.java
@@ -19,7 +19,7 @@
import org.junit.Test;
import tl.lin.data.map.HMapIFW;
-import tl.lin.data.map.HMapSFW;
+import tl.lin.data.map.HMapStFW;
import tl.lin.data.map.MapIF;
import tl.lin.data.map.MapKF;
@@ -233,7 +233,7 @@ public void runBuildIndexEnSide() throws Exception {
"-input=" + enwikiEn + "/wt-term-doc-vectors",
"-output=" + enwikiEn + "/test_wt-term-doc-vectors",
"-keys=" + enTermDocVector1Id + "," + enTermDocVector2Id,
- "-valueclass=" + HMapSFW.class.getCanonicalName()};
+ "-valueclass=" + HMapStFW.class.getCanonicalName()};
IntegrationUtils.exec(Joiner.on(" ").join(args));
args = new String[] { "hadoop jar", IntegrationUtils.getJar("dist", "ivory"),
@@ -252,7 +252,7 @@ public void verifyTermDocVectorsEn() throws Exception {
SequenceFile.Reader reader;
IntWritable key = new IntWritable();
- HMapSFW value = new HMapSFW();
+ HMapStFW value = new HMapStFW();
reader = new SequenceFile.Reader(fs.getConf(),
SequenceFile.Reader.file(new Path(enwikiEn + "/test_wt-term-doc-vectors/part-00000")));
@@ -365,7 +365,7 @@ public void runBuildIndexDeSide() throws Exception {
"-input=" + dewikiEn + "/wt-term-doc-vectors",
"-output=" + dewikiEn + "/test_wt-term-doc-vectors",
"-keys=" + deTermDocVector1Id + "," + deTermDocVector2Id,
- "-valueclass=" + HMapSFW.class.getCanonicalName()};
+ "-valueclass=" + HMapStFW.class.getCanonicalName()};
IntegrationUtils.exec(Joiner.on(" ").join(args));
args = new String[] { "hadoop jar", IntegrationUtils.getJar("dist", "ivory"),
@@ -384,7 +384,7 @@ public void verifyTermDocVectorsDe() throws Exception {
SequenceFile.Reader reader;
IntWritable key = new IntWritable();
- HMapSFW value = new HMapSFW();
+ HMapStFW value = new HMapStFW();
reader = new SequenceFile.Reader(fs.getConf(),
SequenceFile.Reader.file(new Path(dewikiEn + "/test_wt-term-doc-vectors/part-00000")));
@@ -434,7 +434,7 @@ public void verifyIntDocVectorsDe() throws Exception {
reader.close();
}
- private void verifyTermDocVector(Map doc, HMapSFW value) {
+ private void verifyTermDocVector(Map doc, HMapStFW value) {
assertTrue(value != null);
for (Map.Entry entry : doc.entrySet()) {
assertTrue(value.containsKey(entry.getKey()));
diff --git a/src/java/integration/ivory/integration/wikipedia/VerifyWikipediaProcessingMonolingual.java b/src/java/integration/ivory/integration/wikipedia/VerifyWikipediaProcessingMonolingual.java
index 2e23987a..becd00b3 100644
--- a/src/java/integration/ivory/integration/wikipedia/VerifyWikipediaProcessingMonolingual.java
+++ b/src/java/integration/ivory/integration/wikipedia/VerifyWikipediaProcessingMonolingual.java
@@ -19,7 +19,7 @@
import org.junit.Test;
import tl.lin.data.map.HMapIFW;
-import tl.lin.data.map.HMapSFW;
+import tl.lin.data.map.HMapStFW;
import tl.lin.data.map.MapIF;
import tl.lin.data.map.MapKF;
@@ -213,7 +213,7 @@ public void runBuildIndexGalago() throws Exception {
"-input=" + galagoIndex + "/wt-term-doc-vectors",
"-output=" + galagoIndex + "/test_wt-term-doc-vectors",
"-keys=" + galagoTermDocVector1Id + "," + galagoTermDocVector2Id,
- "-valueclass=" + HMapSFW.class.getCanonicalName() };
+ "-valueclass=" + HMapStFW.class.getCanonicalName() };
IntegrationUtils.exec(Joiner.on(" ").join(args));
args = new String[] { "hadoop jar", IntegrationUtils.getJar("dist", "ivory"),
@@ -226,7 +226,7 @@ public void runBuildIndexGalago() throws Exception {
System.out.println("verifyTermDocVectorsGalago");
IntWritable key1 = new IntWritable();
- HMapSFW value1 = new HMapSFW();
+ HMapStFW value1 = new HMapStFW();
SequenceFile.Reader reader1 = new SequenceFile.Reader(fs.getConf(),
SequenceFile.Reader.file(new Path(galagoIndex + "/test_wt-term-doc-vectors/part-00000")));
@@ -327,7 +327,7 @@ public void runBuildIndexOpennlp() throws Exception {
"-input=" + opennlpIndex + "/wt-term-doc-vectors",
"-output=" + opennlpIndex + "/test_wt-term-doc-vectors",
"-keys=" + opennlpTermDocVector1Id + "," + opennlpTermDocVector2Id,
- "-valueclass=" + HMapSFW.class.getCanonicalName() };
+ "-valueclass=" + HMapStFW.class.getCanonicalName() };
IntegrationUtils.exec(Joiner.on(" ").join(args));
args = new String[] { "hadoop jar", IntegrationUtils.getJar("dist", "ivory"),
@@ -340,7 +340,7 @@ public void runBuildIndexOpennlp() throws Exception {
System.out.println("verifyTermDocVectorsOpennlp");
IntWritable key1 = new IntWritable();
- HMapSFW value1 = new HMapSFW();
+ HMapStFW value1 = new HMapStFW();
SequenceFile.Reader reader1 = new SequenceFile.Reader(fs.getConf(),
SequenceFile.Reader.file(new Path(opennlpIndex + "/test_wt-term-doc-vectors/part-00000")));
@@ -385,7 +385,7 @@ public void runBuildIndexOpennlp() throws Exception {
reader2.close();
}
- private void verifyTermDocVector(Map doc, HMapSFW value) {
+ private void verifyTermDocVector(Map doc, HMapStFW value) {
assertTrue(value != null);
for (Map.Entry entry : doc.entrySet()) {
System.out.println("checking " + entry.getKey() + ": expected = " + entry.getValue() + ", actual = " + value.get(entry.getKey()));
diff --git a/src/java/main/ivory/core/preprocess/BuildTargetLangWeightedIntDocVectors.java b/src/java/main/ivory/core/preprocess/BuildTargetLangWeightedIntDocVectors.java
index 8a523fcf..4c0beb0b 100644
--- a/src/java/main/ivory/core/preprocess/BuildTargetLangWeightedIntDocVectors.java
+++ b/src/java/main/ivory/core/preprocess/BuildTargetLangWeightedIntDocVectors.java
@@ -44,7 +44,7 @@
import org.apache.log4j.Logger;
import tl.lin.data.map.HMapIFW;
-import tl.lin.data.map.HMapSFW;
+import tl.lin.data.map.HMapStFW;
import tl.lin.data.map.MapKF;
import edu.umd.cloud9.util.PowerTool;
import edu.umd.hooka.Vocab;
@@ -72,7 +72,7 @@ protected static enum Terms{
}
private static class MyMapper extends MapReduceBase implements
- Mapper {
+ Mapper {
static IntWritable mDocno = new IntWritable();
private boolean normalize = false;
@@ -102,7 +102,7 @@ public void configure(JobConf conf){
HMapIFW weightedVector = new HMapIFW();
float sum2;
- public void map(IntWritable docno, HMapSFW doc,
+ public void map(IntWritable docno, HMapStFW doc,
OutputCollector output, Reporter reporter)
throws IOException {
mDocno.set(docno.get());
diff --git a/src/java/main/ivory/core/preprocess/BuildTranslatedTermDocVectors.java b/src/java/main/ivory/core/preprocess/BuildTranslatedTermDocVectors.java
index 15ed613b..ccf2dc6a 100644
--- a/src/java/main/ivory/core/preprocess/BuildTranslatedTermDocVectors.java
+++ b/src/java/main/ivory/core/preprocess/BuildTranslatedTermDocVectors.java
@@ -37,7 +37,7 @@
import org.apache.log4j.Logger;
import tl.lin.data.map.HMapIFW;
-import tl.lin.data.map.HMapSFW;
+import tl.lin.data.map.HMapStFW;
import tl.lin.data.map.MapIF;
import com.google.common.collect.Maps;
@@ -66,7 +66,7 @@ protected static enum Docs { DBG, ZERO, SHORT, SHORTAfterTranslation, Total };
protected static enum DF { TransDf, NoDf }
private static class MyMapperTrans extends MapReduceBase implements
- Mapper<IntWritable, TermDocVector, IntWritable, HMapSFW> {
+ Mapper<IntWritable, TermDocVector, IntWritable, HMapStFW> {
private ScoringModel model;
// eVocabSrc is the English vocabulary for probability table e2f_Probs.
@@ -209,7 +209,7 @@ public void configure(JobConf conf) {
}
public void map(IntWritable docno, TermDocVector doc,
- OutputCollector<IntWritable, HMapSFW> output, Reporter reporter) throws IOException {
+ OutputCollector<IntWritable, HMapStFW> output, Reporter reporter) throws IOException {
if (docno.get() % SAMPLING != 0) {
return; // for generating sample document vectors. no sampling if SAMPLING=1
}
@@ -236,7 +236,7 @@ public void map(IntWritable docno, TermDocVector doc,
int docLen = CLIRUtils.translateTFs(doc, tfS, eVocabSrc, eVocabTrg, fVocabSrc, fVocabTrg,
e2f_Probs, f2e_Probs, tokenizer, LOG);
- HMapSFW v = CLIRUtils.createTermDocVector(docLen, tfS, eVocabTrg, model, dict, dfTable,
+ HMapStFW v = CLIRUtils.createTermDocVector(docLen, tfS, eVocabTrg, model, dict, dfTable,
isNormalize, LOG);
// If no translation of any word is in the target vocab, remove document i.e., our model
@@ -354,9 +354,9 @@ public int runTool() throws Exception {
conf.setInputFormat(SequenceFileInputFormat.class);
conf.setMapOutputKeyClass(IntWritable.class);
- conf.setMapOutputValueClass(HMapSFW.class);
+ conf.setMapOutputValueClass(HMapStFW.class);
conf.setOutputKeyClass(IntWritable.class);
- conf.setOutputValueClass(HMapSFW.class);
+ conf.setOutputValueClass(HMapStFW.class);
conf.setOutputFormat(SequenceFileOutputFormat.class);
conf.setMapperClass(MyMapperTrans.class);
diff --git a/src/java/main/ivory/core/preprocess/BuildWeightedTermDocVectors.java b/src/java/main/ivory/core/preprocess/BuildWeightedTermDocVectors.java
index a5bc4560..af7525b2 100644
--- a/src/java/main/ivory/core/preprocess/BuildWeightedTermDocVectors.java
+++ b/src/java/main/ivory/core/preprocess/BuildWeightedTermDocVectors.java
@@ -48,7 +48,7 @@
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.log4j.Logger;
-import tl.lin.data.map.HMapSFW;
+import tl.lin.data.map.HMapStFW;
import tl.lin.data.map.MapKF;
import com.google.common.collect.Maps;
@@ -61,7 +61,7 @@ public class BuildWeightedTermDocVectors extends PowerTool {
protected static enum Docs { Total, ZERO, SHORT }
private static class MyMapper extends MapReduceBase implements
- Mapper<IntWritable, LazyTermDocVector, IntWritable, HMapSFW> {
+ Mapper<IntWritable, LazyTermDocVector, IntWritable, HMapStFW> {
static IntWritable mDocno = new IntWritable();
private static DocLengthTable mDLTable;
@@ -72,7 +72,7 @@ private static class MyMapper extends MapReduceBase implements
private boolean normalize = false;
DefaultFrequencySortedDictionary dict;
DfTableArray dfTable;
- HMapSFW weightedVector = new HMapSFW();
+ HMapStFW weightedVector = new HMapStFW();
String term;
float wt, sum2;
@@ -162,7 +162,7 @@ public void configure(JobConf conf){
}
public void map(IntWritable docno, LazyTermDocVector doc,
- OutputCollector<IntWritable, HMapSFW> output, Reporter reporter)
+ OutputCollector<IntWritable, HMapStFW> output, Reporter reporter)
throws IOException {
mDocno.set(docno.get());
int docLen = mDLTable.getDocLength(mDocno.get());
@@ -288,10 +288,10 @@ public int runTool() throws Exception {
FileOutputFormat.setOutputPath(conf, weightedVectorsPath);
conf.setInputFormat(SequenceFileInputFormat.class);
conf.setMapOutputKeyClass(IntWritable.class);
- conf.setMapOutputValueClass(HMapSFW.class);
+ conf.setMapOutputValueClass(HMapStFW.class);
conf.setOutputFormat(SequenceFileOutputFormat.class);
conf.setOutputKeyClass(IntWritable.class);
- conf.setOutputValueClass(HMapSFW.class);
+ conf.setOutputValueClass(HMapStFW.class);
LOG.info("Running job: "+conf.getJobName());
diff --git a/src/java/main/ivory/core/util/CLIRUtils.java b/src/java/main/ivory/core/util/CLIRUtils.java
index b8e7b0b9..a3f7d705 100644
--- a/src/java/main/ivory/core/util/CLIRUtils.java
+++ b/src/java/main/ivory/core/util/CLIRUtils.java
@@ -43,8 +43,8 @@
import tl.lin.data.map.HMapIF;
import tl.lin.data.map.HMapIFW;
-import tl.lin.data.map.HMapSFW;
-import tl.lin.data.map.HMapSIW;
+import tl.lin.data.map.HMapStFW;
+import tl.lin.data.map.HMapStIW;
import tl.lin.data.map.MapKF;
import tl.lin.data.pair.PairOfFloatString;
import tl.lin.data.pair.PairOfFloats;
@@ -147,7 +147,7 @@ public static float cosine(HMapIFW vectorA, HMapIFW vectorB) {
* @return
* cosine score
*/
- public static float cosine(HMapSFW vectorA, HMapSFW vectorB) {
+ public static float cosine(HMapStFW vectorA, HMapStFW vectorB) {
float sum = 0, magA = 0, magB = 0;
for(tl.lin.data.map.MapKF.Entry e : vectorA.entrySet()){
float value = e.getValue();
@@ -176,7 +176,7 @@ public static float cosine(HMapSFW vectorA, HMapSFW vectorB) {
* @return
* cosine score
*/
- public static float cosineNormalized(HMapSFW vectorA, HMapSFW vectorB) {
+ public static float cosineNormalized(HMapStFW vectorA, HMapStFW vectorB) {
float sum = 0;
for(tl.lin.data.map.MapKF.Entry e : vectorA.entrySet()){
float value = e.getValue();
@@ -234,7 +234,7 @@ public static HMapIFW translateDFTable(Vocab eVocabSrc, Vocab fVocabTrg, TTable_
* @return
* mapping from E-terms to their computed df values
*/
- public static HMapIFW translateDFTable(Vocab eVocabSrc, Vocab fVocabTrg, TTable_monolithic_IFAs e2f_probs, HMapSIW dfs){
+ public static HMapIFW translateDFTable(Vocab eVocabSrc, Vocab fVocabTrg, TTable_monolithic_IFAs e2f_probs, HMapStIW dfs){
HMapIFW transDfTable = new HMapIFW();
for(int e=1;e entry : tfTable.entrySet()){
// retrieve term string, tf and df
@@ -1125,32 +1125,32 @@ private static void combineTTables(String ttableFile, String srcEVocabFile, Stri
*
*/
- public static String[] computeFeaturesF1(HMapSFW eVector, HMapSFW translatedFVector, float eSentLength, float fSentLength) {
+ public static String[] computeFeaturesF1(HMapStFW eVector, HMapStFW translatedFVector, float eSentLength, float fSentLength) {
return computeFeatures(1, null, null, null, null, null, eVector, null, translatedFVector, eSentLength, fSentLength, null, null, null, null, null, null, 0);
}
- public static String[] computeFeaturesF2(HMapSIW eSrcTfs, HMapSFW eVector, HMapSIW fSrcTfs, HMapSFW translatedFVector, float eSentLength, float fSentLength,
+ public static String[] computeFeaturesF2(HMapStIW eSrcTfs, HMapStFW eVector, HMapStIW fSrcTfs, HMapStFW translatedFVector, float eSentLength, float fSentLength,
Vocab eVocabSrc, Vocab eVocabTrg, Vocab fVocabSrc, Vocab fVocabTrg, TTable_monolithic_IFAs e2f_Probs, TTable_monolithic_IFAs f2e_Probs, float prob){
return computeFeatures(2, null, null, null, null, eSrcTfs, eVector, fSrcTfs, translatedFVector,
eSentLength, fSentLength, eVocabSrc, eVocabTrg, fVocabSrc, fVocabTrg, e2f_Probs, f2e_Probs, prob);
}
public static String[] computeFeaturesF3(String fSentence, String eSentence, Tokenizer fTokenizer, Tokenizer eTokenizer,
- HMapSIW eSrcTfs, HMapSFW eVector, HMapSIW fSrcTfs, HMapSFW translatedFVector, float eSentLength, float fSentLength,
+ HMapStIW eSrcTfs, HMapStFW eVector, HMapStIW fSrcTfs, HMapStFW translatedFVector, float eSentLength, float fSentLength,
Vocab eVocabSrc, Vocab eVocabTrg, Vocab fVocabSrc, Vocab fVocabTrg, TTable_monolithic_IFAs e2f_Probs, TTable_monolithic_IFAs f2e_Probs, float prob){
return computeFeatures(3, fSentence, eSentence, fTokenizer, eTokenizer, eSrcTfs, eVector, fSrcTfs, translatedFVector,
eSentLength, fSentLength, eVocabSrc, eVocabTrg, fVocabSrc, fVocabTrg, e2f_Probs, f2e_Probs, prob);
}
public static String[] computeFeatures(int featSet, String fSentence, String eSentence, Tokenizer fTokenizer, Tokenizer eTokenizer,
- HMapSIW eSrcTfs, HMapSFW eVector, HMapSIW fSrcTfs, HMapSFW translatedFVector, float eSentLength, float fSentLength,
+ HMapStIW eSrcTfs, HMapStFW eVector, HMapStIW fSrcTfs, HMapStFW translatedFVector, float eSentLength, float fSentLength,
Vocab eVocabSrc, Vocab eVocabTrg, Vocab fVocabSrc, Vocab fVocabTrg, TTable_monolithic_IFAs e2f_Probs, TTable_monolithic_IFAs f2e_Probs, float prob){
return computeFeatures(featSet, fSentence, eSentence, fTokenizer, eTokenizer, eSrcTfs, eVector, fSrcTfs, translatedFVector,
eSentLength, fSentLength, eVocabSrc, eVocabTrg, fVocabSrc, fVocabTrg, e2f_Probs, f2e_Probs, prob, logger);
}
public static String[] computeFeatures(int featSet, String fSentence, String eSentence, Tokenizer fTokenizer, Tokenizer eTokenizer,
- HMapSIW eSrcTfs, HMapSFW eVector, HMapSIW fSrcTfs, HMapSFW translatedFVector, float eSentLength, float fSentLength,
+ HMapStIW eSrcTfs, HMapStFW eVector, HMapStIW fSrcTfs, HMapStFW translatedFVector, float eSentLength, float fSentLength,
Vocab eVocabSrc, Vocab eVocabTrg, Vocab fVocabSrc, Vocab fVocabTrg, TTable_monolithic_IFAs e2f_Probs, TTable_monolithic_IFAs f2e_Probs, float prob, Logger sLogger) {
List features = new ArrayList();
if(fSentLength == 0 || eSentLength == 0){
@@ -1227,7 +1227,7 @@ private static int getNumberOfWordsWithNDigits(int N, String[] tokens) {
return cnt;
}
- private static float getWordTransRatio(HMapSIW eSrcTfs, HMapSIW fSrcTfs, Vocab eVocabSrc, Vocab fVocabTrg, TTable_monolithic_IFAs e2fProbs, float probThreshold) {
+ private static float getWordTransRatio(HMapStIW eSrcTfs, HMapStIW fSrcTfs, Vocab eVocabSrc, Vocab fVocabTrg, TTable_monolithic_IFAs e2fProbs, float probThreshold) {
// if there are k occurences of a term w on source side, and m occurrences of a possible translation of w on target side,
// instead of saying that w has a translation on target side, we say w has max(1,m/k) translations to downweight cases where m sentences;
- ArrayListWritable<HMapSFW> vectors = new ArrayListWritable<HMapSFW>();
+ ArrayListWritable<HMapStFW> vectors = new ArrayListWritable<HMapStFW>();
ArrayListOfIntsWritable sentLengths = new ArrayListOfIntsWritable();
// identify sentences in document, filter out ones below MinSentLength threshold
// convert each sentence into a tf-idf vector, using general DF map for collection and a heuristic for avg. doc length
diff --git a/src/java/main/ivory/lsh/bitext/FilterSentencePairs.java b/src/java/main/ivory/lsh/bitext/FilterSentencePairs.java
index e8f0febb..d5f335eb 100644
--- a/src/java/main/ivory/lsh/bitext/FilterSentencePairs.java
+++ b/src/java/main/ivory/lsh/bitext/FilterSentencePairs.java
@@ -35,8 +35,8 @@
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
-import tl.lin.data.map.HMapSFW;
-import tl.lin.data.map.HMapSIW;
+import tl.lin.data.map.HMapStFW;
+import tl.lin.data.map.HMapStIW;
/**
Step 2 of the bitext extraction algorithm.
@@ -73,7 +73,7 @@ private static class MyMapper extends MapReduceBase implements
private PreprocessHelper helper;
private String eSent, fSent;
private int eLen, fLen;
- private HMapSFW eVector, fVector;
+ private HMapStFW eVector, fVector;
private Tokenizer eTok, fTok;
private Text outSent1, outSent2;
private float classifierThreshold;
@@ -111,9 +111,9 @@ public void map(LongWritable key, Text sentencePair, OutputCollector
fSent = sentences[0];
eLen = eTok.getNumberTokens(eSent);
fLen = fTok.getNumberTokens(fSent);
- HMapSIW eSrcTfs = new HMapSIW();
+ HMapStIW eSrcTfs = new HMapStIW();
eVector = helper.createEDocVector(eSent, eSrcTfs);
- HMapSIW fSrcTfs = new HMapSIW();
+ HMapStIW fSrcTfs = new HMapStIW();
fVector = helper.createFDocVector(fSent, fSrcTfs);
if (eVector == null || fVector == null) {
diff --git a/src/java/main/ivory/lsh/bitext/FindParallelSentencePairs.java b/src/java/main/ivory/lsh/bitext/FindParallelSentencePairs.java
index a9df372d..587526a2 100644
--- a/src/java/main/ivory/lsh/bitext/FindParallelSentencePairs.java
+++ b/src/java/main/ivory/lsh/bitext/FindParallelSentencePairs.java
@@ -41,7 +41,7 @@
import tl.lin.data.array.ArrayListOfIntsWritable;
import tl.lin.data.array.ArrayListWritable;
import tl.lin.data.map.HMapIV;
-import tl.lin.data.map.HMapSFW;
+import tl.lin.data.map.HMapStFW;
import tl.lin.data.pair.PairOfInts;
/**
@@ -218,7 +218,7 @@ private static class MyReducer extends MapReduceBase implements
Reducer{
private int fDocno, eDocno;
private int classifierPositiveId;
- private ArrayListWritable<HMapSFW> fVectors, eVectors;
+ private ArrayListWritable<HMapStFW> fVectors, eVectors;
private ArrayListWritable fSentences, eSentences;
private PreprocessHelper helper; // for modularity, helper provides methods to preprocess data
private float classifierThreshold;
@@ -242,8 +242,8 @@ public void configure(JobConf job) {
throw new RuntimeException("Classifier confidence threshold > 1, provide value in [0,1]: "+classifierThreshold);
}
- eVectors = new ArrayListWritable<HMapSFW>();
- fVectors = new ArrayListWritable<HMapSFW>();
+ eVectors = new ArrayListWritable<HMapStFW>();
+ fVectors = new ArrayListWritable<HMapStFW>();
eSentences = new ArrayListWritable();
fSentences = new ArrayListWritable();
}
@@ -303,11 +303,11 @@ public void reduce(PairOfInts docnoPair, Iterator wikiSentence
// classify each e-f sentence pair in the candidate set
for (int f = 0; f < fVectors.size(); f++) {
- HMapSFW fVector = fVectors.get(f);
+ HMapStFW fVector = fVectors.get(f);
int fSentLength = fSentences.get(f).getLength();
for (int e = 0; e < eVectors.size(); e++) {
- HMapSFW eVector = eVectors.get(e);
+ HMapStFW eVector = eVectors.get(e);
int eSentLength = eSentences.get(e).getLength();
if (eSentLength > 2 * fSentLength || fSentLength > 2 * eSentLength) {
diff --git a/src/java/main/ivory/lsh/bitext/FindParallelSentencePairsOld.java b/src/java/main/ivory/lsh/bitext/FindParallelSentencePairsOld.java
index 06d7be33..6f62e010 100644
--- a/src/java/main/ivory/lsh/bitext/FindParallelSentencePairsOld.java
+++ b/src/java/main/ivory/lsh/bitext/FindParallelSentencePairsOld.java
@@ -37,7 +37,7 @@
import tl.lin.data.array.ArrayListOfIntsWritable;
import tl.lin.data.array.ArrayListWritable;
import tl.lin.data.map.HMapIV;
-import tl.lin.data.map.HMapSFW;
+import tl.lin.data.map.HMapStFW;
import tl.lin.data.pair.PairOfInts;
import edu.umd.cloud9.collection.Indexable;
import edu.umd.cloud9.collection.wikipedia.WikipediaPage;
@@ -184,7 +184,7 @@ public void map(Writable docnoKey, Indexable page, OutputCollector sentences;
- ArrayListWritable<HMapSFW> vectors = new ArrayListWritable<HMapSFW>();
+ ArrayListWritable<HMapStFW> vectors = new ArrayListWritable<HMapStFW>();
ArrayListOfIntsWritable sentLengths = new ArrayListOfIntsWritable();
try {
if(lang.equals("en")){
@@ -235,7 +235,7 @@ private static class MyReducer extends MapReduceBase implements
Reducer{
private int fDocno, eDocno;
private int classifierPositiveId;
- private ArrayListWritable<HMapSFW> fVectors, eVectors;
+ private ArrayListWritable<HMapStFW> fVectors, eVectors;
private ArrayListWritable fSentences, eSentences;
private PreprocessHelper helper; // for modularity, helper provides methods to preprocess data
private float classifierThreshold;
@@ -259,8 +259,8 @@ public void configure(JobConf job) {
throw new RuntimeException("Classifier confidence threshold > 1, provide value in [0,1]: "+classifierThreshold);
}
- eVectors = new ArrayListWritable<HMapSFW>();
- fVectors = new ArrayListWritable<HMapSFW>();
+ eVectors = new ArrayListWritable<HMapStFW>();
+ fVectors = new ArrayListWritable<HMapStFW>();
eSentences = new ArrayListWritable();
fSentences = new ArrayListWritable();
}
@@ -323,11 +323,11 @@ public void reduce(PairOfInts docnoPair, Iterator wikiTexts,
// classify each e-f sentence pair in the candidate set
for (int f = 0; f < fVectors.size(); f++) {
- HMapSFW fVector = fVectors.get(f);
+ HMapStFW fVector = fVectors.get(f);
int fSentLength = fSentences.get(f).getLength();
for (int e = 0; e < eVectors.size(); e++) {
- HMapSFW eVector = eVectors.get(e);
+ HMapStFW eVector = eVectors.get(e);
int eSentLength = eSentences.get(e).getLength();
if (eSentLength > 2 * fSentLength || fSentLength > 2 * eSentLength) {
diff --git a/src/java/main/ivory/lsh/bitext/PreprocessHelper.java b/src/java/main/ivory/lsh/bitext/PreprocessHelper.java
index d710b303..3450ddfc 100644
--- a/src/java/main/ivory/lsh/bitext/PreprocessHelper.java
+++ b/src/java/main/ivory/lsh/bitext/PreprocessHelper.java
@@ -29,8 +29,8 @@
import tl.lin.data.array.ArrayListOfIntsWritable;
import tl.lin.data.array.ArrayListWritable;
import tl.lin.data.map.HMapIFW;
-import tl.lin.data.map.HMapSFW;
-import tl.lin.data.map.HMapSIW;
+import tl.lin.data.map.HMapStFW;
+import tl.lin.data.map.HMapStIW;
import tl.lin.data.map.MapKI;
import com.google.common.collect.Maps;
@@ -53,7 +53,7 @@ public class PreprocessHelper {
private DfTableArray dfTable;
private DefaultFrequencySortedDictionary dict;
private final Logger sLogger = Logger.getLogger(PreprocessHelper.class);
- private static final HMapSIW lang2AvgSentLen = new HMapSIW();
+ private static final HMapStIW lang2AvgSentLen = new HMapStIW();
static {
// took average # of tokens per sentence in Wikipedia data
lang2AvgSentLen.put("en",21);
@@ -218,11 +218,11 @@ private void loadEModels(JobConf conf) throws Exception {
dfTable = new DfTableArray(new Path(env.getDfByTermData()), fs);
}
- public HMapSFW createFDocVector(String sentence) {
- return createFDocVector(sentence, new HMapSIW());
+ public HMapStFW createFDocVector(String sentence) {
+ return createFDocVector(sentence, new HMapStIW());
}
- public HMapSFW createFDocVector(String sentence, HMapSIW term2Tf) {
+ public HMapStFW createFDocVector(String sentence, HMapStIW term2Tf) {
String[] terms = fTok.processContent(sentence);
for(String term : terms){
@@ -238,7 +238,7 @@ public HMapSFW createFDocVector(String sentence, HMapSIW term2Tf) {
transTermTf = CLIRUtils.updateTFsByTerm(fTerm, tf, transTermTf, eVocabSrc, eVocabTrg, fVocabSrc, fVocabTrg, e2f_Probs, f2e_Probs, eTok, sLogger);
}
- HMapSFW weightedVector = CLIRUtils.createTermDocVector(terms.length, transTermTf, eVocabTrg, fScoreFn, dict, dfTable, true, sLogger);
+ HMapStFW weightedVector = CLIRUtils.createTermDocVector(terms.length, transTermTf, eVocabTrg, fScoreFn, dict, dfTable, true, sLogger);
// don't count numbers for the min #terms constraint since Wikipedia has "sentences" full of numbers that doesn't make any sense
int numNonNumbers = 0;
@@ -254,12 +254,12 @@ public HMapSFW createFDocVector(String sentence, HMapSIW term2Tf) {
}
}
- public HMapSFW createEDocVector(String sentence) {
- return createEDocVector(sentence, new HMapSIW());
+ public HMapStFW createEDocVector(String sentence) {
+ return createEDocVector(sentence, new HMapStIW());
}
- public HMapSFW createEDocVector(String sentence, HMapSIW term2Tf) {
- HMapSFW weightedVector = new HMapSFW();
+ public HMapStFW createEDocVector(String sentence, HMapStIW term2Tf) {
+ HMapStFW weightedVector = new HMapStFW();
String[] terms = eTok.processContent(sentence);
for(String term : terms){
@@ -281,7 +281,7 @@ public HMapSFW createEDocVector(String sentence, HMapSIW term2Tf) {
}
}
- public ArrayListWritable<Text> getESentences(String text, ArrayListWritable<HMapSFW> vectors, ArrayListOfIntsWritable sentLengths) throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException {
+ public ArrayListWritable<Text> getESentences(String text, ArrayListWritable<HMapStFW> vectors, ArrayListOfIntsWritable sentLengths) throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException {
ArrayListWritable sentences = new ArrayListWritable();
String[] lines = text.split("\n");
@@ -295,7 +295,7 @@ public ArrayListWritable getESentences(String text, ArrayListWritable= MinSentenceLength){
- HMapSFW vector = createEDocVector(sent.toString());
+ HMapStFW vector = createEDocVector(sent.toString());
if(vector != null){
vectors.add(vector);
sentences.add(new Text(sent));
@@ -308,7 +308,7 @@ public ArrayListWritable getESentences(String text, ArrayListWritable
- public ArrayListWritable<Text> getFSentences(String text, ArrayListWritable<HMapSFW> vectors, ArrayListOfIntsWritable sentLengths) throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException {
+ public ArrayListWritable<Text> getFSentences(String text, ArrayListWritable<HMapStFW> vectors, ArrayListOfIntsWritable sentLengths) throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException {
// sLogger.setLevel(Level.DEBUG);
sLogger.debug("text length="+text.length());
@@ -341,7 +341,7 @@ public ArrayListWritable getFSentences(String text, ArrayListWritable= MinSentenceLength) {
- HMapSFW vector = createFDocVector(sent);
+ HMapStFW vector = createFDocVector(sent);
if (vector != null) {
vectors.add(vector);
sentences.add(new Text(sent));
diff --git a/src/java/main/ivory/lsh/data/WikiDocInfo.java b/src/java/main/ivory/lsh/data/WikiDocInfo.java
index f1e6304e..99a6c93a 100644
--- a/src/java/main/ivory/lsh/data/WikiDocInfo.java
+++ b/src/java/main/ivory/lsh/data/WikiDocInfo.java
@@ -8,25 +8,25 @@
import org.apache.hadoop.io.Writable;
import tl.lin.data.array.ArrayListWritable;
-import tl.lin.data.map.HMapSFW;
+import tl.lin.data.map.HMapStFW;
public class WikiDocInfo implements Writable {
int langID;
- ArrayListWritable<HMapSFW> vectors;
+ ArrayListWritable<HMapStFW> vectors;
ArrayListWritable sentences;
public WikiDocInfo() {
super();
}
- public WikiDocInfo(int i1, ArrayListWritable t, ArrayListWritable<HMapSFW> v){//, ArrayListOfIntsWritable l) {
+ public WikiDocInfo(int i1, ArrayListWritable t, ArrayListWritable<HMapStFW> v){//, ArrayListOfIntsWritable l) {
langID = i1;
vectors = v;
sentences = t;
}
public void readFields(DataInput in) {
- vectors = new ArrayListWritable<HMapSFW>();
+ vectors = new ArrayListWritable<HMapStFW>();
sentences = new ArrayListWritable();
try {
@@ -56,7 +56,7 @@ public boolean equals(Object other){
return (p.getLangID()==getLangID() && (p.getVectors()).equals(this.getVectors()) && (p.getSentences()).equals(this.getSentences()));
}
- public ArrayListWritable<HMapSFW> getVectors() {
+ public ArrayListWritable<HMapStFW> getVectors() {
return vectors;
}
@@ -68,7 +68,7 @@ public int getLangID() {
return langID;
}
- public void set(int n1, ArrayListWritable<HMapSFW> vectors, ArrayListWritable sentences) {
+ public void set(int n1, ArrayListWritable<HMapStFW> vectors, ArrayListWritable sentences) {
this.langID = n1;
this.vectors = vectors;
this.sentences = sentences;
diff --git a/src/java/main/ivory/lsh/data/WikiSentenceInfo.java b/src/java/main/ivory/lsh/data/WikiSentenceInfo.java
index 7f24c9fc..dd695ac4 100644
--- a/src/java/main/ivory/lsh/data/WikiSentenceInfo.java
+++ b/src/java/main/ivory/lsh/data/WikiSentenceInfo.java
@@ -7,18 +7,18 @@
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
-import tl.lin.data.map.HMapSFW;
+import tl.lin.data.map.HMapStFW;
public class WikiSentenceInfo implements Writable {
int langID;
Text sentence;
- HMapSFW vector;
+ HMapStFW vector;
public WikiSentenceInfo() {
super();
}
- public WikiSentenceInfo(int i1, Text t, HMapSFW v){
+ public WikiSentenceInfo(int i1, Text t, HMapStFW v){
langID = i1;
sentence = t;
vector = v;
@@ -35,7 +35,7 @@ public void readFields(DataInput in) {
try {
sentence = new Text();
sentence.readFields(in);
- vector = new HMapSFW();
+ vector = new HMapStFW();
vector.readFields(in);
} catch (IOException e) {
throw new RuntimeException("Could not read vectors/sentences in WikiSentenceInfo");
@@ -58,7 +58,7 @@ public Text getSentence() {
return sentence;
}
- public HMapSFW getVector() {
+ public HMapStFW getVector() {
return vector;
}
@@ -66,7 +66,7 @@ public int getLangID() {
return langID;
}
- public void set(int n1, Text sentence, HMapSFW vector) {
+ public void set(int n1, Text sentence, HMapStFW vector) {
this.langID = n1;
this.sentence = sentence;
this.vector = vector;
diff --git a/src/java/main/ivory/lsh/eval/BitextClassifierUtils.java b/src/java/main/ivory/lsh/eval/BitextClassifierUtils.java
index 6883d50e..8db38af9 100644
--- a/src/java/main/ivory/lsh/eval/BitextClassifierUtils.java
+++ b/src/java/main/ivory/lsh/eval/BitextClassifierUtils.java
@@ -30,8 +30,8 @@
import tl.lin.data.array.ArrayListOfInts;
import tl.lin.data.map.HMapIFW;
import tl.lin.data.map.HMapIIW;
-import tl.lin.data.map.HMapSFW;
-import tl.lin.data.map.HMapSIW;
+import tl.lin.data.map.HMapStFW;
+import tl.lin.data.map.HMapStIW;
import tl.lin.data.map.MapKF;
import tl.lin.data.map.MapKI;
import edu.umd.hooka.Vocab;
@@ -45,26 +45,26 @@
*
*/
public class BitextClassifierUtils {
- static List fDocs = new ArrayList();
- static List fDocTfs = new ArrayList();
+ static List fDocs = new ArrayList();
+ static List fDocTfs = new ArrayList();
static List fSents = new ArrayList();
- static List eDocs = new ArrayList();
- static List eDocTfs = new ArrayList();
+ static List eDocs = new ArrayList();
+ static List eDocTfs = new ArrayList();
static List eSents = new ArrayList();
static ArrayListOfInts enSentLengths = new ArrayListOfInts();
static ArrayListOfInts deSentLengths = new ArrayListOfInts();
- static HMapSIW numSentencesPerDocE;
- static HMapSIW numSentencesPerDocF;
+ static HMapStIW numSentencesPerDocE;
+ static HMapStIW numSentencesPerDocF;
- static List gDocs = new ArrayList();
- static HMapSIW dfE = new HMapSIW();
- static HMapSIW dfD = new HMapSIW();
- static HMapSIW dfG = new HMapSIW();
+ static List gDocs = new ArrayList();
+ static HMapStIW dfE = new HMapStIW();
+ static HMapStIW dfD = new HMapStIW();
+ static HMapStIW dfG = new HMapStIW();
- static HMapSIW fTitle2SentCnt = new HMapSIW();
- static HMapSIW eTitle2SentCnt = new HMapSIW();
+ static HMapStIW fTitle2SentCnt = new HMapStIW();
+ static HMapStIW eTitle2SentCnt = new HMapStIW();
static HMapIIW parallelPairs = new HMapIIW();
static float avgFDocLeng;
@@ -75,8 +75,8 @@ public class BitextClassifierUtils {
static TTable_monolithic_IFAs f2e_Probs, e2f_Probs;
private static Options options;
- private List<HMapSFW> translateDocVectors(String eLang,
- String eTokenizerModelFile, String eStopwordsFile, List<HMapSIW> docs, float avgLen, HMapSIW transDfTable) {
+ private List<HMapStFW> translateDocVectors(String eLang,
+ String eTokenizerModelFile, String eStopwordsFile, List<HMapStIW> docs, float avgLen, HMapStIW transDfTable) {
Bm25 mModel = new Bm25();
// set number of docs
mModel.setDocCount(docs.size());
@@ -84,12 +84,12 @@ private List translateDocVectors(String eLang,
// set average doc length
mModel.setAvgDocLength(avgLen);
- List<HMapSFW> transDocs = new ArrayList<HMapSFW>();
+ List<HMapStFW> transDocs = new ArrayList<HMapStFW>();
Tokenizer tokenizer = TokenizerFactory.createTokenizer(eLang,
eTokenizerModelFile, true, eStopwordsFile, eStopwordsFile + ".stemmed", null);
// translate doc texts here
- for (HMapSIW deDoc : docs) {
+ for (HMapStIW deDoc : docs) {
HMapIFW tfS = new HMapIFW();
int docLen = 0;
try {
@@ -98,7 +98,7 @@ private List translateDocVectors(String eLang,
} catch (IOException e) {
e.printStackTrace();
}
- HMapSFW v = CLIRUtils.createTermDocVector(docLen, tfS, eVocabTrg, mModel, dfE, true, null);
+ HMapStFW v = CLIRUtils.createTermDocVector(docLen, tfS, eVocabTrg, mModel, dfE, true, null);
// System.out.println("f"+(n++)+" : " + v);
transDocs.add(v);
@@ -137,13 +137,13 @@ private void readWikiSentences(String eReadFile, String fReadFile, String pairsF
}
}
- private float readLines(BufferedReader reader, Tokenizer tokenizer, HMapSIW title2SentCnt, ArrayListOfInts sentLengths,
- List<HMapSIW> sentTfs, List sents, HMapSIW dfTable) throws IOException {
+ private float readLines(BufferedReader reader, Tokenizer tokenizer, HMapStIW title2SentCnt, ArrayListOfInts sentLengths,
+ List<HMapStIW> sentTfs, List sents, HMapStIW dfTable) throws IOException {
String line = null;
boolean isNewDoc = true;
int cnt = 0;
float sumLengths = 0;
- HMapSIW sent = new HMapSIW();
+ HMapStIW sent = new HMapStIW();
while ((line = reader.readLine()) != null) {
line = line.trim();
@@ -187,8 +187,8 @@ private void readSentences(int sentsPerDoc, String eReadFile, String fReadFile,
try {
BufferedReader dis1 = new BufferedReader(new InputStreamReader(new FileInputStream(new File(eReadFile)), "UTF-8"));
BufferedReader dis2 = new BufferedReader(new InputStreamReader(new FileInputStream(new File(fReadFile)), "UTF-8"));
- HMapSIW fDoc = new HMapSIW();
- HMapSIW eDoc = new HMapSIW();
+ HMapStIW fDoc = new HMapStIW();
+ HMapStIW eDoc = new HMapStIW();
String eLine = null, fLine = null;
int cntEDocs = 0, cntFDocs = 0, lastDocLenE = 0, lastDocLenF = 0, numSents = 0;
@@ -231,8 +231,8 @@ private void readSentences(int sentsPerDoc, String eReadFile, String fReadFile,
cntFDocs++;
// reset variables
- fDoc = new HMapSIW();
- eDoc = new HMapSIW();
+ fDoc = new HMapStIW();
+ eDoc = new HMapStIW();
numSents = 0;
lastDocLenE = 0;
lastDocLenF = 0;
@@ -255,8 +255,8 @@ private void readSentences(int sentsPerDoc, String eReadFile, String fReadFile,
}
}
- private List<HMapSFW> buildDocVectors(List<HMapSIW> term2tfVectors, float avgLen,
- HMapSIW dfTable) {
+ private List<HMapStFW> buildDocVectors(List<HMapStIW> term2tfVectors, float avgLen,
+ HMapStIW dfTable) {
Bm25 mModel = new Bm25();
// set number of docs
mModel.setDocCount(term2tfVectors.size());
@@ -265,9 +265,9 @@ private List buildDocVectors(List term2tfVectors, float avgLen
mModel.setAvgDocLength(avgLen);
// tf-idf computation
- List<HMapSFW> docVectors = new ArrayList<HMapSFW>();
- for (HMapSIW enDoc : term2tfVectors) {
- HMapSFW v = new HMapSFW();
+ List<HMapStFW> docVectors = new ArrayList<HMapStFW>();
+ for (HMapStIW enDoc : term2tfVectors) {
+ HMapStFW v = new HMapStFW();
int docLen = 0;
for (MapKI.Entry item : enDoc.entrySet()) {
int tf = item.getValue();
@@ -322,8 +322,8 @@ private List readAlignments(String alignmentFileName) {
private void prepareTrainTestData(List fSents, List eSents,
Tokenizer fTokenizer, Tokenizer eTokenizer,
- List<HMapSIW> fTfs, List<HMapSIW> eTfs, HMapIIW parallelPairs, List<HMapSFW> transVectors,
- List<HMapSFW> eVectors, int featureSet, float prob, List alignments) {
+ List<HMapStIW> fTfs, List<HMapStIW> eTfs, HMapIIW parallelPairs, List<HMapStFW> transVectors,
+ List<HMapStFW> eVectors, int featureSet, float prob, List alignments) {
NumberFormat nf = NumberFormat.getNumberInstance();
nf.setGroupingUsed(false);
nf.setMaximumFractionDigits(2);
@@ -332,12 +332,12 @@ private void prepareTrainTestData(List fSents, List eSents,
long time = System.currentTimeMillis();
for (int i = 0; i < transVectors.size(); i++) {
- HMapSFW transVector = transVectors.get(i);
- HMapSIW fTfMap = fTfs.get(i);
+ HMapStFW transVector = transVectors.get(i);
+ HMapStIW fTfMap = fTfs.get(i);
String fSent = fSents.get(i);
for (int j = 0; j < eVectors.size(); j++) {
- HMapSFW eVector = eVectors.get(j);
- HMapSIW eTfMap = eTfs.get(j);
+ HMapStFW eVector = eVectors.get(j);
+ HMapStIW eTfMap = eTfs.get(j);
String eSent = eSents.get(j);
if (parallelPairs.get(i) == j) {
label = "parallel";
@@ -418,12 +418,12 @@ public void runPrepareSentenceExtractionData(String fLang, String eLang, String
System.out.println("Sentences read in " + (sentTime - startTime) +
" ms. Number of sentences: " + fDocTfs.size() + " = " + eDocTfs.size());
- List<HMapSFW> eSentVectors = buildDocVectors(eDocTfs, avgEDocLeng, dfE);
+ List<HMapStFW> eSentVectors = buildDocVectors(eDocTfs, avgEDocLeng, dfE);
long evectTime = System.currentTimeMillis();
System.out.println("E vectors created in " + (evectTime - sentTime) + " ms");
- List<HMapSFW> fSentVectors = translateDocVectors(eLang, eTokenFile, eStopwordsFile, fDocTfs, avgFDocLeng, dfE);
+ List<HMapStFW> fSentVectors = translateDocVectors(eLang, eTokenFile, eStopwordsFile, fDocTfs, avgFDocLeng, dfE);
long fvectTime = System.currentTimeMillis();
System.out.println("F vectors created in " + (fvectTime - evectTime) +
@@ -489,8 +489,8 @@ private static void runCLIRComparison() throws IOException, ClassNotFoundExcepti
String DATADIR = "/fs/clip-qa/ferhan/cl-pwsim/pwsim-experiments-2013"; // /Users/ferhanture/edu/research_archive/data/de-en/eu-nc-wmt08
BitextClassifierUtils dt = new BitextClassifierUtils();
- numSentencesPerDocE = new HMapSIW();
- numSentencesPerDocF = new HMapSIW();
+ numSentencesPerDocE = new HMapStIW();
+ numSentencesPerDocF = new HMapStIW();
FileSystem localFs = FileSystem.getLocal(new Configuration());
eVocabSrc = HadoopAlign.loadVocab(new Path(VOCABDIR+"/vocab.en-de.en"), localFs);
eVocabTrg = HadoopAlign.loadVocab(new Path(VOCABDIR+"/vocab.de-en.en"), localFs);
@@ -505,7 +505,7 @@ private static void runCLIRComparison() throws IOException, ClassNotFoundExcepti
TOKENDIR+"/en-token.bin",
TOKENDIR+"/de.stop",
TOKENDIR+"/en.stop");
- List<HMapSFW> fDocVectors = dt.translateDocVectors("en",
+ List<HMapStFW> fDocVectors = dt.translateDocVectors("en",
TOKENDIR+"/en-token.bin",
TOKENDIR+"/en.stop",
fDocTfs, avgFDocLeng, dfE);
@@ -519,7 +519,7 @@ private static void runCLIRComparison() throws IOException, ClassNotFoundExcepti
TOKENDIR+"/en-token.bin",
TOKENDIR+"/de.stop",
TOKENDIR+"/en.stop");
- List<HMapSFW> googletransDocVectors = dt.buildDocVectors(eDocTfs, avgEDocLeng, dfE);
+ List<HMapStFW> googletransDocVectors = dt.buildDocVectors(eDocTfs, avgEDocLeng, dfE);
eDocTfs.clear();
dfE.clear();
@@ -530,7 +530,7 @@ private static void runCLIRComparison() throws IOException, ClassNotFoundExcepti
TOKENDIR+"/en-token.bin",
TOKENDIR+"/de.stop",
TOKENDIR+"/en.stop");
- List<HMapSFW> cdectransDocVectors = dt.buildDocVectors(eDocTfs, avgEDocLeng, dfE);
+ List<HMapStFW> cdectransDocVectors = dt.buildDocVectors(eDocTfs, avgEDocLeng, dfE);
eDocTfs.clear();
dfE.clear();
@@ -541,7 +541,7 @@ private static void runCLIRComparison() throws IOException, ClassNotFoundExcepti
TOKENDIR+"/en-token.bin",
TOKENDIR+"/de.stop",
TOKENDIR+"/en.stop");
- List<HMapSFW> eDocVectors = dt.buildDocVectors(eDocTfs, avgEDocLeng, dfE);
+ List<HMapStFW> eDocVectors = dt.buildDocVectors(eDocTfs, avgEDocLeng, dfE);
for (int i=0; i<100; i++) {
// System.out.println(CLIRUtils.cosine(fDocVectors.get(i), eDocVectors.get(i)));
System.out.println("cdec\t+\t" + CLIRUtils.cosine(cdectransDocVectors.get(i), eDocVectors.get(i)));
diff --git a/src/java/main/ivory/lsh/eval/BruteForcePwsim.java b/src/java/main/ivory/lsh/eval/BruteForcePwsim.java
index dad1ed20..f2aab251 100644
--- a/src/java/main/ivory/lsh/eval/BruteForcePwsim.java
+++ b/src/java/main/ivory/lsh/eval/BruteForcePwsim.java
@@ -40,7 +40,7 @@
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
-import tl.lin.data.map.HMapSFW;
+import tl.lin.data.map.HMapStFW;
import tl.lin.data.pair.PairOfFloatInt;
import tl.lin.data.pair.PairOfInts;
import tl.lin.data.pair.PairOfWritables;
@@ -119,8 +119,8 @@ public void map(IntWritable docno, WeightedIntDocVector docvector,
* @author ferhanture
*/
public static class MyMapperTermDocVectors extends MapReduceBase implements
- Mapper {
- private List<PairOfWritables<IntWritable, HMapSFW>> vectors;
+ Mapper {
+ private List<PairOfWritables<IntWritable, HMapStFW>> vectors;
float threshold;
public void configure(JobConf job) {
@@ -144,12 +144,12 @@ public void configure(JobConf job) {
LOG.info("Read " + vectors.size() + " sample doc vectors");
}
- public void map(IntWritable docno, HMapSFW docvector,
+ public void map(IntWritable docno, HMapStFW docvector,
OutputCollector output, Reporter reporter) throws IOException {
for (int i = 0; i < vectors.size(); i++) {
reporter.incrCounter(Pairs.Total, 1);
IntWritable sampleDocno = vectors.get(i).getLeftElement();
- HMapSFW fromSample = vectors.get(i).getRightElement();
+ HMapStFW fromSample = vectors.get(i).getRightElement();
float cs = CLIRUtils.cosine(docvector, fromSample);
if (cs >= threshold) {
diff --git a/src/java/main/ivory/lsh/eval/SampleTermDocVectors.java b/src/java/main/ivory/lsh/eval/SampleTermDocVectors.java
index bc87b0bd..9b9653ae 100644
--- a/src/java/main/ivory/lsh/eval/SampleTermDocVectors.java
+++ b/src/java/main/ivory/lsh/eval/SampleTermDocVectors.java
@@ -45,7 +45,7 @@
import tl.lin.data.map.HMapII;
import tl.lin.data.map.HMapIIW;
-import tl.lin.data.map.HMapSFW;
+import tl.lin.data.map.HMapStFW;
import edu.umd.cloud9.io.SequenceFileUtils;
/**
@@ -95,7 +95,7 @@
@SuppressWarnings("deprecation")
public class SampleTermDocVectors extends Configured implements Tool {
@SuppressWarnings("unchecked")
- static Class keyClass = IntWritable.class, valueClass = HMapSFW.class,
+ static Class keyClass = IntWritable.class, valueClass = HMapStFW.class,
inputFormat = SequenceFileInputFormat.class;
private static final Logger sLogger = Logger.getLogger(SampleTermDocVectors.class);
@@ -107,7 +107,7 @@ private void printUsage() {
private static class MyMapper extends MapReduceBase implements
- Mapper<IntWritable, HMapSFW, IntWritable, HMapSFW> {
+ Mapper<IntWritable, HMapStFW, IntWritable, HMapStFW> {
private int sampleFreq;
private HMapII samplesMap = null;
@@ -159,8 +159,8 @@ public void configure(JobConf conf) {
}
}
- public void map(IntWritable key, HMapSFW val,
- OutputCollector<IntWritable, HMapSFW> output, Reporter reporter)
+ public void map(IntWritable key, HMapStFW val,
+ OutputCollector<IntWritable, HMapStFW> output, Reporter reporter)
throws IOException {
if (samplesMap != null) {
if (samplesMap.containsKey(key.get())) {
@@ -177,11 +177,11 @@ public void map(IntWritable key, HMapSFW val,
}
public static class MyReducer extends MapReduceBase implements
- Reducer<IntWritable, HMapSFW, IntWritable, HMapSFW> {
+ Reducer<IntWritable, HMapStFW, IntWritable, HMapStFW> {
@Override
- public void reduce(IntWritable key, Iterator<HMapSFW> values,
- OutputCollector<IntWritable, HMapSFW> output, Reporter reporter)
+ public void reduce(IntWritable key, Iterator<HMapStFW> values,
+ OutputCollector<IntWritable, HMapStFW> output, Reporter reporter)
throws IOException {
output.collect(key, values.next());
}
diff --git a/src/java/main/ivory/lsh/projection/ComputeSignaturesSimhash.java b/src/java/main/ivory/lsh/projection/ComputeSignaturesSimhash.java
index 6935c7d3..e6dcae62 100644
--- a/src/java/main/ivory/lsh/projection/ComputeSignaturesSimhash.java
+++ b/src/java/main/ivory/lsh/projection/ComputeSignaturesSimhash.java
@@ -24,7 +24,7 @@
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
-import tl.lin.data.map.HMapSFW;
+import tl.lin.data.map.HMapStFW;
import tl.lin.data.map.MapKF;
import edu.umd.cloud9.util.PowerTool;
@@ -60,7 +60,7 @@ protected static enum Maps {
*
*/
public static class MyMapper extends MapReduceBase implements
- Mapper {
+ Mapper {
static GeneralHashFunctionLibrary hashLib;
static float[] V = new float[64];
@@ -70,7 +70,7 @@ public void configure(JobConf job) {
hashLib = new GeneralHashFunctionLibrary();
}
- public void map(IntWritable docno, HMapSFW docvector,
+ public void map(IntWritable docno, HMapStFW docvector,
OutputCollector output, Reporter reporter)
throws IOException {
V = new float[64];
diff --git a/src/java/main/ivory/sqe/querygenerator/MtNQueryGenerator.java b/src/java/main/ivory/sqe/querygenerator/MtNQueryGenerator.java
index 97f2499b..246ac844 100644
--- a/src/java/main/ivory/sqe/querygenerator/MtNQueryGenerator.java
+++ b/src/java/main/ivory/sqe/querygenerator/MtNQueryGenerator.java
@@ -17,7 +17,7 @@
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
-import tl.lin.data.map.HMapSFW;
+import tl.lin.data.map.HMapStFW;
import tl.lin.data.pair.PairOfStrings;
import com.google.gson.JsonArray;
@@ -126,7 +126,7 @@ public StructuredQuery parseQuery(String query, FileSystem fs, Configuration con
String origQuery = translation.getOriginalQuery();
String grammarFile = conf.get(Constants.GrammarPath);
- Map<String, HMapSFW> probMap = null;
+ Map<String, HMapStFW> probMap = null;
if (scfgWeight > 0) {
probMap = scfgGenerator.processGrammar(fs, conf, grammarFile);
}
@@ -188,7 +188,7 @@ public StructuredQuery parseQuery(String query, FileSystem fs, Configuration con
JsonArray tokensArr = new JsonArray();
if (tokenWeight > 0) {
for (String srcToken : stemmedSourceTokens) {
- HMapSFW nbestDist = translation.getDistributionOf(srcToken);
+ HMapStFW nbestDist = translation.getDistributionOf(srcToken);
if (defaultTokenizer.isStopWord(srcToken)){
continue;
@@ -200,7 +200,7 @@ public StructuredQuery parseQuery(String query, FileSystem fs, Configuration con
// Pr{bitext}
if (bitextWeight > 0) {
- HMapSFW bitextDist = clGenerator.getTranslations(origQuery.trim(), srcToken, pairsInGrammar, stemmed2Stemmed);
+ HMapStFW bitextDist = clGenerator.getTranslations(origQuery.trim(), srcToken, pairsInGrammar, stemmed2Stemmed);
if(bitextDist != null && !bitextDist.isEmpty()){
tokenRepresentationList.add(new PairOfFloatMap(bitextDist, bitextWeight));
}
@@ -208,7 +208,7 @@ public StructuredQuery parseQuery(String query, FileSystem fs, Configuration con
// Pr{scfg}
if (scfgWeight > 0) {
- HMapSFW scfgDist = scfgGenerator.getTranslations(origQuery.trim(), srcToken, probMap, stemmed2Stemmed);
+ HMapStFW scfgDist = scfgGenerator.getTranslations(origQuery.trim(), srcToken, probMap, stemmed2Stemmed);
if (scfgDist != null && !scfgDist.isEmpty() ){
tokenRepresentationList.add(new PairOfFloatMap(scfgDist, scfgWeight));
}
@@ -248,7 +248,7 @@ public StructuredQuery parseQuery(String query, FileSystem fs, Configuration con
// combine the token-based and phrase-based representations into a #combweight structure
JsonArray queryJsonArr = new JsonArray();
- HMapSFW scaledPhrase2Weight = null;
+ HMapStFW scaledPhrase2Weight = null;
if (phraseWeight > 0) {
scaledPhrase2Weight = Utils.scaleProbMap(lexProbThreshold, phraseWeight, translation.getPhraseDist());
for (String phrase : scaledPhrase2Weight.keySet()) {
diff --git a/src/java/main/ivory/sqe/querygenerator/ProbabilisticStructuredQueryGenerator.java b/src/java/main/ivory/sqe/querygenerator/ProbabilisticStructuredQueryGenerator.java
index de454258..37a72770 100644
--- a/src/java/main/ivory/sqe/querygenerator/ProbabilisticStructuredQueryGenerator.java
+++ b/src/java/main/ivory/sqe/querygenerator/ProbabilisticStructuredQueryGenerator.java
@@ -18,7 +18,7 @@
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
-import tl.lin.data.map.HMapSFW;
+import tl.lin.data.map.HMapStFW;
import tl.lin.data.pair.PairOfFloatInt;
import tl.lin.data.pair.PairOfStrings;
@@ -145,7 +145,7 @@ public StructuredQuery parseQuery(String query, FileSystem fs, Configuration con
}
} else {
JsonObject tokenTrans = new JsonObject();
- HMapSFW distr = getTranslations(origQuery, token, phrasePairs, stemmed2Stemmed);
+ HMapStFW distr = getTranslations(origQuery, token, phrasePairs, stemmed2Stemmed);
JsonArray weights = Utils.createJsonArrayFromProbabilities(distr);
if (weights != null) {
tokenTrans.add("#weight", weights);
@@ -185,8 +185,8 @@ protected String getBestTranslation(String token) {
return token;
}
- protected HMapSFW getTranslations(String query, String token, Set pairsInSCFG, Map stemmed2Stemmed) {
- HMapSFW probDist = new HMapSFW();
+ protected HMapStFW getTranslations(String query, String token, Set pairsInSCFG, Map stemmed2Stemmed) {
+ HMapStFW probDist = new HMapStFW();
int f = fVocab_f2e.get(token);
if (f <= 0) {
diff --git a/src/java/main/ivory/sqe/querygenerator/SCFGQueryGenerator.java b/src/java/main/ivory/sqe/querygenerator/SCFGQueryGenerator.java
index 5811c318..6817af5e 100644
--- a/src/java/main/ivory/sqe/querygenerator/SCFGQueryGenerator.java
+++ b/src/java/main/ivory/sqe/querygenerator/SCFGQueryGenerator.java
@@ -16,7 +16,7 @@
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
-import tl.lin.data.map.HMapSFW;
+import tl.lin.data.map.HMapStFW;
import tl.lin.data.map.MapKF;
import com.google.gson.JsonArray;
@@ -32,7 +32,7 @@
public class SCFGQueryGenerator implements QueryGenerator {
private static final Logger LOG = Logger.getLogger(SCFGQueryGenerator.class);
private Tokenizer defaultTokenizer, docLangTokenizer, queryLangTokenizerWithStemming, queryLangTokenizer, bigramTokenizer;
- private Map<String, Map<String, HMapSFW>> query2probMap;
+ private Map<String, Map<String, HMapStFW>> query2probMap;
private int length, numTransPerToken;
private boolean isDocStemmed, isStemming, bigramSegment = false;
private RetrievalEnvironment env;
@@ -101,7 +101,7 @@ public StructuredQuery parseQuery(String query, FileSystem fs, Configuration con
String origQuery = query.trim().split("\\|\\|\\|\\|")[0].trim();
String grammarFile = conf.get(Constants.GrammarPath);
- Map<String, HMapSFW> probMap = processGrammar(fs, conf, grammarFile);
+ Map<String, HMapStFW> probMap = processGrammar(fs, conf, grammarFile);
Map stemmed2Stemmed = Utils.getStemMapping(origQuery, defaultTokenizer, docLangTokenizer);
JsonArray tokenTranslations = new JsonArray();
@@ -134,8 +134,8 @@ public StructuredQuery parseQuery(String query, FileSystem fs, Configuration con
return new StructuredQuery(queryJson, length);
}
- public Map<String, HMapSFW> processGrammar(FileSystem fs, Configuration conf, String grammarFile) {
- Map<String, HMapSFW> probMap = Utils.generateTranslationTable(fs, conf, grammarFile,
+ public Map<String, HMapStFW> processGrammar(FileSystem fs, Configuration conf, String grammarFile) {
+ Map<String, HMapStFW> probMap = Utils.generateTranslationTable(fs, conf, grammarFile,
queryLangTokenizerWithStemming, docLangTokenizer);
if (probMap == null) {
LOG.info("No probabilities extracted from " + grammarFile);
@@ -147,7 +147,7 @@ public Map processGrammar(FileSystem fs, Configuration conf, St
}
private String getBestTranslation(String query, String token) {
- HMapSFW probDist = query2probMap.get(query).get(token);
+ HMapStFW probDist = query2probMap.get(query).get(token);
if(probDist == null){
return token;
@@ -164,8 +164,8 @@ private String getBestTranslation(String query, String token) {
return maxProbTrans;
}
- protected HMapSFW getTranslations(String query, String token, Map<String, HMapSFW> probMap, Map stemmed2Stemmed) {
- HMapSFW probDist = null;
+ protected HMapStFW getTranslations(String query, String token, Map<String, HMapStFW> probMap, Map stemmed2Stemmed) {
+ HMapStFW probDist = null;
try {
probDist = probMap.get(token);
} catch (NullPointerException e) {
@@ -175,7 +175,7 @@ protected HMapSFW getTranslations(String query, String token, Map
- private Map<String, HMapSFW> tok2tokDist;
+ private Map<String, HMapStFW> tok2tokDist;
private Set targetTokens; // all non-stopword target tokens s.t. aligned to some source non-stopword token
- private HMapSFW targetPhraseDist; // map from RHSs of rules to translation scores; there is only one RHS (equal to entire translation), if we don't have derivation info
- private HMapSIW sourceTokenCnt;
+ private HMapStFW targetPhraseDist; // map from RHSs of rules to translation scores; there is only one RHS (equal to entire translation), if we don't have derivation info
+ private HMapStIW sourceTokenCnt;
private String originalQuery;
private int count;
private Map stemMapping;
@@ -34,19 +34,19 @@ public void setTargetTokens(Set targetTokens) {
this.targetTokens = targetTokens;
}
- public HMapSIW getSourceTokenCnt() {
+ public HMapStIW getSourceTokenCnt() {
return sourceTokenCnt;
}
- public void setSourceTokenCnt(HMapSIW sourceTokenCnt) {
+ public void setSourceTokenCnt(HMapStIW sourceTokenCnt) {
this.sourceTokenCnt = sourceTokenCnt;
}
- public void setPhraseDist(HMapSFW dist) {
+ public void setPhraseDist(HMapStFW dist) {
targetPhraseDist = dist;
}
- public HMapSFW getPhraseDist() {
+ public HMapStFW getPhraseDist() {
return targetPhraseDist;
}
@@ -58,11 +58,11 @@ public void setOriginalQuery(String o) {
originalQuery = o;
}
- public Map<String, HMapSFW> getTokenDist() {
+ public Map<String, HMapStFW> getTokenDist() {
return tok2tokDist;
}
- public void setTokenDist(Map<String, HMapSFW> dist) {
+ public void setTokenDist(Map<String, HMapStFW> dist) {
tok2tokDist = dist;
}
@@ -74,7 +74,7 @@ public int getCount() {
return count;
}
- public HMapSFW getDistributionOf(String srcToken) {
+ public HMapStFW getDistributionOf(String srcToken) {
return tok2tokDist.get(srcToken);
}
}
diff --git a/src/java/main/ivory/sqe/querygenerator/TranslationFactory.java b/src/java/main/ivory/sqe/querygenerator/TranslationFactory.java
index 6dae3c00..de1cadb6 100644
--- a/src/java/main/ivory/sqe/querygenerator/TranslationFactory.java
+++ b/src/java/main/ivory/sqe/querygenerator/TranslationFactory.java
@@ -11,8 +11,8 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.log4j.Logger;
-import tl.lin.data.map.HMapSFW;
-import tl.lin.data.map.HMapSIW;
+import tl.lin.data.map.HMapStFW;
+import tl.lin.data.map.HMapStIW;
public class TranslationFactory {
private static final Logger LOG = Logger.getLogger(TranslationFactory.class);
@@ -46,12 +46,12 @@ public static Translation readTranslationsFromNBest(String queryRepresentation,
int one2many = conf.getInt(Constants.One2Many, 2);
// src token --> (trg token --> prob(trg|src))
- Map<String, HMapSFW> token2tokenDist = new HashMap<String, HMapSFW>();
+ Map<String, HMapStFW> token2tokenDist = new HashMap<String, HMapStFW>();
// target phrase --> prob
- HMapSFW phraseDist = new HMapSFW();
+ HMapStFW phraseDist = new HMapStFW();
- HMapSIW srcTokenCnt = new HMapSIW();
+ HMapStIW srcTokenCnt = new HMapStIW();
Set bagOfTargetTokens = new HashSet();
diff --git a/src/java/main/ivory/sqe/querygenerator/TranslationFromNBest.java b/src/java/main/ivory/sqe/querygenerator/TranslationFromNBest.java
index 36813f58..195b665f 100644
--- a/src/java/main/ivory/sqe/querygenerator/TranslationFromNBest.java
+++ b/src/java/main/ivory/sqe/querygenerator/TranslationFromNBest.java
@@ -3,12 +3,12 @@
import java.util.Map;
import java.util.Set;
-import tl.lin.data.map.HMapSFW;
-import tl.lin.data.map.HMapSIW;
+import tl.lin.data.map.HMapStFW;
+import tl.lin.data.map.HMapStIW;
public class TranslationFromNBest extends Translation {
- public TranslationFromNBest(int n, String origQuery, Map stemmed2stemmed, Set bagOfTargetTokens, Map<String, HMapSFW> token2tokenDist, HMapSFW phraseDist, HMapSIW srcTokenCnt) {
+ public TranslationFromNBest(int n, String origQuery, Map stemmed2stemmed, Set bagOfTargetTokens, Map<String, HMapStFW> token2tokenDist, HMapStFW phraseDist, HMapStIW srcTokenCnt) {
setOriginalQuery(origQuery);
setPhraseDist(phraseDist);
setTokenDist(token2tokenDist);
diff --git a/src/java/main/ivory/sqe/querygenerator/Utils.java b/src/java/main/ivory/sqe/querygenerator/Utils.java
index 98080098..5d7a1039 100644
--- a/src/java/main/ivory/sqe/querygenerator/Utils.java
+++ b/src/java/main/ivory/sqe/querygenerator/Utils.java
@@ -28,8 +28,8 @@
import tl.lin.data.map.HMapIV;
import tl.lin.data.map.HMapKF;
import tl.lin.data.map.HMapKI;
-import tl.lin.data.map.HMapSFW;
-import tl.lin.data.map.HMapSIW;
+import tl.lin.data.map.HMapStFW;
+import tl.lin.data.map.HMapStIW;
import tl.lin.data.map.MapKF;
import tl.lin.data.pair.PairOfStringFloat;
import tl.lin.data.pair.PairOfStrings;
@@ -72,14 +72,14 @@ public static String[] extractPhrases(String[] tokens, int windowSize) {
* @param phrase2score
* @param phrase2count
*/
- private static void addToPhraseTable(String fPhrase, String transPhrase, float prob, Map<String, HMapSFW> phrase2score, Map<String, HMapKI<String>> phrase2count){
+ private static void addToPhraseTable(String fPhrase, String transPhrase, float prob, Map<String, HMapStFW> phrase2score, Map<String, HMapKI<String>> phrase2count){
fPhrase = fPhrase.trim();
transPhrase = transPhrase.trim();
//LOG.info("Found translation phrase " + transPhrase);
if (!phrase2score.containsKey(fPhrase)) {
- phrase2score.put(fPhrase, new HMapSFW());
+ phrase2score.put(fPhrase, new HMapStFW());
}
// if same phrase extracted from multiple rules, average prob.s
@@ -188,7 +188,7 @@ private static String isConsecutiveWithStopwords(ArrayListOfInts lst, String[] r
* @param docLangTokenizer
* to check for stopwords on RHS
*/
- public static Map<String, HMapSFW> generateTranslationTable(FileSystem fs, Configuration conf, String grammarFile, Tokenizer queryLangTokenizer, Tokenizer docLangTokenizer) {
+ public static Map<String, HMapStFW> generateTranslationTable(FileSystem fs, Configuration conf, String grammarFile, Tokenizer queryLangTokenizer, Tokenizer docLangTokenizer) {
if (conf.getBoolean(Constants.Quiet, false)) {
LOG.setLevel(Level.OFF);
}
@@ -198,12 +198,12 @@ public static Map generateTranslationTable(FileSystem fs, Confi
int one2many = conf.getInt(Constants.One2Many, 2);
// scfgDist table is a set of (source_token --> X) maps, where X is a set of (token_trans --> score) maps
- Map<String, HMapSFW> scfgDist = new HashMap<String, HMapSFW>();
+ Map<String, HMapStFW> scfgDist = new HashMap<String, HMapStFW>();
// phrase2count table is a set of (source_phrase --> X) maps, where X is a set of (phrase_trans --> count) maps
- HMapSFW phraseDist = new HMapSFW();
+ HMapStFW phraseDist = new HMapStFW();
- HMapSIW srcTokenCnt = new HMapSIW();
+ HMapStIW srcTokenCnt = new HMapStIW();
Set bagOfTargetTokens = new HashSet();
@@ -230,8 +230,8 @@ public static Map generateTranslationTable(FileSystem fs, Confi
return scfgDist;
}
- public static void processRule(int isOne2Many, boolean isMany2Many, float score, String rule, Set bagOfTargetTokens, Map<String, HMapSFW> probDist,
- HMapSFW phraseDist, HMapSIW srcTokenCnt, Tokenizer queryLangTokenizer, Tokenizer docLangTokenizer, Map stemmed2Stemmed, Set unknownWords) {
+ public static void processRule(int isOne2Many, boolean isMany2Many, float score, String rule, Set bagOfTargetTokens, Map<String, HMapStFW> probDist,
+ HMapStFW phraseDist, HMapStIW srcTokenCnt, Tokenizer queryLangTokenizer, Tokenizer docLangTokenizer, Map stemmed2Stemmed, Set unknownWords) {
// LOG.info("Processing rule " + rule);
String[] parts = rule.split("\\|\\|\\|");
@@ -306,10 +306,10 @@ public static void processRule(int isOne2Many, boolean isMany2Many, float score,
bagOfTargetTokens.add(eTerm);
if (isOne2Many <= 1) {
if (probDist.containsKey(fTerm)) {
- HMapSFW eToken2Prob = probDist.get(fTerm);
+ HMapStFW eToken2Prob = probDist.get(fTerm);
eToken2Prob.increment(eTerm, weight);
}else {
- HMapSFW eToken2Prob = new HMapSFW();
+ HMapStFW eToken2Prob = new HMapStFW();
eToken2Prob.put(eTerm, weight);
probDist.put(fTerm, eToken2Prob);
}
@@ -336,10 +336,10 @@ public static void processRule(int isOne2Many, boolean isMany2Many, float score,
// update prob. distr.
if (probDist.containsKey(fTerm)) {
- HMapSFW eToken2Prob = probDist.get(fTerm);
+ HMapStFW eToken2Prob = probDist.get(fTerm);
eToken2Prob.increment(eTerm, weight);
}else {
- HMapSFW eToken2Prob = new HMapSFW();
+ HMapStFW eToken2Prob = new HMapStFW();
eToken2Prob.put(eTerm, weight);
probDist.put(fTerm, eToken2Prob);
}
@@ -427,8 +427,8 @@ private static HMapIV readAlignments(String[] alignments) {
* @param scale
* @param probMap
*/
- public static HMapSFW scaleProbMap(float threshold, float scale, HMapSFW probMap) {
- HMapSFW scaledProbMap = new HMapSFW();
+ public static HMapStFW scaleProbMap(float threshold, float scale, HMapStFW probMap) {
+ HMapStFW scaledProbMap = new HMapStFW();
for (MapKF.Entry entry : probMap.entrySet()) {
float pr = entry.getValue() * scale;
@@ -449,8 +449,8 @@ public static HMapSFW scaleProbMap(float threshold, float scale, HMapSFW probMap
* @param probMaps
* list of probability distributions
*/
- public static HMapSFW combineProbMaps(float threshold, float scale, List probMaps) {
- HMapSFW combinedProbMap = new HMapSFW();
+ public static HMapStFW combineProbMaps(float threshold, float scale, List probMaps) {
+ HMapStFW combinedProbMap = new HMapStFW();
int numDistributions = probMaps.size();
@@ -459,7 +459,7 @@ public static HMapSFW combineProbMaps(float threshold, float scale, List translationAlternatives = new HashSet();
float sumWeights = 0;
for (int i=0; i < numDistributions; i++) {
- HMapSFW dist = probMaps.get(i).getMap();
+ HMapStFW dist = probMaps.get(i).getMap();
float weight = probMaps.get(i).getWeight();
// don't add vocabulary from a distribution that has 0 weight
@@ -473,7 +473,7 @@ public static HMapSFW combineProbMaps(float threshold, float scale, List
- public static void normalize(Map<String, HMapSFW> probMap, float lexProbThreshold, float cumProbThreshold, int maxNumTrans) {
+ public static void normalize(Map<String, HMapStFW> probMap, float lexProbThreshold, float cumProbThreshold, int maxNumTrans) {