Skip to content

Commit

Permalink
handle labels with language tags
Browse files Browse the repository at this point in the history
  • Loading branch information
ahmad88me committed Mar 27, 2020
1 parent 6c90285 commit e2f86b4
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 9 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.DS_Store

# Qt project files
tada-hdt-entity.qbs
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ liblinux:

install:
mkdir -p $(HDIR)
cp $(HEADERS_ABS) $(HDIR)
cp $(HEADERS_ABS) $(HDIR)
$(MAKE) lib
mv $(LIBNAME) $(LIBDIR)
mv $(LIBNAME) $(LIBDIR)
ln -fs $(LIBDIR)/$(LIBNAME) $(LIBDIR)/$(LIBALIAS)
echo -e "tada_hdt_entity lib is installed"
$(MAKE) clean
Expand Down
8 changes: 4 additions & 4 deletions scripts/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ rm v1.0.zip
cd easy-logger-1.0;make install;cd ..;rm -Rf easy-logger-1.0

echo "Installing tabular-parser..."
wget https://github.com/ahmad88me/tabular-parser/archive/v1.1.zip
unzip v1.1.zip
rm v1.1.zip
cd tabular-parser-1.1;make install;cd ..;rm -Rf tabular-parser-1.1
wget https://github.com/ahmad88me/tabular-parser/archive/v1.2.zip
unzip v1.2.zip
rm v1.2.zip
cd tabular-parser-1.2;make install;cd ..;rm -Rf tabular-parser-1.2

echo "update linker caches..."
ldconfig
19 changes: 17 additions & 2 deletions src/entity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,14 @@ std::list<string>* EntityAnn::annotate_column(std::list<std::list<string>*>* dat
entity= *jt;
}
}
m_logger->log("annotate_column> compute intermediate coverage --- >");
if(this->compute_intermediate_coverage(entity, prop, double_levels)) {
m++;
}
delete prop;
}
m_m = m;
m_logger->log("annotate_column> m: "+to_string(m));
return this->annotate_semi_scored_column(m);
}
else {
Expand Down Expand Up @@ -137,13 +139,17 @@ std::list<string>* EntityAnn::get_entities_of_value(string value) {
IteratorTripleString* itt;
TripleString* triple;
std::list<string>* entities = new std::list<string>;
qvalue = get_taged(qvalue);
itt = hdt->search("", rdfs_label.c_str(), qvalue.c_str());
m_logger->log("get_entities_of_value: cell value <"+value+">");
while(itt->hasNext()) {
triple = itt->next();
entities->push_back(triple->getSubject());
m_logger->log("get_entities_of_value: "+triple->getSubject());
}
if(entities->size()==0){
m_logger->log("no values for qvalue<"+qvalue+">");
}
delete itt;
return entities;
}
Expand All @@ -159,6 +165,7 @@ std::list<string>* EntityAnn::get_entities_of_value(string value, std::list<stri
std::list<string>* prop_entities;
bool to_break;
qvalue = get_quoted(value);
qvalue = get_taged(qvalue);
entities = this->get_entities_of_value(value);
for(auto it = entities->cbegin(); it!=entities->cend(); it++) {
for(auto it2=properties->cbegin(); it2!=properties->cend(); it2++) {
Expand Down Expand Up @@ -333,13 +340,13 @@ bool EntityAnn::compute_intermediate_coverage(string cell_value) {


bool EntityAnn::compute_intermediate_coverage(string cell_value, std::list<string>* properties, bool double_level) {
string qcell_value;
// string qcell_value;
std::list<string>* classes;
std::list<string>* entities;
size_t Q_size, Z_size;
TNode* tnode;
m_logger->log("compute_intermediate_coverage> cell value: "+cell_value);
qcell_value= get_quoted(cell_value);
// qcell_value= get_quoted(cell_value);
entities = this->get_entities_of_value(cell_value, properties, double_level); // Z(v):
Z_size = entities->size();
m_logger->log("compute_intermediate_coverage> Z_size 1: "+to_string(Z_size)+" for cell value: "+cell_value);
Expand Down Expand Up @@ -622,9 +629,14 @@ string EntityAnn::get_quoted(string v){
if(qcell_value[0] != '\"') {
qcell_value = "\""+qcell_value+"\"";
}
// return qcell_value+m_lang_tag;
return qcell_value;
}

// Appends the configured language tag (m_lang_tag, e.g. "@en") to a literal.
// qv: the literal to tag; callers in this file pass the output of get_quoted().
// Returns qv immediately followed by m_lang_tag; an empty tag leaves qv as is.
string EntityAnn::get_taged(string qv){
    qv.append(m_lang_tag);
    return qv;
}

void EntityAnn::set_alpha(double alpha){
m_alpha = alpha;
}
Expand All @@ -633,3 +645,6 @@ double EntityAnn::get_alpha(){
return m_alpha;
}

// Sets the language tag that get_taged() appends to quoted literals.
// tag: stored verbatim in m_lang_tag (the tests pass it with the leading '@',
// e.g. "@en").
void EntityAnn::set_language_tag(string tag){
    this->m_lang_tag = tag;
}
3 changes: 3 additions & 0 deletions src/entity.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ class EntityAnn {
void set_alpha(double);
double get_alpha();
string get_quoted(string);
string get_taged(string);
std::list<string>* recompute_f(double);
void set_language_tag(string);
// EasyLogger* m_logger;
private:
EasyLogger* m_logger;
Expand All @@ -74,6 +76,7 @@ class EntityAnn {
double m_alpha;
double m_ambiguitity_penalty=2;
unsigned long m_m;
string m_lang_tag;
void init(string hdt_file_dir, string log_file_dir, double alpha);
void init(HDT* hdt_ptr, string log_file_dir, double alpha);
};
Expand Down
40 changes: 40 additions & 0 deletions src/tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,46 @@ namespace {
delete ea;
}

// Checks that annotation works when labels carry a language tag: with the tag
// set to "@en", column 0 of test_files/test5.csv (cyclist1, cyclist2) is
// annotated and the top candidate after recompute_f(0.1) must be dbo:Cyclist.
TEST(EntityTest, LangTag){
    EntityAnn* ea = new EntityAnn(hdt_file, log_file, 0);
    // The tag is appended to the quoted cell value before the label lookup.
    ea->set_language_tag("@en");
    std::list<string>* candidates;
    string class_uri = dbo_prefix+"Cyclist";
    std::list<std::list<string>*>* data;
    Parser p("test_files/test5.csv");
    data = p.parse_vertical();
    candidates = ea->annotate_column(data, 0, true, true);
    ea->get_graph()->print_nodes();
    delete candidates;
    candidates = ea->recompute_f(0.1);
    // Guard before front(): calling it on a null or empty list is undefined
    // behaviour, so fail with a clear assertion instead of crashing.
    ASSERT_TRUE(candidates != nullptr);
    ASSERT_FALSE(candidates->empty());
    ASSERT_STREQ(class_uri.c_str(),candidates->front().c_str());
    // NOTE(review): the list returned by recompute_f and `data` are not released
    // here; ownership is not visible from this test - confirm before adding deletes.
    delete ea;
}



// TEST(EntityTest, Temp){
// EntityAnn* ea = new EntityAnn("/Users/aalobaid/workspaces/Cworkspace/tada-hdt/dbpedia_all.hdt", log_file, 0);
// ea->set_language_tag("@en");
// //EntityAnn* ea = new EntityAnn(hdt_file, log_file);
// std::list<string>* candidates;
// string wrestler_class_uri = dbo_prefix+"Country";
// std::list<std::list<string>*>* data;
// Parser p("/Users/aalobaid/workspaces/Pyworkspace/tada-gam/local_data/t2dv2/11688006_0_8123036130090004213.csv");
// data = p.parse_vertical();
// for(auto it=data->cbegin();it!=data->cend();it++){
// for(auto jt=(*it)->cbegin();jt!=(*it)->cend();jt++){
// cout << "<" << (*jt) << ">" << "|";
// }
// cout << endl<<"----------"<<endl;
// }
// candidates = ea->annotate_column(data, 1, true, true);
// ea->get_graph()->print_nodes();
// delete candidates;
// candidates = ea->recompute_f(0.1);
// ASSERT_STREQ(wrestler_class_uri.c_str(),candidates->front().c_str());
// delete ea;
// }

}//namespace

Expand Down
6 changes: 5 additions & 1 deletion test.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,11 @@ dbr:rower2 rdf:type dbo:Rower .
dbr:rower2 rdfs:label "wr_common_2" .


dbr:cyclist1 rdf:type dbo:Cyclist .
dbr:cyclist2 rdf:type dbo:Cyclist .

dbr:cyclist1 rdfs:label "cyclist1"@en .
dbr:cyclist2 rdfs:label "cyclist2"@en .



Expand Down Expand Up @@ -114,7 +118,7 @@ dbo:FootballPlayer rdfs:subClassOf dbo:Athlete .
dbo:BasketballPlayer rdfs:subClassOf dbo:Athlete .
dbo:VolleyballPlayer rdfs:subClassOf dbo:Athlete .


dbo:Cyclist rdfs:subClassOf dbo:Athlete .
dbo:Rower rdfs:subClassOf dbo:Athlete .
dbo:Wrestler rdfs:subClassOf dbo:Athlete .
dbo:Boxer rdfs:subClassOf dbo:Athlete .
Expand Down
3 changes: 3 additions & 0 deletions test_files/test5.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
someheader
cyclist1
cyclist2

0 comments on commit e2f86b4

Please sign in to comment.