Skip to content

Commit

Permalink
add confidence for most likely host software of plugins
Browse files Browse the repository at this point in the history
  • Loading branch information
dave-s477 committed Dec 14, 2021
1 parent 730e4f6 commit f4bcba2
Showing 1 changed file with 127 additions and 3 deletions.
130 changes: 127 additions & 3 deletions Creation/02_SoftwareKG_Inference.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@
" \"dct\" : \"http://purl.org/dc/terms/\",\n",
" \"dbpedia-owl\" : \"http://dbpedia.org/ontology/\",\n",
" \"skgv\" : \"http://data.gesis.org/softwarekg/vocab/\",\n",
" \"skg\" : \"http://data.gesis.org/softwarekg/PMC/\"\n",
" \"skg\" : \"http://data.gesis.org/softwarekg/PMC/\",\n",
" \"irao\" : \"http://ontology.ethereal.cz/irao/\"\n",
"}"
]
},
Expand Down Expand Up @@ -419,7 +420,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Host software of plugins"
"# PlugIns of host software"
]
},
{
Expand Down Expand Up @@ -516,7 +517,7 @@
"source": [
"# Add statements with confidence value\n",
"for idx, r in enumerate(plugin_table):\n",
" node = URIRef(\"skg:inference/plugInOf/{}\".format(idx))\n",
" node = URIRef(\"skg:inference/hasPlugIn/{}\".format(idx))\n",
" g.add((node, RDF.type, RDF.Statement))\n",
" g.add((node, RDF.subject, URIRef(r[0])))\n",
" g.add((node, RDF.predicate ,URIRef(\"schema:softwareAddOn\")))\n",
Expand All @@ -537,6 +538,129 @@
"g = Graph()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Host software of plugins"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"q1='''\n",
"PREFIX schema: <http://schema.org/>\n",
"PREFIX nif: <http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#>\n",
"PREFIX skgv: <http://data.gesis.org/softwarekg/vocab/>\n",
"\n",
"SELECT ?document ?plugIn ?hostSoftware (COUNT(?hostSoftware) as ?num_mention_per_software_per_plugin_per_document)\n",
"WHERE\n",
"{\n",
" ?hostMention skgv:software ?hostSoftware .\n",
" ?hostMention skgv:referredToByPlugIn ?plugInMention .\n",
" ?plugInMention skgv:software ?plugIn .\n",
" ?document schema:mentions ?plugInMention .\n",
"}\n",
"GROUP BY ?document ?plugIn ?hostSoftware\n",
"OFFSET {offset}\n",
"LIMIT {limit}\n",
"'''\n",
"table1 = build_table(q1, 1_000_000, ['document','plugIn','hostSoftware',\n",
" 'num_mention_per_software_per_plugin_per_document'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"q2=\"\"\"\n",
"PREFIX schema: <http://schema.org/>\n",
"PREFIX nif: <http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#>\n",
"PREFIX skgv: <http://data.gesis.org/softwarekg/vocab/>\n",
"\n",
"SELECT ?document ?plugIn (COUNT(?plugIn) AS ?num_mention_per_plugin_per_document)\n",
"WHERE\n",
"{\n",
" ?plugInMention skgv:software ?plugIn .\n",
" ?anyHostMention skgv:referredToByPlugIn ?plugInMention .\n",
" ?document schema:mentions ?plugInMention .\n",
"}\n",
"GROUP BY ?document ?plugIn\n",
"OFFSET {offset}\n",
"LIMIT {limit}\n",
"\"\"\"\n",
"table2 = build_table(q2, 1_000_000, ['document','plugIn', 'num_mention_per_plugin_per_document'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# q3: for every plugin, count the distinct documents that contain a mention of it\n",
"# which some host mention points to via skgv:referredToByPlugIn.\n",
"# NOTE(review): {offset}/{limit} are presumably filled in by build_table for paging;\n",
"# build_table is defined outside this excerpt - confirm.\n",
"q3=\"\"\"\n",
"PREFIX schema: <http://schema.org/>\n",
"PREFIX nif: <http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#>\n",
"PREFIX skgv: <http://data.gesis.org/softwarekg/vocab/>\n",
"\n",
"SELECT ?plugIn (COUNT(DISTINCT ?document) AS ?num_article_with_plugin)\n",
"WHERE\n",
"{\n",
" ?plugInMention skgv:software ?plugIn .\n",
" ?someHostMention skgv:referredToByPlugIn ?plugInMention .\n",
" ?document schema:mentions ?plugInMention .\n",
"}\n",
"GROUP BY ?plugIn\n",
"OFFSET {offset}\n",
"LIMIT {limit}\n",
"\"\"\"\n",
"# The projection order above (?plugIn, then the count) matches this column list.\n",
"table3 = build_table(q3, 1_000_000, ['plugIn','num_article_with_plugin'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"host_table = workflow(table1,table2,table3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Add statements with confidence value.\n",
"# Each row of host_table becomes one reified rdf:Statement node:\n",
"#   r[0] -> rdf:subject, r[1] -> rdf:object, r[2] -> skgv:confidence (xsd:float).\n",
"# NOTE(review): the 'skg:', 'skgv:' and 'irao:' prefixes are written straight into\n",
"# URIRef; presumably they are resolved through the JSON-LD context at\n",
"# serialization time - confirm.\n",
"g = Graph()\n",
"for idx, r in enumerate(host_table):\n",
"    node = URIRef(\"skg:inference/hasHostSoftware/{}\".format(idx))\n",
"    g.add((node, RDF.type, RDF.Statement))\n",
"    g.add((node, RDF.subject, URIRef(r[0])))\n",
"    g.add((node, RDF.predicate, URIRef(\"irao:isPartOfSystem\")))\n",
"    g.add((node, RDF.object, URIRef(r[1])))\n",
"    g.add((node, URIRef(\"skgv:confidence\"), Literal(r[2], datatype=XSD.float)))\n",
"    # Print every row whose confidence is below 1.\n",
"    if float(r[2]) < 1:\n",
"        print(r)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Serialize\n",
"g.serialize(format=\"json-ld\", context=context, destination=\"softwarekg-hostsoftware-confidence.jsonld\")\n",
"g = Graph()"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down

0 comments on commit f4bcba2

Please sign in to comment.