From 67a73c496076f96ceb1bb90d4f60336213b150d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valter=20Sundstr=C3=B6m?= Date: Fri, 31 Jan 2025 15:47:40 +0100 Subject: [PATCH 01/11] Add IndexDataTest --- .../services/sparql/index/IndexDataTest.scala | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala diff --git a/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala new file mode 100644 index 00000000..655655d2 --- /dev/null +++ b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala @@ -0,0 +1,26 @@ +package se.lu.nateko.cp.meta.test.services.sparql.index + +import org.eclipse.rdf4j.model.{IRI, Value} +import org.scalatest.funspec.AnyFunSpec +import se.lu.nateko.cp.meta.instanceserver.{Rdf4jInstanceServer, TriplestoreConnection} +import se.lu.nateko.cp.meta.services.CpmetaVocab +import se.lu.nateko.cp.meta.services.sparql.magic.index.IndexData +import se.lu.nateko.cp.meta.utils.rdf4j.Loading + +class IndexDataTest extends AnyFunSpec { + describe("processTriple") { + it("clears fName of ObjEntry when hasName tuple is deleted") { + val repo = Loading.emptyInMemory + val server = new Rdf4jInstanceServer(repo) + val factory = repo.getValueFactory + val vocab = CpmetaVocab(factory) + val data = IndexData(100)() + + given TriplestoreConnection = server.getConnection() + + val insert = data.processTriple(_, _, _, true, vocab) + + insert(factory.createIRI("test:subject"), vocab.hasName, factory.createIRI("test:name")) + } + } +} From 8344ac8fa6a7ae5dadd85891d1258b5cb6abe19c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valter=20Sundstr=C3=B6m?= Date: Fri, 31 Jan 2025 16:50:18 +0100 Subject: [PATCH 02/11] WIP --- .../services/sparql/index/IndexDataTest.scala | 35 +++++++++++++++++-- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala index 655655d2..db079ff7 100644 --- a/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala +++ b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala @@ -6,6 +6,7 @@ import se.lu.nateko.cp.meta.instanceserver.{Rdf4jInstanceServer, TriplestoreConn import se.lu.nateko.cp.meta.services.CpmetaVocab import se.lu.nateko.cp.meta.services.sparql.magic.index.IndexData import se.lu.nateko.cp.meta.utils.rdf4j.Loading +import se.lu.nateko.cp.meta.core.crypto.Sha256Sum class IndexDataTest extends AnyFunSpec { describe("processTriple") { @@ -14,13 +15,41 @@ class IndexDataTest extends AnyFunSpec { val server = new Rdf4jInstanceServer(repo) val factory = repo.getValueFactory val vocab = CpmetaVocab(factory) + + val hash: Sha256Sum = Sha256Sum.fromString("AAAAAAAAAAAAAAAAAAAAAAAA").get + info(hash.toString()) + + // server.access: + // val hash = TriplestoreConnection.getHashsum(subject, vocab.hasSha256sum) + // info("hash: "+hash.result.get.toString()) + + + val subject = factory.createIRI("test:subject") + server.add(factory.createStatement(subject, vocab.hasName, factory.createIRI("test:name"))) + assert(server.getStatements(Some(subject), None, None).length == 1) + val data = IndexData(100)() + server.access { + val insert = data.processTriple(_, _, _, true, vocab) + // Ignored? + insert(subject, vocab.hasName, factory.createIRI("test:name")) + + assert(data.objs.length == 0) + val entry = data.getObjEntry(hash) + assert(data.objs.length == 1) + val entry2 = data.getObjEntry(hash) + assert(data.objs.length == 1) + assert(entry == entry2) + + info(data.objs.toString()) - given TriplestoreConnection = server.getConnection() + // insert(subject, vocab.hasSha256sum, factory.createIRI(hash.toString())) + // info(hash.toString()) - val insert = data.processTriple(_, _, _, true, vocab) + // val hash = TriplestoreConnection.getHashsum(subject, vocab.hasSha256sum) + // info(entry.fName) + } - insert(factory.createIRI("test:subject"), vocab.hasName, factory.createIRI("test:name")) } } } From be9df2c9af01fe07b79790019cd937bc43c0e27e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valter=20Sundstr=C3=B6m?= Date: Mon, 3 Feb 2025 15:43:09 +0100 Subject: [PATCH 03/11] Add failing test --- .../sparql/magic/index/IndexData.scala | 21 ++++++---- .../sparql/magic/index/ObjEntry.scala | 4 +- .../services/sparql/index/IndexDataTest.scala | 41 ++++++------------- 3 files changed, 28 insertions(+), 38 deletions(-) diff --git a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala index 57f61c5c..b1e4a9d8 100644 --- a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala +++ b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala @@ -293,13 +293,16 @@ class IndexData(nObjects: Int)( } def getObjEntry(hash: Sha256Sum): ObjEntry = { - idLookup.get(hash).fold { - val canonicalHash = hash.truncate - val oe = new ObjEntry(canonicalHash, objs.length, "") - objs += oe - idLookup += canonicalHash -> oe.idx - oe - }(objs.apply) + idLookup.get(hash) match { + case None => + val canonicalHash = hash.truncate + val oe = new ObjEntry(canonicalHash, objs.length, "") + objs += oe + idLookup += canonicalHash -> oe.idx + oe + case Some(obj) => + objs.apply(obj) + } } private def handleContinuousPropUpdate( @@ -346,7 +349,9 @@ class IndexData(nObjects: Int)( if (entry.prefix == "") entry.prefix = prefix.intern() Some(mod(entry)) - case _ => None + case _ => + log.warn(s"Not a DataObject: ${dobj.toString()}") + None private def addStat(obj: ObjEntry, initOk: MutableRoaringBitmap): Unit = for key <- keyForDobj(obj) do stats.getOrElseUpdate(key, emptyBitmap).add(obj.idx) diff --git a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/ObjEntry.scala b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/ObjEntry.scala index aef14e88..c3c84954 100644 --- a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/ObjEntry.scala +++ b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/ObjEntry.scala @@ -7,7 +7,7 @@ import se.lu.nateko.cp.meta.services.sparql.magic.ObjInfo import java.time.Instant import scala.compiletime.uninitialized -class ObjEntry(val hash: Sha256Sum, val idx: Int, var prefix: String) extends ObjInfo with Serializable { +final class ObjEntry(val hash: Sha256Sum, val idx: Int, var prefix: String) extends ObjInfo with Serializable { var spec: IRI = uninitialized var submitter: IRI = uninitialized var station: IRI = uninitialized @@ -21,7 +21,7 @@ class ObjEntry(val hash: Sha256Sum, val idx: Int, var prefix: String) extends Ob var submissionEnd: Long = Long.MinValue var isNextVersion: Boolean = false - private def dateTimeFromLong(dt: Long): Option[Instant] = + private final def dateTimeFromLong(dt: Long): Option[Instant] = if (dt == Long.MinValue) None else Some(Instant.ofEpochMilli(dt)) diff --git a/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala index db079ff7..d09a9850 100644 --- a/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala +++ b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala @@ -7,6 +7,7 @@ import se.lu.nateko.cp.meta.services.CpmetaVocab import se.lu.nateko.cp.meta.services.sparql.magic.index.IndexData import se.lu.nateko.cp.meta.utils.rdf4j.Loading import se.lu.nateko.cp.meta.core.crypto.Sha256Sum +import se.lu.nateko.cp.meta.services.CpVocab class IndexDataTest extends AnyFunSpec { describe("processTriple") { @@ -16,40 +17,24 @@ class IndexDataTest extends AnyFunSpec { val factory = repo.getValueFactory val vocab = CpmetaVocab(factory) - val hash: Sha256Sum = Sha256Sum.fromString("AAAAAAAAAAAAAAAAAAAAAAAA").get - info(hash.toString()) + val subject: IRI = factory.createIRI("https://meta.icos-cp.eu/objects/oAzNtfjXddcnG_irI8fJT7W6") - // server.access: - // val hash = TriplestoreConnection.getHashsum(subject, vocab.hasSha256sum) - // info("hash: "+hash.result.get.toString()) - - - val subject = factory.createIRI("test:subject") - server.add(factory.createStatement(subject, vocab.hasName, factory.createIRI("test:name"))) - assert(server.getStatements(Some(subject), None, None).length == 1) + // Make sure we insert a DataObject + val hash : Sha256Sum = + subject match { + case CpVocab.DataObject(hash, _prefix) => hash + } val data = IndexData(100)() server.access { - val insert = data.processTriple(_, _, _, true, vocab) - // Ignored? - insert(subject, vocab.hasName, factory.createIRI("test:name")) - - assert(data.objs.length == 0) - val entry = data.getObjEntry(hash) - assert(data.objs.length == 1) - val entry2 = data.getObjEntry(hash) + // Insert hasName triple + data.processTriple(subject, vocab.hasName, factory.createIRI("test:name"), true, vocab) assert(data.objs.length == 1) - assert(entry == entry2) - - info(data.objs.toString()) - - // insert(subject, vocab.hasSha256sum, factory.createIRI(hash.toString())) - // info(hash.toString()) - - // val hash = TriplestoreConnection.getHashsum(subject, vocab.hasSha256sum) - // info(entry.fName) + assert(data.getObjEntry(hash).fName == "test:name") + // Remove it + data.processTriple(subject, vocab.hasName, factory.createIRI("test:name"), false, vocab) + assert(data.getObjEntry(hash).fName == null) } - } } } From bee5cc77d5f4fd1dc2930ae67f975e60ea75affc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valter=20Sundstr=C3=B6m?= Date: Mon, 3 Feb 2025 16:20:23 +0100 Subject: [PATCH 04/11] Restore getObjEntry --- .../services/sparql/magic/index/IndexData.scala | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala index b1e4a9d8..bdf5128e 100644 --- a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala +++ b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala @@ -293,16 +293,13 @@ class IndexData(nObjects: Int)( } def getObjEntry(hash: Sha256Sum): ObjEntry = { - idLookup.get(hash) match { - case None => - val canonicalHash = hash.truncate - val oe = new ObjEntry(canonicalHash, objs.length, "") - objs += oe - idLookup += canonicalHash -> oe.idx - oe - case Some(obj) => - objs.apply(obj) - } + idLookup.get(hash).fold { + val canonicalHash = hash.truncate + val oe = new ObjEntry(canonicalHash, objs.length, "") + objs += oe + idLookup += canonicalHash -> oe.idx + oe + }(objs.apply) } private def handleContinuousPropUpdate( From 35166b0d2edd07187dcb092dc59f4683b5ca1b80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valter=20Sundstr=C3=B6m?= Date: Mon, 3 Feb 2025 16:30:51 +0100 Subject: [PATCH 05/11] Clear fName when hasName triple is removed --- .../cp/meta/services/sparql/magic/index/IndexData.scala | 8 +++----- .../meta/test/services/sparql/index/IndexDataTest.scala | 6 +++--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala index bdf5128e..04de5508 100644 --- a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala +++ b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala @@ -93,10 +93,10 @@ class IndexData(nObjects: Int)( } case `hasName` => - modForDobj(subj) { oe => + val _ = modForDobj(subj) { oe => val fName = obj.stringValue if (isAssertion) oe.fName = fName - else if (oe.fName == fName) oe.fileName == null + else if (oe.fName == fName) { oe.fName = null } handleContinuousPropUpdate(FileName, fName, oe.idx, isAssertion) } @@ -346,9 +346,7 @@ class IndexData(nObjects: Int)( if (entry.prefix == "") entry.prefix = prefix.intern() Some(mod(entry)) - case _ => - log.warn(s"Not a DataObject: ${dobj.toString()}") - None + case _ => None private def addStat(obj: ObjEntry, initOk: MutableRoaringBitmap): Unit = for key <- keyForDobj(obj) do stats.getOrElseUpdate(key, emptyBitmap).add(obj.idx) diff --git a/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala index d09a9850..92a2a2e4 100644 --- a/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala +++ b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala @@ -1,6 +1,6 @@ package se.lu.nateko.cp.meta.test.services.sparql.index -import org.eclipse.rdf4j.model.{IRI, Value} +import org.eclipse.rdf4j.model.IRI import org.scalatest.funspec.AnyFunSpec import se.lu.nateko.cp.meta.instanceserver.{Rdf4jInstanceServer, TriplestoreConnection} import se.lu.nateko.cp.meta.services.CpmetaVocab @@ -30,10 +30,10 @@ class IndexDataTest extends AnyFunSpec { // Insert hasName triple data.processTriple(subject, vocab.hasName, factory.createIRI("test:name"), true, vocab) assert(data.objs.length == 1) - assert(data.getObjEntry(hash).fName == "test:name") + assert(data.getObjEntry(hash).fileName == Some("test:name")) // Remove it data.processTriple(subject, vocab.hasName, factory.createIRI("test:name"), false, vocab) - assert(data.getObjEntry(hash).fName == null) + assert(data.getObjEntry(hash).fileName == None) } } } From 9db08dfb8140ffa11e1a7d8329249330ebb98c95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valter=20Sundstr=C3=B6m?= Date: Mon, 3 Feb 2025 16:37:37 +0100 Subject: [PATCH 06/11] Do not instantiate Rdf4jInstanceServer --- .../services/sparql/index/IndexDataTest.scala | 42 ++++++++++++------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala index 92a2a2e4..21fcee8f 100644 --- a/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala +++ b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala @@ -1,40 +1,50 @@ package se.lu.nateko.cp.meta.test.services.sparql.index -import org.eclipse.rdf4j.model.IRI +import org.eclipse.rdf4j.model.{IRI, Statement, Value, ValueFactory} import org.scalatest.funspec.AnyFunSpec -import se.lu.nateko.cp.meta.instanceserver.{Rdf4jInstanceServer, TriplestoreConnection} -import se.lu.nateko.cp.meta.services.CpmetaVocab +import se.lu.nateko.cp.meta.api.CloseableIterator +import se.lu.nateko.cp.meta.core.crypto.Sha256Sum +import se.lu.nateko.cp.meta.instanceserver.TriplestoreConnection import se.lu.nateko.cp.meta.services.sparql.magic.index.IndexData +import se.lu.nateko.cp.meta.services.{CpVocab, CpmetaVocab} import se.lu.nateko.cp.meta.utils.rdf4j.Loading -import se.lu.nateko.cp.meta.core.crypto.Sha256Sum -import se.lu.nateko.cp.meta.services.CpVocab + +// IndexData requires a TriplestoreConnection but in current tests it is not actually used. +private class DummyTSC extends TriplestoreConnection { + override def close(): Unit = ??? + override def primaryContext: IRI = ??? + override def readContexts: Seq[IRI] = ??? + override def factory: ValueFactory = ??? + override def getStatements(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): CloseableIterator[Statement] = ??? + override def hasStatement(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): Boolean = ??? + override def withContexts(primary: IRI, read: Seq[IRI]): TriplestoreConnection = ??? +} class IndexDataTest extends AnyFunSpec { describe("processTriple") { it("clears fName of ObjEntry when hasName tuple is deleted") { val repo = Loading.emptyInMemory - val server = new Rdf4jInstanceServer(repo) val factory = repo.getValueFactory val vocab = CpmetaVocab(factory) val subject: IRI = factory.createIRI("https://meta.icos-cp.eu/objects/oAzNtfjXddcnG_irI8fJT7W6") // Make sure we insert a DataObject - val hash : Sha256Sum = + val hash: Sha256Sum = subject match { case CpVocab.DataObject(hash, _prefix) => hash } val data = IndexData(100)() - server.access { - // Insert hasName triple - data.processTriple(subject, vocab.hasName, factory.createIRI("test:name"), true, vocab) - assert(data.objs.length == 1) - assert(data.getObjEntry(hash).fileName == Some("test:name")) - // Remove it - data.processTriple(subject, vocab.hasName, factory.createIRI("test:name"), false, vocab) - assert(data.getObjEntry(hash).fileName == None) - } + given TriplestoreConnection = DummyTSC() + + // Insert hasName triple + data.processTriple(subject, vocab.hasName, factory.createIRI("test:name"), true, vocab) + assert(data.objs.length == 1) + assert(data.getObjEntry(hash).fileName == Some("test:name")) + // Remove it + data.processTriple(subject, vocab.hasName, factory.createIRI("test:name"), false, vocab) + assert(data.getObjEntry(hash).fileName == None) } } } From fa1202ac7f0838aebbea4213db14a435f9903f77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valter=20Sundstr=C3=B6m?= Date: Mon, 3 Feb 2025 16:53:24 +0100 Subject: [PATCH 07/11] Add StatementSource trait to reduce input surface of IndexData.processTriple --- .../meta/instanceserver/InstanceServer.scala | 18 ++++++++++-------- .../meta/services/sparql/magic/CpIndex.scala | 7 ++----- .../sparql/magic/index/IndexData.scala | 16 ++++++++-------- .../services/sparql/index/IndexDataTest.scala | 16 ++++++---------- 4 files changed, 26 insertions(+), 31 deletions(-) diff --git a/src/main/scala/se/lu/nateko/cp/meta/instanceserver/InstanceServer.scala b/src/main/scala/se/lu/nateko/cp/meta/instanceserver/InstanceServer.scala index 90e571f4..faa66484 100644 --- a/src/main/scala/se/lu/nateko/cp/meta/instanceserver/InstanceServer.scala +++ b/src/main/scala/se/lu/nateko/cp/meta/instanceserver/InstanceServer.scala @@ -64,13 +64,19 @@ trait InstanceServer extends AutoCloseable: end InstanceServer -trait TriplestoreConnection extends AutoCloseable: +trait StatementSource { + def getStatements(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): CloseableIterator[Statement] + def hasStatement(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): Boolean + + final def hasStatement(st: Statement): Boolean = st.getSubject() match + case subj: IRI => hasStatement(subj, st.getPredicate(), st.getObject()) + case _ => false +} + +trait TriplestoreConnection extends AutoCloseable, StatementSource: def primaryContext: IRI def readContexts: Seq[IRI] def factory: ValueFactory - - def getStatements(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): CloseableIterator[Statement] - def hasStatement(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): Boolean def withContexts(primary: IRI, read: Seq[IRI]): TriplestoreConnection final def withReadContexts(read: Seq[IRI]): TriplestoreConnection = @@ -81,10 +87,6 @@ trait TriplestoreConnection extends AutoCloseable: if readContexts.length == 1 && readContexts.head == primaryContext then this else withContexts(primaryContext, Seq(primaryContext)) - final def hasStatement(st: Statement): Boolean = st.getSubject() match - case subj: IRI => hasStatement(subj, st.getPredicate(), st.getObject()) - case _ => false - object TriplestoreConnection: import Validated.CardinalityExpectation.{AtMostOne, ExactlyOne} diff --git a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/CpIndex.scala b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/CpIndex.scala index 7cc6e9bb..61cda7db 100644 --- a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/CpIndex.scala +++ b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/CpIndex.scala @@ -1,6 +1,6 @@ package se.lu.nateko.cp.meta.services.sparql.magic -import org.eclipse.rdf4j.model.{IRI, Value, ValueFactory} +import org.eclipse.rdf4j.model.{IRI, ValueFactory} import org.eclipse.rdf4j.sail.Sail import org.roaringbitmap.buffer.{BufferFastAggregation, ImmutableRoaringBitmap, MutableRoaringBitmap} import org.slf4j.LoggerFactory @@ -232,13 +232,10 @@ class CpIndex(sail: Sail, geo: Future[GeoIndex], data: IndexData) extends ReadWr sail.accessEagerly: list.forEach: case RdfUpdate(Rdf4jStatement(subj, pred, obj), isAssertion) => - processUpdate(subj, pred, obj, isAssertion) + data.processTriple(subj, pred, obj, isAssertion, vocab) case _ => () list.clear() - private def processUpdate(subj: IRI, pred: IRI, obj: Value, isAssertion: Boolean)(using GlobConn): Unit = - data.processTriple(subj, pred, obj, isAssertion, vocab) - end CpIndex diff --git a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala index 04de5508..2e770ec7 100644 --- a/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala +++ b/src/main/scala/se/lu/nateko/cp/meta/services/sparql/magic/index/IndexData.scala @@ -7,7 +7,7 @@ import org.slf4j.LoggerFactory import se.lu.nateko.cp.meta.core.algo.DatetimeHierarchicalBitmap.DateTimeGeo import se.lu.nateko.cp.meta.core.algo.{DatetimeHierarchicalBitmap, HierarchicalBitmap} import se.lu.nateko.cp.meta.core.crypto.Sha256Sum -import se.lu.nateko.cp.meta.instanceserver.TriplestoreConnection as TSC +import se.lu.nateko.cp.meta.instanceserver.StatementSource import se.lu.nateko.cp.meta.services.linkeddata.UriSerializer.Hash import se.lu.nateko.cp.meta.services.sparql.index.* import se.lu.nateko.cp.meta.services.sparql.index.StringHierarchicalBitmap.StringGeo @@ -69,7 +69,7 @@ class IndexData(nObjects: Int)( .getOrElseUpdate(prop, new AnyRefMap[prop.ValueType, MutableRoaringBitmap]) .asInstanceOf[AnyRefMap[prop.ValueType, MutableRoaringBitmap]] - def processTriple(subj: IRI, pred: IRI, obj: Value, isAssertion: Boolean, vocab: CpmetaVocab)(using TSC): Unit = { + def processTriple(subj: IRI, pred: IRI, obj: Value, isAssertion: Boolean, vocab: CpmetaVocab)(using statements: StatementSource): Unit = { import vocab.* import vocab.prov.{wasAssociatedWith, startedAtTime, endedAtTime} import vocab.dcterms.hasPart @@ -204,7 +204,7 @@ class IndexData(nObjects: Int)( case _ => else if deprecated.contains(oe.idx) && // this was to prevent needless repo access - !TSC.hasStatement(null, isNextVersionOf, obj) + !statements.hasStatement(null, isNextVersionOf, obj) then deprecated.remove(oe.idx) } @@ -220,7 +220,7 @@ class IndexData(nObjects: Int)( val deprecated = boolBitmap(DeprecationFlag) val directPrevVers: IndexedSeq[Int] = - TSC.getStatements(subj, isNextVersionOf, null) + statements.getStatements(subj, isNextVersionOf, null) .flatMap(st => modForDobj(st.getObject)(_.idx)) .toIndexedSeq @@ -324,8 +324,8 @@ class IndexData(nObjects: Int)( if (isAssertion) hasVarsBm.add(idx) else hasVarsBm.remove(idx) } - private def nextVersCollIsComplete(obj: IRI, vocab: CpmetaVocab)(using TSC): Boolean = - TSC.getStatements(obj, vocab.dcterms.hasPart, null) + private def nextVersCollIsComplete(obj: IRI, vocab: CpmetaVocab)(using statements: StatementSource): Boolean = + statements.getStatements(obj, vocab.dcterms.hasPart, null) .collect: case Rdf4jStatement(_, _, member: IRI) => modForDobj(member): oe => oe.isNextVersion = true @@ -334,8 +334,8 @@ class IndexData(nObjects: Int)( .toIndexedSeq .exists(identity) - private def getIdxsOfPrevVersThroughColl(deprecator: IRI, vocab: CpmetaVocab)(using TSC): Option[Int] = - TSC.getStatements(null, vocab.dcterms.hasPart, deprecator) + private def getIdxsOfPrevVersThroughColl(deprecator: IRI, vocab: CpmetaVocab)(using statements: StatementSource): Option[Int] = + statements.getStatements(null, vocab.dcterms.hasPart, deprecator) .collect { case Rdf4jStatement(CpVocab.NextVersColl(oldHash), _, _) => getObjEntry(oldHash).idx } .toIndexedSeq .headOption diff --git a/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala index 21fcee8f..0e4e90db 100644 --- a/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala +++ b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala @@ -1,23 +1,19 @@ package se.lu.nateko.cp.meta.test.services.sparql.index -import org.eclipse.rdf4j.model.{IRI, Statement, Value, ValueFactory} +import org.eclipse.rdf4j.model.{IRI, Statement, Value} import org.scalatest.funspec.AnyFunSpec import se.lu.nateko.cp.meta.api.CloseableIterator import se.lu.nateko.cp.meta.core.crypto.Sha256Sum -import se.lu.nateko.cp.meta.instanceserver.TriplestoreConnection +import se.lu.nateko.cp.meta.instanceserver.StatementSource import se.lu.nateko.cp.meta.services.sparql.magic.index.IndexData import se.lu.nateko.cp.meta.services.{CpVocab, CpmetaVocab} import se.lu.nateko.cp.meta.utils.rdf4j.Loading -// IndexData requires a TriplestoreConnection but in current tests it is not actually used. -private class DummyTSC extends TriplestoreConnection { - override def close(): Unit = ??? - override def primaryContext: IRI = ??? - override def readContexts: Seq[IRI] = ??? - override def factory: ValueFactory = ??? +// IndexData requires a StatementSource but in current tests it is not actually used, +// hence we can leave things unimplemented. +private class DummyStatements extends StatementSource { override def getStatements(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): CloseableIterator[Statement] = ??? override def hasStatement(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): Boolean = ??? - override def withContexts(primary: IRI, read: Seq[IRI]): TriplestoreConnection = ??? } class IndexDataTest extends AnyFunSpec { @@ -36,7 +32,7 @@ class IndexDataTest extends AnyFunSpec { } val data = IndexData(100)() - given TriplestoreConnection = DummyTSC() + given StatementSource = DummyStatements() // Insert hasName triple data.processTriple(subject, vocab.hasName, factory.createIRI("test:name"), true, vocab) From 3392d9491a0c29bde24aa1a5cb6ba7fe493f1fa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valter=20Sundstr=C3=B6m?= Date: Mon, 3 Feb 2025 16:55:27 +0100 Subject: [PATCH 08/11] Improve test name --- .../cp/meta/test/services/sparql/index/IndexDataTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala index 0e4e90db..5cfdc0fa 100644 --- a/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala +++ b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala @@ -18,7 +18,7 @@ private class DummyStatements extends StatementSource { class IndexDataTest extends AnyFunSpec { describe("processTriple") { - it("clears fName of ObjEntry when hasName tuple is deleted") { + it("clears fileName of ObjEntry when hasName tuple is deleted") { val repo = Loading.emptyInMemory val factory = repo.getValueFactory val vocab = CpmetaVocab(factory) From 8488c866b362ca42ebee628083820effd8108f66 Mon Sep 17 00:00:00 2001 From: Oleg Mirzov Date: Wed, 5 Feb 2025 11:20:14 +0100 Subject: [PATCH 09/11] Polish the test code --- .../test/services/sparql/index/IndexDataTest.scala | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala index 5cfdc0fa..2c97397c 100644 --- a/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala +++ b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala @@ -1,13 +1,13 @@ package se.lu.nateko.cp.meta.test.services.sparql.index import org.eclipse.rdf4j.model.{IRI, Statement, Value} +import org.eclipse.rdf4j.model.impl.SimpleValueFactory import org.scalatest.funspec.AnyFunSpec import se.lu.nateko.cp.meta.api.CloseableIterator import se.lu.nateko.cp.meta.core.crypto.Sha256Sum import se.lu.nateko.cp.meta.instanceserver.StatementSource import se.lu.nateko.cp.meta.services.sparql.magic.index.IndexData import se.lu.nateko.cp.meta.services.{CpVocab, CpmetaVocab} -import se.lu.nateko.cp.meta.utils.rdf4j.Loading // IndexData requires a StatementSource but in current tests it is not actually used, // hence we can leave things unimplemented. @@ -19,8 +19,7 @@ private class DummyStatements extends StatementSource { class IndexDataTest extends AnyFunSpec { describe("processTriple") { it("clears fileName of ObjEntry when hasName tuple is deleted") { - val repo = Loading.emptyInMemory - val factory = repo.getValueFactory + val factory = SimpleValueFactory.getInstance() val vocab = CpmetaVocab(factory) val subject: IRI = factory.createIRI("https://meta.icos-cp.eu/objects/oAzNtfjXddcnG_irI8fJT7W6") @@ -35,12 +34,12 @@ class IndexDataTest extends AnyFunSpec { given StatementSource = DummyStatements() // Insert hasName triple - data.processTriple(subject, vocab.hasName, factory.createIRI("test:name"), true, vocab) + data.processTriple(subject, vocab.hasName, factory.createLiteral("test name"), true, vocab) assert(data.objs.length == 1) - assert(data.getObjEntry(hash).fileName == Some("test:name")) + assert(data.getObjEntry(hash).fileName === Some("test name")) // Remove it - data.processTriple(subject, vocab.hasName, factory.createIRI("test:name"), false, vocab) - assert(data.getObjEntry(hash).fileName == None) + data.processTriple(subject, vocab.hasName, factory.createLiteral("test name"), false, vocab) + assert(data.getObjEntry(hash).fileName === None) } } } From c1392865a0dd6ac74b9ad9990dca05162f6fc08d Mon Sep 17 00:00:00 2001 From: Oleg Mirzov Date: Wed, 5 Feb 2025 11:49:03 +0100 Subject: [PATCH 10/11] Further test code polishing --- .../services/sparql/index/IndexDataTest.scala | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala index 2c97397c..cb74193d 100644 --- a/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala +++ b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala @@ -4,17 +4,10 @@ import org.eclipse.rdf4j.model.{IRI, Statement, Value} import org.eclipse.rdf4j.model.impl.SimpleValueFactory import org.scalatest.funspec.AnyFunSpec import se.lu.nateko.cp.meta.api.CloseableIterator -import se.lu.nateko.cp.meta.core.crypto.Sha256Sum import se.lu.nateko.cp.meta.instanceserver.StatementSource import se.lu.nateko.cp.meta.services.sparql.magic.index.IndexData import se.lu.nateko.cp.meta.services.{CpVocab, CpmetaVocab} -// IndexData requires a StatementSource but in current tests it is not actually used, -// hence we can leave things unimplemented. -private class DummyStatements extends StatementSource { - override def getStatements(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): CloseableIterator[Statement] = ??? - override def hasStatement(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): Boolean = ??? -} class IndexDataTest extends AnyFunSpec { describe("processTriple") { @@ -25,13 +18,18 @@ class IndexDataTest extends AnyFunSpec { val subject: IRI = factory.createIRI("https://meta.icos-cp.eu/objects/oAzNtfjXddcnG_irI8fJT7W6") // Make sure we insert a DataObject - val hash: Sha256Sum = - subject match { - case CpVocab.DataObject(hash, _prefix) => hash - } + val CpVocab.DataObject(hash, _) = subject : @unchecked + // val hash = subject match + // case CpVocab.DataObject(hash, _) => hash val data = IndexData(100)() - given StatementSource = DummyStatements() + + // IndexData requires a StatementSource but in current tests it is not actually used, + // hence we can leave things unimplemented. + given StatementSource with + def getStatements(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): CloseableIterator[Statement] = ??? + def hasStatement(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): Boolean = ??? + // Insert hasName triple data.processTriple(subject, vocab.hasName, factory.createLiteral("test name"), true, vocab) From 02358cd412e13661b23a59b46e8236e19bbb5d09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valter=20Sundstr=C3=B6m?= Date: Wed, 5 Feb 2025 12:38:06 +0100 Subject: [PATCH 11/11] Some more fixup --- .../test/services/sparql/index/IndexDataTest.scala | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala index cb74193d..574c5285 100644 --- a/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala +++ b/src/test/scala/se/lu/nateko/cp/meta/test/services/sparql/index/IndexDataTest.scala @@ -14,30 +14,25 @@ class IndexDataTest extends AnyFunSpec { it("clears fileName of ObjEntry when hasName tuple is deleted") { val factory = SimpleValueFactory.getInstance() val vocab = CpmetaVocab(factory) - val subject: IRI = factory.createIRI("https://meta.icos-cp.eu/objects/oAzNtfjXddcnG_irI8fJT7W6") - - // Make sure we insert a DataObject val CpVocab.DataObject(hash, _) = subject : @unchecked - // val hash = subject match - // case CpVocab.DataObject(hash, _) => hash - val data = IndexData(100)() - // IndexData requires a StatementSource but in current tests it is not actually used, + // IndexData requires a StatementSource but in this case we never pull any statements, // hence we can leave things unimplemented. given StatementSource with def getStatements(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): CloseableIterator[Statement] = ??? def hasStatement(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): Boolean = ??? - // Insert hasName triple data.processTriple(subject, vocab.hasName, factory.createLiteral("test name"), true, vocab) assert(data.objs.length == 1) assert(data.getObjEntry(hash).fileName === Some("test name")) + // Remove it data.processTriple(subject, vocab.hasName, factory.createLiteral("test name"), false, vocab) assert(data.getObjEntry(hash).fileName === None) + assert(data.objs.length == 1) } } }