Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clear fileName of index objects when hasName triple is removed #288

Merged
merged 11 commits into from
Feb 5, 2025
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,19 @@ trait InstanceServer extends AutoCloseable:

end InstanceServer

trait TriplestoreConnection extends AutoCloseable:
trait StatementSource {
def getStatements(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): CloseableIterator[Statement]
def hasStatement(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): Boolean

final def hasStatement(st: Statement): Boolean = st.getSubject() match
case subj: IRI => hasStatement(subj, st.getPredicate(), st.getObject())
case _ => false
}

trait TriplestoreConnection extends AutoCloseable, StatementSource:
def primaryContext: IRI
def readContexts: Seq[IRI]
def factory: ValueFactory

def getStatements(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): CloseableIterator[Statement]
def hasStatement(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): Boolean
def withContexts(primary: IRI, read: Seq[IRI]): TriplestoreConnection

final def withReadContexts(read: Seq[IRI]): TriplestoreConnection =
Expand All @@ -81,10 +87,6 @@ trait TriplestoreConnection extends AutoCloseable:
if readContexts.length == 1 && readContexts.head == primaryContext then this
else withContexts(primaryContext, Seq(primaryContext))

final def hasStatement(st: Statement): Boolean = st.getSubject() match
case subj: IRI => hasStatement(subj, st.getPredicate(), st.getObject())
case _ => false


object TriplestoreConnection:
import Validated.CardinalityExpectation.{AtMostOne, ExactlyOne}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package se.lu.nateko.cp.meta.services.sparql.magic

import org.eclipse.rdf4j.model.{IRI, Value, ValueFactory}
import org.eclipse.rdf4j.model.{IRI, ValueFactory}
import org.eclipse.rdf4j.sail.Sail
import org.roaringbitmap.buffer.{BufferFastAggregation, ImmutableRoaringBitmap, MutableRoaringBitmap}
import org.slf4j.LoggerFactory
Expand Down Expand Up @@ -232,13 +232,10 @@ class CpIndex(sail: Sail, geo: Future[GeoIndex], data: IndexData) extends ReadWr
sail.accessEagerly:
list.forEach:
case RdfUpdate(Rdf4jStatement(subj, pred, obj), isAssertion) =>
processUpdate(subj, pred, obj, isAssertion)
data.processTriple(subj, pred, obj, isAssertion, vocab)
case _ => ()
list.clear()

private def processUpdate(subj: IRI, pred: IRI, obj: Value, isAssertion: Boolean)(using GlobConn): Unit =
data.processTriple(subj, pred, obj, isAssertion, vocab)

end CpIndex


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import org.slf4j.LoggerFactory
import se.lu.nateko.cp.meta.core.algo.DatetimeHierarchicalBitmap.DateTimeGeo
import se.lu.nateko.cp.meta.core.algo.{DatetimeHierarchicalBitmap, HierarchicalBitmap}
import se.lu.nateko.cp.meta.core.crypto.Sha256Sum
import se.lu.nateko.cp.meta.instanceserver.TriplestoreConnection as TSC
import se.lu.nateko.cp.meta.instanceserver.StatementSource
import se.lu.nateko.cp.meta.services.linkeddata.UriSerializer.Hash
import se.lu.nateko.cp.meta.services.sparql.index.*
import se.lu.nateko.cp.meta.services.sparql.index.StringHierarchicalBitmap.StringGeo
Expand Down Expand Up @@ -69,7 +69,7 @@ class IndexData(nObjects: Int)(
.getOrElseUpdate(prop, new AnyRefMap[prop.ValueType, MutableRoaringBitmap])
.asInstanceOf[AnyRefMap[prop.ValueType, MutableRoaringBitmap]]

def processTriple(subj: IRI, pred: IRI, obj: Value, isAssertion: Boolean, vocab: CpmetaVocab)(using TSC): Unit = {
def processTriple(subj: IRI, pred: IRI, obj: Value, isAssertion: Boolean, vocab: CpmetaVocab)(using statements: StatementSource): Unit = {
import vocab.*
import vocab.prov.{wasAssociatedWith, startedAtTime, endedAtTime}
import vocab.dcterms.hasPart
Expand All @@ -93,10 +93,10 @@ class IndexData(nObjects: Int)(
}

case `hasName` =>
modForDobj(subj) { oe =>
val _ = modForDobj(subj) { oe =>
val fName = obj.stringValue
if (isAssertion) oe.fName = fName
else if (oe.fName == fName) oe.fileName == null
else if (oe.fName == fName) { oe.fName = null }
handleContinuousPropUpdate(FileName, fName, oe.idx, isAssertion)
}

Expand Down Expand Up @@ -204,7 +204,7 @@ class IndexData(nObjects: Int)(
case _ =>
else if
deprecated.contains(oe.idx) && // this was to prevent needless repo access
!TSC.hasStatement(null, isNextVersionOf, obj)
!statements.hasStatement(null, isNextVersionOf, obj)
then deprecated.remove(oe.idx)
}

Expand All @@ -220,7 +220,7 @@ class IndexData(nObjects: Int)(
val deprecated = boolBitmap(DeprecationFlag)

val directPrevVers: IndexedSeq[Int] =
TSC.getStatements(subj, isNextVersionOf, null)
statements.getStatements(subj, isNextVersionOf, null)
.flatMap(st => modForDobj(st.getObject)(_.idx))
.toIndexedSeq

Expand Down Expand Up @@ -324,8 +324,8 @@ class IndexData(nObjects: Int)(
if (isAssertion) hasVarsBm.add(idx) else hasVarsBm.remove(idx)
}

private def nextVersCollIsComplete(obj: IRI, vocab: CpmetaVocab)(using TSC): Boolean =
TSC.getStatements(obj, vocab.dcterms.hasPart, null)
private def nextVersCollIsComplete(obj: IRI, vocab: CpmetaVocab)(using statements: StatementSource): Boolean =
statements.getStatements(obj, vocab.dcterms.hasPart, null)
.collect:
case Rdf4jStatement(_, _, member: IRI) => modForDobj(member): oe =>
oe.isNextVersion = true
Expand All @@ -334,8 +334,8 @@ class IndexData(nObjects: Int)(
.toIndexedSeq
.exists(identity)

private def getIdxsOfPrevVersThroughColl(deprecator: IRI, vocab: CpmetaVocab)(using TSC): Option[Int] =
TSC.getStatements(null, vocab.dcterms.hasPart, deprecator)
private def getIdxsOfPrevVersThroughColl(deprecator: IRI, vocab: CpmetaVocab)(using statements: StatementSource): Option[Int] =
statements.getStatements(null, vocab.dcterms.hasPart, deprecator)
.collect { case Rdf4jStatement(CpVocab.NextVersColl(oldHash), _, _) => getObjEntry(oldHash).idx }
.toIndexedSeq
.headOption
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import se.lu.nateko.cp.meta.services.sparql.magic.ObjInfo
import java.time.Instant
import scala.compiletime.uninitialized

class ObjEntry(val hash: Sha256Sum, val idx: Int, var prefix: String) extends ObjInfo with Serializable {
final class ObjEntry(val hash: Sha256Sum, val idx: Int, var prefix: String) extends ObjInfo with Serializable {
var spec: IRI = uninitialized
var submitter: IRI = uninitialized
var station: IRI = uninitialized
Expand All @@ -21,7 +21,7 @@ class ObjEntry(val hash: Sha256Sum, val idx: Int, var prefix: String) extends Ob
var submissionEnd: Long = Long.MinValue
var isNextVersion: Boolean = false

private def dateTimeFromLong(dt: Long): Option[Instant] =
private final def dateTimeFromLong(dt: Long): Option[Instant] =
if (dt == Long.MinValue) None
else Some(Instant.ofEpochMilli(dt))

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package se.lu.nateko.cp.meta.test.services.sparql.index

import org.eclipse.rdf4j.model.{IRI, Statement, Value}
import org.eclipse.rdf4j.model.impl.SimpleValueFactory
import org.scalatest.funspec.AnyFunSpec
import se.lu.nateko.cp.meta.api.CloseableIterator
import se.lu.nateko.cp.meta.instanceserver.StatementSource
import se.lu.nateko.cp.meta.services.sparql.magic.index.IndexData
import se.lu.nateko.cp.meta.services.{CpVocab, CpmetaVocab}


class IndexDataTest extends AnyFunSpec {
emmaicos marked this conversation as resolved.
Show resolved Hide resolved
describe("processTriple") {
it("clears fileName of ObjEntry when hasName tuple is deleted") {
val factory = SimpleValueFactory.getInstance()
val vocab = CpmetaVocab(factory)
val subject: IRI = factory.createIRI("https://meta.icos-cp.eu/objects/oAzNtfjXddcnG_irI8fJT7W6")
emmaicos marked this conversation as resolved.
Show resolved Hide resolved
val CpVocab.DataObject(hash, _) = subject : @unchecked
val data = IndexData(100)()

// IndexData requires a StatementSource but in this case we never pull any statements,
// hence we can leave things unimplemented.
given StatementSource with
def getStatements(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): CloseableIterator[Statement] = ???
def hasStatement(subject: IRI | Null, predicate: IRI | Null, obj: Value | Null): Boolean = ???

// Insert hasName triple
data.processTriple(subject, vocab.hasName, factory.createLiteral("test name"), true, vocab)
assert(data.objs.length == 1)
assert(data.getObjEntry(hash).fileName === Some("test name"))

// Remove it
data.processTriple(subject, vocab.hasName, factory.createLiteral("test name"), false, vocab)
assert(data.getObjEntry(hash).fileName === None)
assert(data.objs.length == 1)
}
}
}