Skip to content

Commit

Permalink
Bug 3052: Support the new identifiers.org format and resources.
Browse files Browse the repository at this point in the history
  • Loading branch information
shoops committed May 5, 2022
1 parent 551c06a commit 6995c9e
Show file tree
Hide file tree
Showing 5 changed files with 1,777 additions and 112 deletions.
61 changes: 8 additions & 53 deletions copasi/MIRIAM/CConstants.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (C) 2019 - 2021 by Pedro Mendes, Rector and Visitors of the
// Copyright (C) 2019 - 2022 by Pedro Mendes, Rector and Visitors of the
// University of Virginia, University of Heidelberg, and University
// of Connecticut School of Medicine.
// All rights reserved.
Expand Down Expand Up @@ -48,7 +48,7 @@ void CMIRIAMResourceObject::unescapeId(std::string & id)
id.find_first_not_of("0123456789abcdefABCDEF", pos + 1) > pos + 2)
{
char ascii[2];
ascii[0] = (unsigned char) strtol(id.substr(pos + 1 , 2).c_str(), NULL, 16);
ascii[0] = (unsigned char) strtol(id.substr(pos + 1, 2).c_str(), NULL, 16);
ascii[1] = 0x0;
id.replace(pos, 3, CCopasiXMLInterface::utf8(ascii));
}
Expand Down Expand Up @@ -148,14 +148,15 @@ std::string CMIRIAMResourceObject::getURI() const

std::string CMIRIAMResourceObject::getIdentifiersOrgURL() const
{
std::string URL = mpResources->getMIRIAMResource(mResource).getIdentifiersOrgURL();

if (URL == "http://identifiers.org/unknown")
std::string URL = mpResources->getMIRIAMResource(mResource).createIdentifiersOrgURL(mId);

if (URL.find("http://identifiers.org/unknown") != std::string::npos)
{
return mId;
}

return URL + "/" + mId;
return URL;
}

bool CMIRIAMResourceObject::setNode(CRDFNode * pNode)
Expand Down Expand Up @@ -215,62 +216,16 @@ bool CMIRIAMResourceObject::isValid(const std::string & URI) const

void CMIRIAMResourceObject::extractId(const std::string & uri)
{
std::string URI;

if (uri.length() > 8 && uri.substr(0, 8) == "https://")
URI = "http://" + uri.substr(8);
else
URI = uri;

mId = "";
mId = uri;

// Check whether the resource is known.
if (mpResources == NULL ||
mResource == C_INVALID_INDEX)
{
mId = URI;
return;
}

int offset;
const std::string * pTmp = & mpResources->getMIRIAMResource(mResource).getMIRIAMURI();

if (URI.substr(0, pTmp->length()) == *pTmp &&
URI.length() > pTmp->length())
{
offset = (pTmp->at(pTmp->length() - 1) == '/') ? 0 : 1;
mId = URI.substr(pTmp->length() + offset);
}

if (mId == "")
{
std::string Tmp = mpResources->getMIRIAMResource(mResource).getIdentifiersOrgURL();

if (URI.substr(0, Tmp.length()) == Tmp &&
URI.length() > Tmp.length())
{
offset = (Tmp[Tmp.length() - 1] == '/') ? 0 : 1;
mId = URI.substr(Tmp.length() + offset);
}
}

if (mId == "")
{
// We need to check for deprecated URIs
const CCopasiParameterGroup * pDeprecated = &(mpResources->getMIRIAMResource(mResource)).getMIRIAMDeprecated();
CCopasiParameterGroup::index_iterator itDeprecated = pDeprecated->beginIndex();
CCopasiParameterGroup::index_iterator endDeprecated = pDeprecated->endIndex();

for (; itDeprecated != endDeprecated; ++itDeprecated)
if (URI.substr(0, (*itDeprecated)->getValue< std::string >().length()) == (*itDeprecated)->getValue< std::string >() &&
URI.length() > (*itDeprecated)->getValue< std::string >().length())
{
const std::string & uri = (*itDeprecated)->getValue< std::string >();
offset = (uri[uri.length() - 1] == '/') ? 0 : 1;
mId = URI.substr(uri.length() + offset);
break;
}
}
mId = mpResources->getMIRIAMResource(mResource).extractId(uri);

unescapeId(mId);

Expand Down
179 changes: 146 additions & 33 deletions copasi/MIRIAM/CMIRIAMResource.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (C) 2019 - 2021 by Pedro Mendes, Rector and Visitors of the
// Copyright (C) 2019 - 2022 by Pedro Mendes, Rector and Visitors of the
// University of Virginia, University of Heidelberg, and University
// of Connecticut School of Medicine.
// All rights reserved.
Expand Down Expand Up @@ -225,6 +225,7 @@ bool CMIRIAMResources::updateMIRIAMResourcesFromFile(CProcessReport * pProcessRe
URI == "urn:miriam:doi" ||
URI == "urn:miriam:pubmed" ||
URI == "urn:miriam:isbn");
pMIRIAMResource->setMIRIAMNamespaceEmbeddedInPattern(dataType.getAttrValue("namespaceEmbeddedInPattern") == "True");

pTmpCpyCMIRIAMResources->addParameter(pMIRIAMResource);

Expand Down Expand Up @@ -326,8 +327,9 @@ void CMIRIAMResources::createURIMap()
for (; it != end; ++it)
{
pResource = static_cast< CMIRIAMResource * >(*it);
mURI2Resource[pResource->getMIRIAMURI() + ":"] = Index;
mURI2Resource[pResource->getIdentifiersOrgURL() + "/"] = Index;
mURI2Resource[pResource->getMIRIAMURI() + ":" ] = Index;
mURI2Resource[pResource->getIdentifiersOrgURL(true) + ":" ] = Index;
mURI2Resource[pResource->getIdentifiersOrgURL(false) + "/" ] = Index;

const CCopasiParameterGroup * pDeprecated = &pResource->getMIRIAMDeprecated();
CCopasiParameterGroup::index_iterator itDeprecated = pDeprecated->beginIndex();
Expand Down Expand Up @@ -369,20 +371,19 @@ size_t CMIRIAMResources::getMIRIAMResourceIndex(const std::string & uri) const
else
URI = uri;

std::map< std::string, size_t >::const_iterator it = mURI2Resource.lower_bound(URI);
std::map< std::string, size_t >::const_iterator end = mURI2Resource.upper_bound(URI);
std::pair< std::map< std::string, size_t >::const_iterator, std::map< std::string, size_t >::const_iterator > range = mURI2Resource.equal_range(URI);

if (it == mURI2Resource.begin())
if (range.first == mURI2Resource.begin())
return index;

it--;
range.first--;

for (; it != end; ++it)
for (; range.first != range.second; ++range.first)
{
// Check whether the URI base of the candidate matches.
if (URI.compare(0, it->first.length(), it->first) == 0)
if (URI.compare(0, range.first->first.length(), range.first->first) == 0)
{
index = it->second;
index = range.first->second;
break;
}
}
Expand Down Expand Up @@ -431,38 +432,48 @@ CMIRIAMResources::~CMIRIAMResources()

/////////////////////////////////////////////////////////////////////////////////////
CMIRIAMResource::CMIRIAMResource(const std::string & name,
const CDataContainer * pParent) :
CCopasiParameterGroup(name, pParent),
mpDisplayName(NULL),
mpURI(NULL),
mpCitation(NULL),
mpDeprecated(NULL)
{initializeParameter();}
const CDataContainer * pParent)
: CCopasiParameterGroup(name, pParent)
, mpDisplayName(NULL)
, mpURI(NULL)
, mpCitation(NULL)
, mpDeprecated(NULL)
, mpNamespaceEmbeddedInPattern(NULL)
{
initializeParameter();
}

CMIRIAMResource::CMIRIAMResource(const CMIRIAMResource & src,
const CDataContainer * pParent):
CCopasiParameterGroup(src, pParent),
mpDisplayName(NULL),
mpURI(NULL),
mpCitation(NULL),
mpDeprecated(NULL)
{initializeParameter();}
const CDataContainer * pParent)
: CCopasiParameterGroup(src, pParent)
, mpDisplayName(NULL)
, mpURI(NULL)
, mpCitation(NULL)
, mpDeprecated(NULL)
, mpNamespaceEmbeddedInPattern(NULL)
{
initializeParameter();
}

CMIRIAMResource::CMIRIAMResource(const CCopasiParameterGroup & group,
const CDataContainer * pParent):
CCopasiParameterGroup(group, pParent),
mpDisplayName(NULL),
mpURI(NULL),
mpCitation(NULL),
mpDeprecated(NULL)
{initializeParameter();}
const CDataContainer * pParent)
: CCopasiParameterGroup(group, pParent)
, mpDisplayName(NULL)
, mpURI(NULL)
, mpCitation(NULL)
, mpDeprecated(NULL)
, mpNamespaceEmbeddedInPattern(NULL)
{
initializeParameter();
}

/**
 * Make sure every parameter of this resource group exists and cache the
 * pointers to their values in the corresponding members.
 *
 * Parameters which are not yet present are created with the default given
 * here: an empty string for the textual ones and false for the flags.
 * The calls are kept in their established order.
 */
void CMIRIAMResource::initializeParameter()
{
  // Textual attributes of the resource.
  mpDisplayName = assertParameter("DisplayName", CCopasiParameter::Type::STRING, std::string());
  mpURI = assertParameter("URI", CCopasiParameter::Type::STRING, std::string());
  mpPattern = assertParameter("Pattern", CCopasiParameter::Type::STRING, std::string());

  // Boolean attributes of the resource.
  mpCitation = assertParameter("Citation", CCopasiParameter::Type::BOOL, false);
  mpNamespaceEmbeddedInPattern = assertParameter("NamespaceEmbeddedInPattern", CCopasiParameter::Type::BOOL, false);

  // Sub group collecting the deprecated URLs (see addDeprecatedURL()).
  mpDeprecated = assertGroup("Deprecated");
}

Expand Down Expand Up @@ -514,9 +525,24 @@ const std::string & CMIRIAMResource::getMIRIAMURI() const
return *mpURI;
}

std::string CMIRIAMResource::getIdentifiersOrgURL() const
std::string CMIRIAMResource::getIdentifiersOrgURL(const bool & compact) const
{
if (!compact
|| !*mpNamespaceEmbeddedInPattern)
return "http://identifiers.org/" + mpURI->substr(11);

return "http://identifiers.org/" + extractNamespaceFromPattern();
}

std::string CMIRIAMResource::extractNamespaceFromPattern() const
{
return "http://identifiers.org/" + mpURI->substr(11);
if (*mpNamespaceEmbeddedInPattern)
{
std::string::size_type start = mpPattern->find('^') + 1;
return mpPattern->substr(start, mpPattern->find(':', start) - start);
}

return "";
}

void CMIRIAMResource::setMIRIAMPattern(const std::string & pattern)
Expand All @@ -539,6 +565,16 @@ const bool & CMIRIAMResource::getMIRIAMCitation() const
return *mpCitation;
}

/**
 * Set the "NamespaceEmbeddedInPattern" flag of this resource.
 *
 * The value is written through mpNamespaceEmbeddedInPattern, i.e., into the
 * boolean parameter of this group. When the flag is true the compact
 * identifiers.org form is used by getIdentifiersOrgURL(true), extractId()
 * and createIdentifiersOrgURL().
 *
 * @param const bool & namespaceEmbeddedInPattern the new value of the flag
 */
void CMIRIAMResource::setMIRIAMNamespaceEmbeddedInPattern(const bool & namespaceEmbeddedInPattern)
{
*mpNamespaceEmbeddedInPattern = namespaceEmbeddedInPattern;
}

/**
 * Retrieve the "NamespaceEmbeddedInPattern" flag of this resource.
 *
 * @return const bool & reference to the value mpNamespaceEmbeddedInPattern
 *         points to; it is not a copy and therefore reflects later changes
 *         made through setMIRIAMNamespaceEmbeddedInPattern().
 */
const bool & CMIRIAMResource::getMIRIAMNamespaceEmbeddedInPattern() const
{
return *mpNamespaceEmbeddedInPattern;
}

void CMIRIAMResource::addDeprecatedURL(const std::string & URL)
{
mpDeprecated->addParameter("URL", CCopasiParameter::Type::STRING, URL);
Expand All @@ -548,3 +584,80 @@ const CCopasiParameterGroup & CMIRIAMResource::getMIRIAMDeprecated() const
{
return *mpDeprecated;
}

/**
 * File-local helper: remove a known prefix from a URI.
 *
 * The prefix matches only if it is not empty, the URI starts with it, and
 * the URI is strictly longer than it. If the prefix does not itself end in
 * '/', the single character following it in the URI is treated as the
 * separator and dropped as well (whatever that character is).
 *
 * @param const std::string & uri the URI to inspect
 * @param const std::string & prefix the candidate prefix
 * @param std::string & remainder receives the part of uri after the prefix
 *        (and separator); left untouched when the prefix does not match
 * @return bool true if the prefix matched, false otherwise
 */
static bool stripURIPrefix(const std::string & uri,
                           const std::string & prefix,
                           std::string & remainder)
{
  // An empty prefix cannot identify a resource. Rejecting it also keeps the
  // access to the last character of the prefix below within bounds.
  if (prefix.empty()
      || uri.length() <= prefix.length()
      || uri.compare(0, prefix.length(), prefix) != 0)
    return false;

  const std::string::size_type Separator = (prefix[prefix.length() - 1] == '/') ? 0 : 1;

  // uri is strictly longer than prefix, so this position is at most uri.length().
  remainder = uri.substr(prefix.length() + Separator);

  return true;
}

/**
 * Extract the resource specific identifier from a URI.
 *
 * The URI is compared, in this order, against
 *  1. the MIRIAM URI of the resource,
 *  2. the compact identifiers.org URL (only if the namespace is embedded in
 *     the pattern); the namespace and ':' are put back in front of the id,
 *  3. the non compact identifiers.org URL,
 *  4. the deprecated URLs of the resource (the first matching one wins).
 * A later candidate is only tried while the identifier is still empty.
 *
 * @param const std::string & uri the URI; a leading "https://" is treated
 *        like "http://"
 * @return std::string the identifier, or an empty string if no prefix of
 *         this resource matches
 */
std::string CMIRIAMResource::extractId(const std::string & uri) const
{
  // The identifiers.org prefixes created by this class use http://,
  // therefore https:// URIs are normalized before any comparison.
  const std::string URI = (uri.length() > 8 && uri.compare(0, 8, "https://") == 0)
                          ? "http://" + uri.substr(8)
                          : uri;

  std::string Id;

  // 1. MIRIAM URI
  stripURIPrefix(URI, getMIRIAMURI(), Id);

  // 2. Compact identifiers.org URL
  if (Id.empty()
      && *mpNamespaceEmbeddedInPattern)
    {
      std::string LocalId;

      if (stripURIPrefix(URI, getIdentifiersOrgURL(true), LocalId))
        Id = extractNamespaceFromPattern() + ":" + LocalId;
    }

  // 3. Non compact identifiers.org URL
  if (Id.empty())
    stripURIPrefix(URI, getIdentifiersOrgURL(false), Id);

  // 4. Deprecated URLs
  if (Id.empty())
    {
      const CCopasiParameterGroup & Deprecated = getMIRIAMDeprecated();
      CCopasiParameterGroup::index_iterator itDeprecated = Deprecated.beginIndex();
      CCopasiParameterGroup::index_iterator endDeprecated = Deprecated.endIndex();

      for (; itDeprecated != endDeprecated; ++itDeprecated)
        if (stripURIPrefix(URI, (*itDeprecated)->getValue< std::string >(), Id))
          break;
    }

  return Id;
}

/**
 * Create the identifiers.org URL for the given identifier.
 *
 * If the namespace is embedded in the pattern the id is appended directly
 * to "http://identifiers.org/". Otherwise it is appended, separated by '/',
 * to the non compact identifiers.org URL of this resource.
 *
 * @param const std::string id the identifier
 * @return std::string the identifiers.org URL
 */
std::string CMIRIAMResource::createIdentifiersOrgURL(const std::string id) const
{
  const bool Compact = *mpNamespaceEmbeddedInPattern;

  // The resource specific URL is only built when it is actually needed.
  std::string URL = Compact
                    ? std::string("http://identifiers.org/")
                    : getIdentifiersOrgURL(false) + "/";

  URL += id;

  return URL;
}
Loading

0 comments on commit 6995c9e

Please sign in to comment.