Skip to content

Commit

Permalink
Optimize the JSON parsing in NpmPackageIndexBuilder.seeFile
Browse files Browse the repository at this point in the history
  • Loading branch information
qligier committed Feb 6, 2025
1 parent e67fbb9 commit ec8d461
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.HashSet;
import java.util.Set;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonToken;
import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.utilities.CommaSeparatedStringBuilder;
import org.hl7.fhir.utilities.FileUtilities;
import org.hl7.fhir.utilities.Utilities;
import org.hl7.fhir.utilities.filesystem.ManagedFileAccess;
Expand Down Expand Up @@ -77,57 +76,68 @@ public void start(String filename) {

public boolean seeFile(String name, byte[] content) {
if (name.endsWith(".json")) {
try {
JsonObject json = JsonParser.parseObject(content);
if (json.has("resourceType")) {
// ok we treat it as a resource
JsonObject fi = new JsonObject();
files.add(fi);
fi.add("filename", name);
fi.add("resourceType", json.asString("resourceType"));
if (json.hasPrimitive("id")) {
fi.add("id", json.asString("id"));
}
if (json.hasPrimitive("url")) {
fi.add("url", json.asString("url"));
}
if (json.hasPrimitive("version")) {
fi.add("version", json.asString("version"));
}
if (json.hasPrimitive("kind")) {
fi.add("kind", json.asString("kind"));
}
if (json.hasPrimitive("type")) {
fi.add("type", json.asString("type"));
}
if (json.hasPrimitive("supplements")) {
fi.add("supplements", json.asString("supplements"));
}
if (json.hasPrimitive("content")) {
fi.add("content", json.asString("content"));
/* We are only interested in a few String fields on the first level of the JSON file.
* We can therefore use a streaming parser to read the values of these fields instead of parsing the whole
* file and allocating memory for everything in it.
* The key 'resourceType' should appear before all other keys, but in R4B there are resources where that is
* not the case.
*/
try (final var parser = new JsonFactory().createParser(content)) {
final var fi = new JsonObject();
int level = 0;

while (parser.nextToken() != null) {
if (parser.currentToken() == JsonToken.START_OBJECT || parser.currentToken() == JsonToken.START_ARRAY) {
level++;
} else if (parser.currentToken() == JsonToken.END_OBJECT || parser.currentToken() == JsonToken.END_ARRAY) {
level--;
}
if (json.hasPrimitive("valueSet")) {
fi.add("valueSet", json.asString("valueSet"));
if (level != 1) {
continue;
}
if (json.hasPrimitive("derivation")) {
fi.add("derivation", json.asString("derivation"));

String fieldName = parser.currentName();

if ("resourceType".equals(fieldName)) {
parser.nextToken();
files.add(fi);
fi.add("filename", name);
fi.add(fieldName, parser.getText());
}

if (psql != null) {
psql.setString(1, name); // FileName);
psql.setString(2, json.asString("resourceType")); // ResourceType");
psql.setString(3, json.asString("id")); // Id");
psql.setString(4, json.asString("url")); // Url");
psql.setString(5, json.asString("version")); // Version");
psql.setString(6, json.asString("kind")); // Kind");
psql.setString(7, json.asString("type")); // Type");
psql.setString(8, json.asString("supplements")); // Supplements");
psql.setString(9, json.asString("content")); // Content");
psql.setString(10, json.asString("valueSet")); // ValueSet");
psql.setString(10, json.asString("derivation")); // ValueSet");
psql.execute();

if ("id".equals(fieldName)
|| "url".equals(fieldName)
|| "version".equals(fieldName)
|| "kind".equals(fieldName)
|| "type".equals(fieldName)
|| "supplements".equals(fieldName)
|| "content".equals(fieldName)
|| "valueSet".equals(fieldName)
|| "derivation".equals(fieldName)) {
parser.nextToken();
fi.add(fieldName, parser.getText());
}
}

if (!fi.has("resourceType")) {
// We haven't seen the 'resourceType' key, it's not a FHIR resource
return true;
}

if (psql != null) {
psql.setString(1, name); // FileName);
psql.setString(2, fi.asString("resourceType")); // ResourceType");
psql.setString(3, fi.asString("id")); // Id");
psql.setString(4, fi.asString("url")); // Url");
psql.setString(5, fi.asString("version")); // Version");
psql.setString(6, fi.asString("kind")); // Kind");
psql.setString(7, fi.asString("type")); // Type");
psql.setString(8, fi.asString("supplements")); // Supplements");
psql.setString(9, fi.asString("content")); // Content");
psql.setString(10, fi.asString("valueSet")); // ValueSet");
psql.setString(10, fi.asString("derivation")); // Derivation");
psql.execute();
}
} catch (Exception e) {
// System.out.println("Error parsing "+name+": "+e.getMessage());
if (name.contains("openapi")) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package org.hl7.fhir.utilities.npm;

import org.hl7.fhir.utilities.json.model.JsonObject;
import org.hl7.fhir.utilities.json.parser.JsonParser;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;

/**
 * Tests for {@link NpmPackageIndexBuilder}: feeds a handful of files to the builder and checks the
 * content of the generated JSON index.
 */
class NpmPackageIndexBuilderTest {

  /**
   * Builds an index from two FHIR resources (a CapabilityStatement and a CodeSystem) plus three
   * files that are expected to be left out of the index (an XML file, a JSON file that is not
   * expected to produce an entry, and a tgz archive), then verifies the indexed fields of each entry.
   */
  @Test
  void testBuildWithoutDb() throws Exception {
    final var builder = new NpmPackageIndexBuilder();
    // NOTE(review): a null filename presumably means "no database index" - confirm against start().
    builder.start(null);

    final var cs = "{\"resourceType\":\"CapabilityStatement\",\"id\":\"base2\",\"meta\":{\"lastUpdated\":\"2019-11-01T09:29:23.356+11:00\"},\"url\":\"http://hl7.org/fhir/CapabilityStatement/base2\",\"version\":\"4.0.1\",\"name\":\"Base FHIR Capability Statement (Empty)\",\"status\":\"draft\",\"experimental\":true,\"date\":\"2019-11-01T09:29:23+11:00\",\"publisher\":\"FHIR Project Team\",\"contact\":[{\"telecom\":[{\"system\":\"url\",\"value\":\"http://hl7.org/fhir\"}]}],\"description\":\"This is the base Capability Statement for FHIR. It represents a server that provides the none of the functionality defined by FHIR. It is provided to use as a template for system designers to build their own Capability Statements from. A capability statement has to contain something, so this contains a read of a Capability Statement\",\"kind\":\"capability\",\"software\":{\"name\":\"Insert your software name here...\"},\"fhirVersion\":\"4.0.1\",\"format\":[\"xml\",\"json\"],\"rest\":[{\"mode\":\"server\",\"documentation\":\"An empty Capability Statement\",\"security\":{\"cors\":true,\"service\":[{\"coding\":[{\"system\":\"http://terminology.hl7.org/CodeSystem/restful-security-service\",\"code\":\"SMART-on-FHIR\",\"display\":\"SMART-on-FHIR\"}],\"text\":\"See http://docs.smarthealthit.org/\"}],\"description\":\"This is the Capability Statement to declare that the server supports SMART-on-FHIR. See the SMART-on-FHIR docs for the extension that would go with such a server\"},\"resource\":[{\"type\":\"CapabilityStatement\",\"interaction\":[{\"code\":\"read\",\"documentation\":\"Read CapabilityStatement Resource\"}]}]}]}";
    builder.seeFile("CapabilityStatement-base2.json", cs.getBytes(StandardCharsets.UTF_8));

    final var codeSystem = "{\"resourceType\":\"CodeSystem\",\"id\":\"action-type\"," +
      "\"meta\":{\"lastUpdated\":\"2019-11-01T09:29:23.356+11:00\"},\"extension\":[{\"url\":\"http://hl7.org/fhir/StructureDefinition/structuredefinition-wg\",\"valueCode\":\"cds\"},{\"url\":\"http://hl7.org/fhir/StructureDefinition/structuredefinition-standards-status\",\"valueCode\":\"trial-use\"},{\"url\":\"http://hl7.org/fhir/StructureDefinition/structuredefinition-fmm\",\"valueInteger\":2}],\"url\":\"http://terminology.hl7.org/CodeSystem/action-type\",\"identifier\":[{\"system\":\"urn:ietf:rfc:3986\",\"value\":\"urn:oid:2.16.840.1.113883.4.642.4.1246\"}],\"version\":\"4.0.1\",\"name\":\"ActionType\",\"title\":\"ActionType\",\"status\":\"draft\",\"experimental\":false,\"date\":\"2019-11-01T09:29:23+11:00\",\"publisher\":\"HL7 (FHIR Project)\",\"contact\":[{\"telecom\":[{\"system\":\"url\",\"value\":\"http://hl7.org/fhir\"},{\"system\":\"email\",\"value\":\"fhir@lists.hl7.org\"}]}],\"description\":\"The type of action to be performed.\",\"caseSensitive\":true,\"valueSet\":\"http://hl7.org/fhir/ValueSet/action-type\",\"content\":\"complete\",\"concept\":[{\"code\":\"create\",\"display\":\"Create\",\"definition\":\"The action is to create a new resource.\"},{\"code\":\"update\",\"display\":\"Update\",\"definition\":\"The action is to update an existing resource.\"},{\"code\":\"remove\",\"display\":\"Remove\",\"definition\":\"The action is to remove an existing resource.\"},{\"code\":\"fire-event\",\"display\":\"Fire Event\",\"definition\":\"The action is to fire a specific event.\"}]}";
    builder.seeFile("CodeSystem-action-type.json", codeSystem.getBytes(StandardCharsets.UTF_8));

    // Files that must not produce an index entry; the size assertion below relies on that.
    builder.seeFile("observation-spreadsheet.xml", readResource("/observation-spreadsheet.xml"));
    builder.seeFile("settings-example.json", readResource("/settings/settings-example.json"));
    builder.seeFile("tgz-normal.tgz", readResource("/npm/tar/tgz-normal.tgz"));

    final var result = builder.build();
    final var json = JsonParser.parseObject(result);
    assertEquals(2, json.getJsonNumber("index-version").getInteger());

    // Only the two FHIR resources are expected in the index, in the order they were seen.
    final var files = json.getJsonArray("files");
    assertEquals(2, files.size());

    var file = (JsonObject) files.get(0);
    assertEquals("CapabilityStatement-base2.json", file.getJsonString("filename").getValue());
    assertEquals("CapabilityStatement", file.getJsonString("resourceType").getValue());
    assertEquals("base2", file.getJsonString("id").getValue());
    assertEquals("http://hl7.org/fhir/CapabilityStatement/base2", file.getJsonString("url").getValue());
    assertEquals("4.0.1", file.getJsonString("version").getValue());
    assertEquals("capability", file.getJsonString("kind").getValue());
    assertFalse(file.has("supplements"));
    assertFalse(file.has("content"));
    assertFalse(file.has("valueSet"));
    assertFalse(file.has("derivation"));

    file = (JsonObject) files.get(1);
    assertEquals("CodeSystem-action-type.json", file.getJsonString("filename").getValue());
    assertEquals("CodeSystem", file.getJsonString("resourceType").getValue());
    assertEquals("action-type", file.getJsonString("id").getValue());
    assertEquals("http://terminology.hl7.org/CodeSystem/action-type", file.getJsonString("url").getValue());
    assertEquals("4.0.1", file.getJsonString("version").getValue());
    assertEquals("http://hl7.org/fhir/ValueSet/action-type", file.getJsonString("valueSet").getValue());
    assertEquals("complete", file.getJsonString("content").getValue());
    assertFalse(file.has("supplements"));
    assertFalse(file.has("kind"));
    assertFalse(file.has("derivation"));
  }

  /**
   * Reads a classpath resource fully into memory and closes the underlying stream.
   *
   * @param path absolute classpath location of the resource (starting with '/')
   * @return the complete content of the resource
   * @throws IOException if the resource cannot be read
   */
  private static byte[] readResource(final String path) throws IOException {
    // try-with-resources tolerates a null resource, so the explicit assertion gives a clear failure
    // message instead of an anonymous NullPointerException when the fixture is missing.
    try (final var stream = NpmPackageIndexBuilderTest.class.getResourceAsStream(path)) {
      assertNotNull(stream, "Missing test resource: " + path);
      return stream.readAllBytes();
    }
  }
}

0 comments on commit ec8d461

Please sign in to comment.