From 036af5facdc5b7ad3f0623975795473ce7cc0303 Mon Sep 17 00:00:00 2001
From: bbimber
Date: Thu, 30 Jan 2025 06:43:48 -0800
Subject: [PATCH] Add check against duplicated IDs in MCC ETL

---
 .../org/labkey/mcc/etl/PopulateIdsStep.java | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/mcc/src/org/labkey/mcc/etl/PopulateIdsStep.java b/mcc/src/org/labkey/mcc/etl/PopulateIdsStep.java
index 638af85df..81debad62 100644
--- a/mcc/src/org/labkey/mcc/etl/PopulateIdsStep.java
+++ b/mcc/src/org/labkey/mcc/etl/PopulateIdsStep.java
@@ -43,9 +43,34 @@ public RecordedActionSet run(@NotNull PipelineJob job) throws PipelineJobExcepti
         populateForDemographics(job);
         populateForKinship(job);
 
+        // Check for duplicate IDs and proactively error:
+        performDuplicateCheck(job);
+
         return new RecordedActionSet();
     }
 
+    /** Throws a PipelineJobException when the duplicatedAggregatedDemographics table has any rows; 'job' is unused. */
+    private void performDuplicateCheck(PipelineJob job) throws PipelineJobException
+    {
+        final String duplicatesQuery = "duplicatedAggregatedDemographics";
+        UserSchema mccSchema = QueryService.get().getUserSchema(_containerUser.getUser(), _containerUser.getContainer(), MccSchema.NAME);
+        if (mccSchema == null)
+        {
+            throw new PipelineJobException("Unable to find source schema: " + MccSchema.NAME);
+        }
+
+        TableInfo duplicatesTable = mccSchema.getTable(duplicatesQuery);
+        if (duplicatesTable == null)
+        {
+            throw new PipelineJobException("Unable to find table: " + duplicatesQuery);
+        }
+
+        if (new TableSelector(duplicatesTable).exists())
+        {
+            throw new PipelineJobException("There were duplicated IDs in aggregatedDemographics");
+        }
+    }
+
     private void populateForDemographics(PipelineJob job) throws PipelineJobException
     {
         // Query aggregated demographics: