Skip to content

Commit

Permalink
Add check against duplicated IDs in MCC ETL
Browse files Browse the repository at this point in the history
  • Loading branch information
bbimber committed Jan 30, 2025
1 parent 475c925 commit 036af5f
Showing 1 changed file with 25 additions and 0 deletions.
25 changes: 25 additions & 0 deletions mcc/src/org/labkey/mcc/etl/PopulateIdsStep.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,34 @@ public RecordedActionSet run(@NotNull PipelineJob job) throws PipelineJobExcepti
populateForDemographics(job);
populateForKinship(job);

// Check for duplicate IDs and fail the job if any are found:
performDuplicateCheck(job);

return new RecordedActionSet();
}

/**
 * Aborts the ETL step when the {@code duplicatedAggregatedDemographics} query returns any rows.
 * <p>
 * The schema named by {@code MccSchema.NAME} is resolved using the user and container held in
 * {@code _containerUser}; the table is then probed for the existence of at least one row.
 *
 * @param job the pipeline job running this step (not referenced by this method)
 * @throws PipelineJobException if the schema or the table cannot be resolved, or if the table
 *                              contains at least one row
 */
private void performDuplicateCheck(PipelineJob job) throws PipelineJobException
{
    // Resolve the MCC schema for the user/container this step runs as:
    UserSchema mccSchema = QueryService.get().getUserSchema(_containerUser.getUser(), _containerUser.getContainer(), MccSchema.NAME);
    if (null == mccSchema)
    {
        throw new PipelineJobException("Unable to find source schema: " + MccSchema.NAME);
    }

    // Look up the query that lists duplicated IDs:
    TableInfo duplicatesTable = mccSchema.getTable("duplicatedAggregatedDemographics");
    if (null == duplicatesTable)
    {
        throw new PipelineJobException("Unable to find table: duplicatedAggregatedDemographics");
    }

    // A single row is enough to fail, so only test for existence:
    boolean hasDuplicates = new TableSelector(duplicatesTable).exists();
    if (hasDuplicates)
    {
        throw new PipelineJobException("There were duplicated IDs in aggregatedDemographics");
    }
}

private void populateForDemographics(PipelineJob job) throws PipelineJobException
{
// Query aggregated demographics:
Expand Down

0 comments on commit 036af5f

Please sign in to comment.