Skip to content

Commit

Permalink
ENCD-6099-audit-index-reads-no-read-structure (#4071)
Browse files Browse the repository at this point in the history
  • Loading branch information
bonitalam authored Aug 27, 2021
1 parent b17ca4d commit aa964c9
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 0 deletions.
12 changes: 12 additions & 0 deletions src/encoded/audit/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,17 @@ def audit_file_index_of(value, system):
)
yield AuditFailure('inconsistent index file', detail, level='ERROR')


def audit_index_reads_read_structure(value, system):
    """Flag index-reads files that carry no read structure information.

    Args:
        value: The file object being audited, accessed as a mapping. Reads
            the ``output_type``, ``read_structure`` and ``@id`` keys.
        system: Audit context passed by the dispatcher; not used here.

    Yields:
        AuditFailure: one ``missing read structure`` failure at
        ``INTERNAL_ACTION`` level when ``output_type`` is ``index reads``
        and ``read_structure`` is absent or empty.
    """
    if value['output_type'] != 'index reads':
        return
    # A missing key, an empty list and None all mean "no read structure".
    # A single truthiness test covers the three cases without relying on
    # and/or precedence or calling len() on a possibly-None value.
    if value.get('read_structure'):
        return
    detail = (
        f'Index file {audit_link(path_to_text(value["@id"]), value["@id"])} '
        'is missing read structure information.'
    )
    yield AuditFailure('missing read structure', detail, level='INTERNAL_ACTION')

function_dispatcher = {
'audit_step_run': audit_file_processed_step_run,
'audit_derived_from': audit_file_processed_derived_from,
Expand All @@ -643,6 +654,7 @@ def audit_file_index_of(value, system):
'audit_read_structure': audit_read_structure,
'audit_file_matching_md5sum': audit_file_matching_md5sum,
'audit_file_index_of': audit_file_index_of,
'audit_index_reads_read_structure': audit_index_reads_read_structure,
}


Expand Down
1 change: 1 addition & 0 deletions src/encoded/schemas/changelogs/file.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
* Added *fragments* to the enum list for *output_type*, the new output type is to be submitted as tar.gz only
* Added *sparse peak count matrix* to the enum list for *output_type*
* Added *functional conservation quantifications* to the enum list for *output_type*
* Added *cell barcode* and *element barcode* to the enum list for *sequence_element* of the *read_structure* property

### Schema version 29
* Added Ultima Genomics platform to dependency that enforces that *read_length* property should not be specified for FASTQ files produced by Oxford Nanopore and PacBio platforms.
Expand Down
2 changes: 2 additions & 0 deletions src/encoded/schemas/file.json
Original file line number Diff line number Diff line change
Expand Up @@ -1996,6 +1996,8 @@
"enum": [
"adapter",
"barcode",
"cell barcode",
"element barcode",
"UMI"
]
},
Expand Down
10 changes: 10 additions & 0 deletions src/encoded/tests/data/inserts/file.json
Original file line number Diff line number Diff line change
Expand Up @@ -2808,6 +2808,16 @@
"sequence_element": "UMI",
"start": 13,
"end": 18
},
{
"sequence_element": "cell barcode",
"start": 19,
"end": 25
},
{
"sequence_element": "element barcode",
"start": 26,
"end": 33
}
]
},
Expand Down
22 changes: 22 additions & 0 deletions src/encoded/tests/test_audit_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -1127,3 +1127,25 @@ def test_audit_incorrect_index(testapp,
assert any(error['category'] == 'inconsistent index file'
and 'multiple non-Illumina fastq' in error['detail']
for error in errors_list)


def test_audit_index_reads_read_structure(testapp, fastq_index):
    """Check the 'missing read structure' audit before and after a patch.

    The audit must be reported for the ``fastq_index`` fixture as first
    indexed, and must no longer be reported once the file is patched with
    a non-empty ``read_structure``.
    """
    category = 'missing read structure'
    file_id = fastq_index['@id']

    def flagged():
        # Re-fetch the indexed document every time so that the result
        # reflects the current state of the file.
        audits = testapp.get(file_id + '@@index-data').json['audit']
        return [
            failure
            for group in audits.values()
            for failure in group
            if failure['category'] == category
        ]

    assert flagged()

    new_read_structure = [{
        'sequence_element': 'cell barcode',
        'start': 1,
        'end': 20
    }]
    testapp.patch_json(file_id, {'read_structure': new_read_structure})

    assert not flagged()

0 comments on commit aa964c9

Please sign in to comment.