diff --git a/README.md b/README.md index 23ce5c2..c7ddd92 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ # hAMRonization This repo contains the hAMRonization module and CLI parser tools combine the outputs of -18 disparate antimicrobial resistance gene detection tools into a single unified format. +17 disparate antimicrobial resistance gene detection tools into a single unified format. This is an implementation of the [hAMRonization AMR detection specification scheme](docs/hAMRonization_specification_details.csv) which supports gene presence/absence resistance and mutational resistance (if supported by the underlying tool). @@ -66,7 +66,7 @@ options: -v, --version show program's version number and exit Tools with hAMRonizable reports: - {abricate,amrfinderplus,amrplusplus,ariba,csstar,deeparg,fargene,groot,kmerresistance,resfams,resfinder,mykrobe,pointfinder,rgi,srax,srst2,staramr,tbprofiler,summarize} + {abricate,amrfinderplus,amrplusplus,ariba,csstar,deeparg,fargene,groot,kmerresistance,resfams,resfinder,mykrobe,rgi,srax,srst2,staramr,tbprofiler,summarize} abricate hAMRonize abricate's output report i.e., OUTPUT.tsv amrfinderplus hAMRonize amrfinderplus's output report i.e., OUTPUT.tsv amrplusplus hAMRonize amrplusplus's output report i.e., gene.tsv @@ -80,11 +80,8 @@ Tools with hAMRonizable reports: report`) kmerresistance hAMRonize kmerresistance's output report i.e., OUTPUT.res resfams hAMRonize resfams's output report i.e., resfams.tblout - resfinder hAMRonize resfinder's output report i.e., - ResFinder_results_tab.txt + resfinder hAMRonize resfinder's JSON output report (use -j to produce) mykrobe hAMRonize mykrobe's output report i.e., OUTPUT.json - pointfinder hAMRonize pointfinder's output report i.e., - PointFinder_results.txt rgi hAMRonize rgi's output report i.e., OUTPUT.txt or OUTPUT_bwtoutput.gene_mapping_data.txt srax hAMRonize srax's output report i.e., sraX_detected_ARGs.tsv @@ -193,7 +190,7 @@ If you want to write multiple reports to one 
file, this `.write` method can acce Currently implemented parsers and the last tool version for which they have been validated: 1. [abricate](hAMRonization/AbricateIO.py): last updated for v1.0.0 -2. [amrfinderplus](hAMRonization/AmrFinderPlusIO.py): last updated for v3.12.18 +2. [amrfinderplus](hAMRonization/AmrFinderPlusIO.py): last updated for v4.0.3 3. [amrplusplus](hAMRonization/AmrPlusPlusIO.py): last updated for c6b097a 4. [ariba](hAMRonization/AribaIO.py): last updated for v2.14.6 5. [csstar](hAMRonization/CSStarIO.py): last updated for v2.1.0 @@ -202,7 +199,7 @@ Currently implemented parsers and the last tool version for which they have been 8. [groot](hAMRonization/GrootIO.py): last updated for v1.1.2 9. [kmerresistance](hAMRonization/KmerResistanceIO.py): late updated for v2.2.0 10. [mykrobe](test/data/raw_outputs/mykrobe/report.json): last updated for v0.8.1 -11. [pointfinder](hAMRonization/PointFinderIO.py): last updated for v4.1.0 +11. ~pointfinder~ (removed, PointFinder is now integrated in ResFinder) 12. [resfams](hAMRonization/ResFamsIO.py): last updated for hmmer v3.3.2 13. [resfinder](hAMRonization/ResFinderIO.py): last updated for v4.6.0 14. [rgi](hAMRonization/RgiIO.py) (includes RGI-BWT) last updated for v5.2.0 @@ -263,13 +260,12 @@ First fork this repository and set up a development environment (replacing `YOUR ``` git clone https://github.com/YOURUSERNAME/hAMRonization -conda create -n hAMRonization +conda create -n hAMRonization python pip pytest flake8 conda activate hAMRonization cd hAMRonization -pip install pytest flake8 pip install -e . 
- ``` + ## Testing and Linting On every commit github actions automatically runs tests and linting to check diff --git a/hAMRonization/AmrFinderPlusIO.py b/hAMRonization/AmrFinderPlusIO.py index 9d1f2f0..f60da66 100644 --- a/hAMRonization/AmrFinderPlusIO.py +++ b/hAMRonization/AmrFinderPlusIO.py @@ -18,78 +18,56 @@ class AmrFinderPlusIterator(hAMRonizedResultIterator): + + nuc_field_map = { + "Protein id": None, + "Contig id": "input_sequence_id", + "Start": "input_gene_start", + "Stop": "input_gene_stop", + "Strand": "strand_orientation", + "Element symbol": "gene_symbol", + "Element name": "gene_name", + "Scope": None, + "Type": None, + "Subtype": None, + "Class": "drug_class", + "Subclass": "antimicrobial_agent", + "Method": None, + "Target length": "input_gene_length", + "Reference sequence length": "reference_gene_length", + "% Coverage of reference": "coverage_percentage", + "% Identity to reference": "sequence_identity", + "Alignment length": None, + "Closest reference accession": "reference_accession", + "Closest reference name": None, + "HMM accession": None, + "HMM description": None, + "Hierarchy node": None, + # Fields we compute below (not in TSV) + "amino_acid_mutation": "amino_acid_mutation", + "nucleotide_mutation": "nucleotide_mutation", + "genetic_variation_type": "genetic_variation_type", + } + + # AFP outputs the same column set for nuc and prot detections, + # with Start and Stop always in nt units; however target and + # reference length are reported in AA for proteins. 
+ prot_field_map = nuc_field_map.copy() + prot_field_map.update({ + "Target length": "input_protein_length", + "Reference sequence length": "reference_protein_length" + }) + def __init__(self, source, metadata): metadata["analysis_software_name"] = "amrfinderplus" metadata["reference_database_name"] = "NCBI Reference Gene Database" self.metadata = metadata - # check source for whether AMFP has been run in protein or nt mode - - nucleotide_field_mapping = { - "Protein identifier": None, - "Contig id": "input_sequence_id", - "Start": "input_gene_start", - "Stop": "input_gene_stop", - "Strand": "strand_orientation", - "Gene symbol": "gene_symbol", - "Sequence name": "gene_name", - "Scope": None, - "Element type": None, - "Element subtype": None, - "Class": "drug_class", - "Subclass": "antimicrobial_agent", - "Method": None, - "Target length": "input_protein_length", - "Reference sequence length": "reference_protein_length", - "% Coverage of reference sequence": "coverage_percentage", - "% Identity to reference sequence": "sequence_identity", - "Alignment length": None, - "Accession of closest sequence": "reference_accession", - "Name of closest sequence": None, - "HMM id": None, - "HMM description": None, - "AA Mutation": "amino_acid_mutation", - "Nucleotide Mutation": "nucleotide_mutation", - "genetic_variation_type": "genetic_variation_type", - } - protein_field_mapping = { - "Protein identifier": "input_sequence_id", - "Gene symbol": "gene_symbol", - "Sequence name": "gene_name", - "Scope": None, - "Element": None, - "Element subtype": None, - "Class": "drug_class", - "Subclass": "antimicrobial_agent", - "Method": None, - "Target length": "input_protein_length", - "Reference sequence length": "reference_protein_length", - "% Coverage of reference sequence": "coverage_percentage", - "% Identity to reference sequence": "sequence_identity", - "Alignment length": None, - "Accession of closest sequence": "reference_accession", - "Name of closest sequence": None, - "HMM 
id": None, - "HMM description": None, - "AA Mutation": "amino_acid_mutation", - "genetic_variation_type": "genetic_variation_type", - } - - with open(source) as fh: - header = next(fh).strip().split("\t") - try: - first_result = next(fh) - prot_id = header.index("Protein identifier") - if first_result.strip().split("\t")[prot_id] == "NA": - self.field_mapping = nucleotide_field_mapping - else: - self.field_mapping = protein_field_mapping - except StopIteration: - # doesn't really matter which mapping as this error indicates - # this is an empty results file - self.field_mapping = nucleotide_field_mapping - - super().__init__(source, self.field_mapping, self.metadata) + # We pass None for the field_map as it differs depending on + # whether we return a nucleotide or protein variant detection. + # TODO: refactor field_map out of super's constructor, and make + # it a parameter on super's hAMRonize(). + super().__init__(source, None, self.metadata) def parse(self, handle): """ @@ -98,11 +76,16 @@ def parse(self, handle): skipped_truncated = 0 reader = csv.DictReader(handle, delimiter="\t") for result in reader: - # replace NA value with None for consitency + + # Replace NA value with None for consistency for field, value in result.items(): if value == "NA": result[field] = None + # Skip reported virulence genes + if result['Type'] == "VIRULENCE": + continue + # AFP reports partial hits so to avoid misleadingly listing these # as present skip results with INTERNAL_STOP # recommended by developers @@ -113,24 +96,40 @@ def parse(self, handle): # "POINT" indicates mutational resistance # amrfinderplus has no special fields but the mutation itself is # appended to the symbol name so we want to split this - result["AA Mutation"] = None - result["Nucleotide Mutation"] = None - result["genetic_variation_type"] = GENE_PRESENCE + result['amino_acid_mutation'] = None + result['nucleotide_mutation'] = None + result['genetic_variation_type'] = GENE_PRESENCE - if result["Element 
subtype"] == "POINT": - gene_symbol, mutation = result["Gene symbol"].rsplit("_", 1) - result["Gene symbol"] = gene_symbol + if result['Subtype'] == "POINT": + gene_symbol, mutation = result['Element symbol'].rsplit("_", 1) + result['Element symbol'] = gene_symbol _, ref, pos, alt, _ = re.split(r"(\D+)(\d+)(\D+)", mutation) # this means it is a protein mutation - if result["Method"] in ["POINTX", "POINTP"]: - result["AA Mutation"] = f"p.{ref}{pos}{alt}" - result["genetic_variation_type"] = AMINO_ACID_VARIANT - elif result["Method"] == "POINTN": + if result['Method'] in ["POINTX", "POINTP"]: + result['amino_acid_mutation'] = f"p.{ref}{pos}{alt}" + result['genetic_variation_type'] = AMINO_ACID_VARIANT + elif result['Method'] == "POINTN": # e.g., 23S_G2032G ampC_C-11C -> c.2032G>G - result["Nucleotide Mutation"] = f"c.{pos}{ref}>{alt}" - result["genetic_variation_type"] = NUCLEOTIDE_VARIANT + result['nucleotide_mutation'] = f"c.{pos}{ref}>{alt}" + result['genetic_variation_type'] = NUCLEOTIDE_VARIANT + + # Determine the field_map to use depending on the method used + # The following seems to cover all bases with a minimum of fuss + have_prot = result['Protein id'] is not None + method = result['Method'] + if method.endswith('P') or method.endswith('X'): + field_map = self.prot_field_map + elif method.endswith('N'): + field_map = self.nuc_field_map + elif method in ['COMPLETE', 'HMM']: + field_map = self.prot_field_map if have_prot else self.nuc_field_map + else: + warnings.warn(f"Assuming unknown method {method} implies a protein detection" + f" in {self.metadata['input_file_name']}") + field_map = self.prot_field_map - yield self.hAMRonize(result, self.metadata) + # This uses the "override hack" that should perhaps be cleaned up + yield self.hAMRonize(result, self.metadata, field_map) if skipped_truncated > 0: warnings.warn(f"Skipping {skipped_truncated} records with INTERNAL_STOP " diff --git a/hAMRonization/Interfaces.py b/hAMRonization/Interfaces.py index 
d1ab584..f9b7d5d 100644 --- a/hAMRonization/Interfaces.py +++ b/hAMRonization/Interfaces.py @@ -50,16 +50,24 @@ def __init__(self, source, field_map, metadata): except Exception: self.stream.close() - def hAMRonize(self, report_data, metadata): + # TODO: the field_map_override is a half-hack to support the scenario + # (as for amrfinderplus) where different records need different mappings, + # so setting a field_map in the constructor makes no sense. + # It might be cleaner to remove it from the constructor altogether and + # make it a parameter of this method (which is the only place where it + # is referenced anyway), and subclasses can trivially pass it in. + def hAMRonize(self, report_data, metadata, field_map_override=None): """ Convert a line of parsed AMR report in original format to the hAMRonization specification - - report_result parsed dict of single results from report - - metadata dict of additional metadata fields that need added + - report_data parsed dict of single result from report + - metadata dict of additional metadata fields + - field_map_override optional override of field_map passed in c'tor """ hAMRonized_result_data = {**metadata} - for original_field, hAMRonized_field in self.field_map.items(): + field_map = field_map_override or self.field_map + for original_field, hAMRonized_field in field_map.items(): if hAMRonized_field: hAMRonized_result_data[hAMRonized_field] = report_data[original_field] diff --git a/hAMRonization/PointFinderIO.py b/hAMRonization/PointFinderIO.py deleted file mode 100644 index 7f3e91e..0000000 --- a/hAMRonization/PointFinderIO.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python - -import csv -from .Interfaces import hAMRonizedResultIterator -from hAMRonization.constants import NUCLEOTIDE_VARIANT, AMINO_ACID_VARIANT - -required_metadata = [ - "analysis_software_version", - "reference_database_version", - "input_file_name", -] - - -class PointFinderIterator(hAMRonizedResultIterator): - """ - Updated for 
ResFinder v4.1 using the `PointFinder_results.txt` output - file - """ - - # Mutation - # Nucleotide change - # Amino acid change - # Resistance - # PMID - - def __init__(self, source, metadata): - metadata["reference_database_name"] = "pointfinder" - metadata["analysis_software_name"] = "pointfinder" - # even though resfinderv4 runs pointfinder - # parsing mutational resistance requires parsing a different file - # to get gene presence absence - self.metadata = metadata - - self.field_mapping = { - "Mutation": "reference_accession", - "Nucleotide change": "nucleotide_mutation", - "Amino acid change": "amino_acid_mutation", - "Resistance": "drug_class", - "PMID": None, - "_type": "genetic_variation_type", - "_gene_symbol": "gene_symbol", - "_gene_name": "gene_name", - } - - super().__init__(source, self.field_mapping, self.metadata) - - def parse(self, handle): - """ - Read each and return it - """ - reader = csv.DictReader(handle, delimiter="\t") - for result in reader: - gene, mutation = result["Mutation"].split() - result["_gene_symbol"] = gene - result["_gene_name"] = gene - - if mutation.startswith("r."): - result["_type"] = NUCLEOTIDE_VARIANT - result["Nucleotide change"] = gene - result["Amino acid change"] = None - elif mutation.startswith("p."): - result["_type"] = AMINO_ACID_VARIANT - result["Amino acid change"] = mutation - else: - raise ValueError(f"Mutation type of {result} not recognised") - - yield self.hAMRonize(result, self.metadata) - result = {} diff --git a/hAMRonization/README.md b/hAMRonization/README.md index b89b402..8db837f 100644 --- a/hAMRonization/README.md +++ b/hAMRonization/README.md @@ -4,7 +4,7 @@ All reimplemented by @fmaguire in refactored code. 
Original parser and mapping authors: - AbricateIO.py: @dfornika -- AmrFinderPlusIO.py: @dfornika @fmaguire +- AmrFinderPlusIO.py: @zwets @dfornika @fmaguire - AmrPlusPlusIO.py: @fmaguire - AribaIO.py: @fmaguire - CSStarIO.py: @fmaguire @@ -12,7 +12,7 @@ Original parser and mapping authors: - GrootIO.py: @fmaguire - KmerResistanceIO.py: @fmaguire - ResFamsIO.py: @fmaguire -- ResFinderIO.py: @raphenya @fmaguire +- ResFinderIO.py: @zwets @raphenya @fmaguire - PointFinderIO.py: @fmaguire - RgiIO.py: @dfornika @raphenya @fmaguire - SraxIO.py: @fmaguire diff --git a/hAMRonization/ResFinderIO.py b/hAMRonization/ResFinderIO.py index 052d06d..b06d8ec 100644 --- a/hAMRonization/ResFinderIO.py +++ b/hAMRonization/ResFinderIO.py @@ -63,11 +63,13 @@ def set_shared_fields(r): res.gene_symbol = r.get('name', "unspecified") res.gene_name = r.get('name', "unspecified") res.reference_accession = r.get('ref_acc', r.get('ref_id', r.get('key', "unknown"))) + res.reference_database_name = _get_db_name(r.get('ref_database')) + res.reference_database_version = _get_db_ver(r.get('ref_database')) # optional res.coverage_percentage = _safe_round(r.get('coverage'), 1) res.coverage_depth = None # we may have this for mutations detected from reads - res.coverage_ratio = r.get('coverage')/100.0 + res.coverage_ratio = None res.input_sequence_id = r.get('query_id') res.input_gene_length = _get_length(r.get('query_start_pos'), r.get('query_end_pos')) res.input_gene_start = _get_start_pos(r.get('query_start_pos'), r.get('query_end_pos')) @@ -118,9 +120,9 @@ def set_variation_fields(r, vs): _codon.append(v.get('codon_change')) # Add the content of the list fields to the bags above - fold(lambda s, e: s.add(e), _phenos, v.get('phenotypes', [])) - fold(lambda s, e: s.add(e), _notes, v.get('notes', [])) - fold(lambda s, e: s.add(e), _pmids, v.get('pmids', [])) + _phenos.update(v.get('phenotypes', [])) + _notes.update(v.get('notes', [])) + _pmids.update(v.get('pmids', [])) # We have collected all 
variations on region r, now collapse into fields on res res.predicted_phenotype = _empty_to_none(", ".join(filter(None, _phenos))) @@ -145,11 +147,9 @@ def set_variation_fields(r, vs): # - for each r report one AMINO_ACID_VARIANT record, collapsing the seq_variations for p in filter(lambda d: d.get('amr_resistant', False), data['phenotypes'].values()): - # Set the fields available on phenotype object + # Set the fields available on the phenotype object res.drug_class = ", ".join(p.get('amr_classes', [])) res.antimicrobial_agent = p.get('amr_resistance', "unspecified") - res.reference_database_name = _get_db_name(p.get('ref_database')) - res.reference_database_version = _get_db_ver(p.get('ref_database')) # Iterate r over the regions (AMR genes) referenced by p, and yield each in turn for r in map(lambda k: data['seq_regions'][k], p.get('seq_regions', [])): diff --git a/hAMRonization/__init__.py b/hAMRonization/__init__.py index c4b558d..df48ee9 100644 --- a/hAMRonization/__init__.py +++ b/hAMRonization/__init__.py @@ -18,7 +18,6 @@ from hAMRonization import ResFamsIO from hAMRonization import TBProfilerIO from hAMRonization import MykrobeIO -from hAMRonization import PointFinderIO from hAMRonization import FARGeneIO _FormatToIterator = { @@ -38,7 +37,6 @@ "resfams": ResFamsIO.ResFamsIterator, "tbprofiler": TBProfilerIO.TBProfilerIterator, "mykrobe": MykrobeIO.MykrobeIterator, - "pointfinder": PointFinderIO.PointFinderIterator, "fargene": FARGeneIO.FARGeneIOIterator, } @@ -59,7 +57,6 @@ "resfams": "resfams.tblout", "tbprofiler": "OUTPUT.results.json", "mykrobe": "OUTPUT.json", - "pointfinder": "PointFinder_results.txt", "fargene": "retrieved-genes-*-hmmsearched.out" } @@ -77,7 +74,6 @@ "resfams": ResFamsIO.required_metadata, "resfinder": ResFinderIO.required_metadata, "mykrobe": MykrobeIO.required_metadata, - "pointfinder": PointFinderIO.required_metadata, "rgi": RgiIO.required_metadata, "srax": SraxIO.required_metadata, "srst2": Srst2IO.required_metadata, diff --git 
a/hAMRonization/hAMRonizedResult.py b/hAMRonization/hAMRonizedResult.py index 439ef42..3a2d8e4 100644 --- a/hAMRonization/hAMRonizedResult.py +++ b/hAMRonization/hAMRonizedResult.py @@ -77,7 +77,7 @@ def __post_init__(self): input_file_name = getattr(self, "input_file_name") input_file_name = os.path.basename(input_file_name) - for suffix in [ ".gz", ".fna", ".fasta", ".fsa", ".faa", ".fa" ]: + for suffix in [".gz", ".fna", ".fasta", ".fsa", ".faa", ".fa"]: input_file_name = input_file_name.removesuffix(suffix) setattr(self, "input_file_name", input_file_name) diff --git a/test/data/dummy/amrfinderplus/report.tsv b/test/data/dummy/amrfinderplus/report.tsv index ff86d55..2388d0b 100644 --- a/test/data/dummy/amrfinderplus/report.tsv +++ b/test/data/dummy/amrfinderplus/report.tsv @@ -1,2 +1,2 @@ -Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description -NA NZ_LR792628.1 1333611 1334783 - oqxA multidrug efflux RND transporter periplasmic adaptor subunit OqxA core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 391 391 100 99.49 391 WP_002914189.1 multidrug efflux RND transporter periplasmic adaptor subunit OqxA NF000272.1 multidrug efflux RND transporter periplasmic adaptor subunit OqxA +Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Target length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description +NA NZ_LR792628.1 1333611 1334783 - oqxA multidrug efflux RND transporter periplasmic adaptor subunit OqxA core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 391 391 100.00 99.49 391 WP_002914189.1 multidrug efflux 
RND transporter periplasmic adaptor subunit OqxA NA NA diff --git a/test/data/raw_outputs/amrfinderplus/afp_non_coding.tsv b/test/data/raw_outputs/amrfinderplus/afp_non_coding.tsv index 3da9343..f56f1b9 100644 --- a/test/data/raw_outputs/amrfinderplus/afp_non_coding.tsv +++ b/test/data/raw_outputs/amrfinderplus/afp_non_coding.tsv @@ -1,2 +1,2 @@ -Name Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description -DAWXTK010000082_noncoding_test NA DAWXTK010000082.1:68-2970 1 2903 + 23S_A2062G Neisseria gonorrhoeae azithromycin resistant 23S core AMR POINT MACROLIDE AZITHROMYCIN POINTN 2903 2910 100.00 99.35 2910 NC_002946.2:1119158-1116249 23S ribosomal RNA NA NA +Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Target length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description +NA DAWXTK010000082.1:68-2970 1 2903 + 23S_A2059G Neisseria gonorrhoeae azithromycin resistant 23S core AMR POINT MACROLIDE AZITHROMYCIN POINTN 2903 2910 100.00 99.35 2910 NC_002946.2:1119158-1116249 23S ribosomal RNA NA NA diff --git a/test/data/raw_outputs/amrfinderplus/empty_report_with_header.tsv b/test/data/raw_outputs/amrfinderplus/empty_report_with_header.tsv index d2a14f3..cfd40d2 100644 --- a/test/data/raw_outputs/amrfinderplus/empty_report_with_header.tsv +++ b/test/data/raw_outputs/amrfinderplus/empty_report_with_header.tsv @@ -1 +1 @@ -Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % 
Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description +Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Target length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description Hierarchy node diff --git a/test/data/raw_outputs/amrfinderplus/report_nucleotide.tsv b/test/data/raw_outputs/amrfinderplus/report_nucleotide.tsv index 8e70e9f..13fc47c 100644 --- a/test/data/raw_outputs/amrfinderplus/report_nucleotide.tsv +++ b/test/data/raw_outputs/amrfinderplus/report_nucleotide.tsv @@ -1,89 +1,89 @@ -Name Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description -Testname NA contig01 101 958 + blaTEM-156 class A beta-lactamase TEM-156 core AMR AMR BETA-LACTAM BETA-LACTAM ALLELEX 286 286 100.00 100.00 286 WP_061158039.1 class A beta-lactamase TEM-156 NA NA -Testname NA contig02 1 1191 + blaPDC PDC family class C beta-lactamase core AMR AMR BETA-LACTAM CEPHALOSPORIN BLASTX 397 397 100.00 99.75 397 WP_061189306.1 class C beta-lactamase PDC-114 NA NA -Testname NA contig03 101 802 + blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALX 234 265 88.30 100.00 234 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NA NA -Testname NA contig04 101 1147 + vanG D-alanine--D-serine ligase VanG core AMR AMR GLYCOPEPTIDE VANCOMYCIN EXACTX 349 349 100.00 100.00 349 WP_063856695.1 D-alanine--D-serine ligase VanG NA NA -Testname NA contig04 1261 2391 + blaEC BlaEC family class C beta-lactamase plus AMR AMR 
BETA-LACTAM BETA-LACTAM BLASTX 377 377 100.00 98.14 377 WP_063610930.1 extended-spectrum class C beta-lactamase EC-15 NA NA -Testname NA contig08 101 700 + blaTEM TEM family class A beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIAL_CONTIG_ENDX 200 286 69.93 100.00 200 WP_061158039.1 class A beta-lactamase TEM-156 NA NA -Testname NA contig09 1 675 - aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIAL_CONTIG_ENDX 225 275 81.82 100.00 225 WP_109545041.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NA NA -Testname NA contig09 715 1377 - sul2 sulfonamide-resistant dihydropteroate synthase Sul2 core AMR AMR SULFONAMIDE SULFONAMIDE PARTIAL_CONTIG_ENDX 221 271 81.55 100.00 221 WP_001043265.1 sulfonamide-resistant dihydropteroate synthase Sul2 NA NA -Testname NA contig10 486 1307 + blaOXA OXA-9 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM INTERNAL_STOP 274 274 100.00 99.64 274 WP_000722315.1 oxacillin-hydrolyzing class D beta-lactamase OXA-9 NA NA -Testname NA contig11 101 958 + blaTEM TEM family class A beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM INTERNAL_STOP 286 286 100.00 93.01 286 WP_061158039.1 class A beta-lactamase TEM-156 NA NA -Testname NA contig12 71 634 + qacR multidrug-binding transcriptional regulator QacR plus STRESS BIOCIDE QUATERNARY AMMONIUM QUATERNARY AMMONIUM BLASTX 188 188 100.00 99.47 188 ADK23698.1 multidrug-binding transcriptional regulator QacR NA NA -Testname NA contig13 1 1137 + emrD3 multidrug efflux MFS transporter EmrD-3 plus AMR AMR EFFLUX EFFLUX EXACTX 379 379 100.00 100.00 379 ABQ18953.1 multidrug efflux MFS transporter EmrD-3 NA NA -Testname NA contig14 1 1089 + pmrB_C84R Escherichia colistin resistant PmrB core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_C84R Escherichia colistin resistant PmrB core AMR POINT COLISTIN 
COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig15 1 2905 + 23S_A2058T Escherichia azithromycin/erythromycin/telithromycin resistant 23S core AMR POINT MACROLIDE AZITHROMYCIN/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig16 1 720 + nfsA_K141STOP Escherichia nitrofurantoin resistant NfsA core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig16 1 720 + nfsA_R15C Escherichia nitrofurantoin resistant NfsA core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig17 1 247 + ampC_T-14TGT Escherichia cephalosporin resistant ampC core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA -Testname NA contig05 237 1224 - 23S_A2058A Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE AZITHROMYCIN/ERYTHROMYCIN/TELITHROMYCIN POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig05 237 1224 - 23S_C2611C Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig05 237 1224 - 23S_G2057G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MULTIDRUG CHLORAMPHENICOL/ERYTHROMYCIN/TELITHROMYCIN POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig05 237 1224 - 23S_G2447G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig05 237 1224 - 23S_T2609T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE 
TELITHROMYCIN POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig14 1 1089 + pmrB_A159A two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1 1089 + pmrB_C84R Escherichia colistin resistant PmrB core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1 1089 + pmrB_E121E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1 1089 + pmrB_E166E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1 1089 + pmrB_G206G two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1 1089 + pmrB_L10L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1 1089 + pmrB_L14L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1 1089 + pmrB_P94P two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA 
contig14 1 1089 + pmrB_T147T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1 1089 + pmrB_T156T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1 1089 + pmrB_V161V two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_A159A two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_C84R Escherichia colistin resistant PmrB core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_E121E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_E166E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_G206G two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_L10L two-component system sensor histidine kinase PmrB [WILDTYPE] core 
AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_L14L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_P94P two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_T147T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_T156T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 1093 2181 + pmrB_V161V two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_A159A two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_C84C two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_E121E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 
WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_E166E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_G206G two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_L10L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_L14L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_P94P two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_T147T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_T156T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA -Testname NA contig14 2185 3273 + pmrB_V161V two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine 
kinase PmrB NA NA -Testname NA contig15 1 2905 + 23S_A2058T Escherichia azithromycin/erythromycin/telithromycin resistant 23S core AMR POINT MACROLIDE AZITHROMYCIN/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 1 2905 + 23S_C2611C Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 1 2905 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT LINCOSAMIDE/OXAZOLIDINONE CLINDAMYCIN/LINEZOLID POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 1 2905 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE/OXAZOLIDINONE CLARITHROMYCIN/LINEZOLID POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 1 2905 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 1 2905 + 23S_G2057G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MULTIDRUG CHLORAMPHENICOL/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 1 2905 + 23S_G2447G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 1 2905 + 23S_T2609T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 1 2905 + 23S_T754T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 
NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 2906 5810 + 23S_A2058A Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE AZITHROMYCIN/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 2906 5810 + 23S_C2611C Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 2906 5810 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT LINCOSAMIDE/OXAZOLIDINONE CLINDAMYCIN/LINEZOLID POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 2906 5810 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE/OXAZOLIDINONE CLARITHROMYCIN/LINEZOLID POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 2906 5810 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 2906 5810 + 23S_G2057G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MULTIDRUG CHLORAMPHENICOL/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 2906 5810 + 23S_G2447G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 2906 5810 + 23S_T2609T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE TELITHROMYCIN POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig15 2906 5810 + 23S_T754T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN POINTN 
2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA -Testname NA contig16 1 720 + nfsA_E223E nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig16 1 720 + nfsA_G131G nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig16 1 720 + nfsA_K141STOP Escherichia nitrofurantoin resistant NfsA core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig16 1 720 + nfsA_Q44Q nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig16 1 720 + nfsA_R133R nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig16 1 720 + nfsA_R15C Escherichia nitrofurantoin resistant NfsA core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig16 1 720 + nfsA_R203R nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig16 1 720 + nfsA_S33S nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA -Testname NA contig17 1 247 + ampC_C-11C Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA -Testname NA contig17 1 247 + ampC_C-42C Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 
NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA -Testname NA contig17 1 247 + ampC_G-15G Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA -Testname NA contig17 1 247 + ampC_T-14T Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA -Testname NA contig17 1 247 + ampC_T-14TGT Escherichia cephalosporin resistant ampC core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA -Testname NA contig17 1 247 + ampC_T-32T Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA +Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Target length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description Hierarchy node +NA contig01 101 958 + blaTEM-156 class A beta-lactamase TEM-156 core AMR AMR BETA-LACTAM BETA-LACTAM ALLELEX 286 286 100.00 100.00 286 WP_061158039.1 class A beta-lactamase TEM-156 NA NA +NA contig02 1 1191 + blaPDC PDC family class C beta-lactamase core AMR AMR BETA-LACTAM CEPHALOSPORIN BLASTX 397 397 100.00 99.75 397 WP_061189306.1 class C beta-lactamase PDC-114 NA NA +NA contig03 101 802 + blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALX 234 265 88.30 100.00 234 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NA NA +NA contig04 101 1147 + vanG D-alanine--D-serine ligase VanG core AMR AMR GLYCOPEPTIDE VANCOMYCIN EXACTX 349 349 100.00 
100.00 349 WP_063856695.1 D-alanine--D-serine ligase VanG NA NA +NA contig04 1261 2391 + blaEC BlaEC family class C beta-lactamase plus AMR AMR BETA-LACTAM BETA-LACTAM BLASTX 377 377 100.00 98.14 377 WP_063610930.1 extended-spectrum class C beta-lactamase EC-15 NA NA +NA contig08 101 700 + blaTEM TEM family class A beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIAL_CONTIG_ENDX 200 286 69.93 100.00 200 WP_061158039.1 class A beta-lactamase TEM-156 NA NA +NA contig09 1 675 - aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIAL_CONTIG_ENDX 225 275 81.82 100.00 225 WP_109545041.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NA NA +NA contig09 715 1377 - sul2 sulfonamide-resistant dihydropteroate synthase Sul2 core AMR AMR SULFONAMIDE SULFONAMIDE PARTIAL_CONTIG_ENDX 221 271 81.55 100.00 221 WP_001043265.1 sulfonamide-resistant dihydropteroate synthase Sul2 NA NA +NA contig10 486 1307 + blaOXA OXA-9 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM INTERNAL_STOP 274 274 100.00 99.64 274 WP_000722315.1 oxacillin-hydrolyzing class D beta-lactamase OXA-9 NA NA +NA contig11 101 958 + blaTEM TEM family class A beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM INTERNAL_STOP 286 286 100.00 93.01 286 WP_061158039.1 class A beta-lactamase TEM-156 NA NA +NA contig12 71 634 + qacR multidrug-binding transcriptional regulator QacR plus STRESS BIOCIDE QUATERNARY AMMONIUM QUATERNARY AMMONIUM BLASTX 188 188 100.00 99.47 188 ADK23698.1 multidrug-binding transcriptional regulator QacR NA NA +NA contig13 1 1137 + emrD3 multidrug efflux MFS transporter EmrD-3 plus AMR AMR EFFLUX EFFLUX EXACTX 379 379 100.00 100.00 379 ABQ18953.1 multidrug efflux MFS transporter EmrD-3 NA NA +NA contig14 1 1089 + pmrB_C84R Escherichia colistin resistant PmrB core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + 
pmrB_C84R Escherichia colistin resistant PmrB core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig15 1 2905 + 23S_A2058T Escherichia azithromycin/erythromycin/telithromycin resistant 23S core AMR POINT MACROLIDE AZITHROMYCIN/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig16 1 720 + nfsA_K141STOP Escherichia nitrofurantoin resistant NfsA core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig16 1 720 + nfsA_R15C Escherichia nitrofurantoin resistant NfsA core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig17 1 247 + ampC_T-14TGT Escherichia cephalosporin resistant ampC core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA +NA contig05 237 1224 - 23S_A2058A Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE AZITHROMYCIN/ERYTHROMYCIN/TELITHROMYCIN POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig05 237 1224 - 23S_C2611C Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig05 237 1224 - 23S_G2057G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MULTIDRUG CHLORAMPHENICOL/ERYTHROMYCIN/TELITHROMYCIN POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig05 237 1224 - 23S_G2447G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig05 237 1224 - 23S_T2609T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE TELITHROMYCIN 
POINTN 988 2905 34.01 81.08 1004 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig14 1 1089 + pmrB_A159A two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_C84R Escherichia colistin resistant PmrB core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_E121E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_E166E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_G206G two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_L10L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_L14L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_P94P two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_T147T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR 
POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_T156T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1 1089 + pmrB_V161V two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_A159A two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_C84R Escherichia colistin resistant PmrB core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_E121E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_E166E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_G206G two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_L10L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_L14L 
two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_P94P two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_T147T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_T156T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 1093 2181 + pmrB_V161V two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN POINTX 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_A159A two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_C84C two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_E121E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_E166E two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 
WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_G206G two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_L10L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_L14L two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_P94P two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_T147T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_T156T two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig14 2185 3273 + pmrB_V161V two-component system sensor histidine kinase PmrB [WILDTYPE] core AMR POINT COLISTIN COLISTIN EXACTX 363 363 100.00 100.00 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA +NA contig15 1 2905 + 23S_A2058T Escherichia azithromycin/erythromycin/telithromycin resistant 23S core AMR POINT MACROLIDE AZITHROMYCIN/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 1 2905 + 23S_C2611C Escherichia 
23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 1 2905 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT LINCOSAMIDE/OXAZOLIDINONE CLINDAMYCIN/LINEZOLID POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 1 2905 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE/OXAZOLIDINONE CLARITHROMYCIN/LINEZOLID POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 1 2905 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 1 2905 + 23S_G2057G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MULTIDRUG CHLORAMPHENICOL/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 1 2905 + 23S_G2447G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 1 2905 + 23S_T2609T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 1 2905 + 23S_T754T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 99.97 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 2906 5810 + 23S_A2058A Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE AZITHROMYCIN/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 2906 5810 + 23S_C2611C Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN 
POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 2906 5810 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT LINCOSAMIDE/OXAZOLIDINONE CLINDAMYCIN/LINEZOLID POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 2906 5810 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE/OXAZOLIDINONE CLARITHROMYCIN/LINEZOLID POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 2906 5810 + 23S_G2032G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 2906 5810 + 23S_G2057G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MULTIDRUG CHLORAMPHENICOL/ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 2906 5810 + 23S_G2447G Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT OXAZOLIDINONE LINEZOLID POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 2906 5810 + 23S_T2609T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE TELITHROMYCIN POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig15 2906 5810 + 23S_T754T Escherichia 23S ribosomal RNA [WILDTYPE] core AMR POINT MACROLIDE ERYTHROMYCIN/TELITHROMYCIN POINTN 2905 2905 100.00 100.00 2905 NC_004431.1:237160-240064 23S ribosomal RNA NA NA +NA contig16 1 720 + nfsA_E223E nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig16 1 720 + nfsA_G131G nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig16 1 720 + nfsA_K141STOP Escherichia 
nitrofurantoin resistant NfsA core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig16 1 720 + nfsA_Q44Q nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig16 1 720 + nfsA_R133R nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig16 1 720 + nfsA_R15C Escherichia nitrofurantoin resistant NfsA core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig16 1 720 + nfsA_R203R nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig16 1 720 + nfsA_S33S nitroreductase NfsA [WILDTYPE] core AMR POINT NITROFURAN NITROFURANTOIN POINTX 240 240 100.00 99.17 240 WP_089631889.1 nitroreductase NfsA NA NA +NA contig17 1 247 + ampC_C-11C Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA +NA contig17 1 247 + ampC_C-42C Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA +NA contig17 1 247 + ampC_G-15G Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA +NA contig17 1 247 + ampC_T-14T Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA +NA contig17 1 247 + ampC_T-14TGT Escherichia cephalosporin resistant ampC core AMR POINT 
BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA +NA contig17 1 247 + ampC_T-32T Escherichia ampC/blaEC promoter region [WILDTYPE] core AMR POINT BETA-LACTAM CEPHALOSPORIN POINTN 247 245 100.00 99.19 247 NZ_CP041538.1:1149245-1149489 ampC/blaEC promoter region NA NA diff --git a/test/data/raw_outputs/amrfinderplus/report_protein.tsv b/test/data/raw_outputs/amrfinderplus/report_protein.tsv index 50b9cdc..a5a3f17 100644 --- a/test/data/raw_outputs/amrfinderplus/report_protein.tsv +++ b/test/data/raw_outputs/amrfinderplus/report_protein.tsv @@ -1,4 +1,4 @@ -Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description +Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Target length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description blaTEM-156 contig01 101 961 + blaTEM-156 class A beta-lactamase TEM-156 core AMR AMR BETA-LACTAM BETA-LACTAM ALLELEP 286 286 100.00 100.00 286 WP_061158039.1 class A beta-lactamase TEM-156 NF000531.2 TEM family class A beta-lactamase blaPDC-114_blast contig02 1 1191 + blaPDC PDC family class C beta-lactamase core AMR AMR BETA-LACTAM CEPHALOSPORIN BLASTP 397 397 100.00 99.75 397 WP_061189306.1 class C beta-lactamase PDC-114 NF000422.6 PDC family class C beta-lactamase blaOXA-436_partial contig03 101 802 + blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D 
beta-lactamase diff --git a/test/run_integration_test.sh b/test/run_integration_test.sh index 28445a6..6c863bd 100755 --- a/test/run_integration_test.sh +++ b/test/run_integration_test.sh @@ -10,10 +10,10 @@ hamronize abricate data/raw_outputs/abricate/report.tsv --reference_database_ver hamronize ariba data/raw_outputs/ariba/report.tsv --reference_database_version db_v_1 --reference_database_name dbname --input_file_name ariba_report --analysis_software_version ariba_v1 --format json --output hamronized_ariba.json hamronize ariba data/raw_outputs/ariba/report.tsv --reference_database_version db_v_1 --reference_database_name dbname --input_file_name ariba_report --analysis_software_version ariba_v1 --format tsv --output hamronized_ariba.tsv -hamronize amrfinderplus --input_file_name amrfinderplus_nucleotide_report --analysis_software_version AFP_nt_v1 --reference_database_version db_v_1 data/raw_outputs/amrfinderplus/report_nucleotide.tsv --format json --output hamronized_amrfinderplus_nt.json -hamronize amrfinderplus --input_file_name amrfinderplus_nucleotide_report --analysis_software_version AFP_nt_v1 --reference_database_version db_v_1 data/raw_outputs/amrfinderplus/report_nucleotide.tsv --format tsv --output hamronized_amrfinderplus_nt.tsv -hamronize amrfinderplus --input_file_name amrfinderplus_protein_report --analysis_software_version AFP_aa_v1 --reference_database_version db_v_1 data/raw_outputs/amrfinderplus/report_protein.tsv --format json --output hamronized_amrfinderplus_aa.json -hamronize amrfinderplus --input_file_name amrfinderplus_protein_report --analysis_software_version AFP_aa_v1 --reference_database_version db_v_1 data/raw_outputs/amrfinderplus/report_protein.tsv --format tsv --output hamronized_amrfinderplus_aa.tsv +hamronize amrfinderplus --input_file_name afp_nt_report --analysis_software_version AFP4 --reference_database_version v4 data/raw_outputs/amrfinderplus/report_nucleotide.tsv --format json --output hamronized_amrfinderplus_nt.json 
+hamronize amrfinderplus --input_file_name afp_nt_report --analysis_software_version AFP4 --reference_database_version v4 data/raw_outputs/amrfinderplus/report_nucleotide.tsv --format tsv --output hamronized_amrfinderplus_nt.tsv +hamronize amrfinderplus --input_file_name afp_aa_report --analysis_software_version AFP4 --reference_database_version v4 data/raw_outputs/amrfinderplus/report_protein.tsv --format json --output hamronized_amrfinderplus_aa.json +hamronize amrfinderplus --input_file_name afp_aa_report --analysis_software_version AFP4 --reference_database_version v4 data/raw_outputs/amrfinderplus/report_protein.tsv --format tsv --output hamronized_amrfinderplus_aa.tsv hamronize rgi --input_file_name rgi_report --analysis_software_version rgi_v1 --reference_database_version card_v1 data/raw_outputs/rgi/rgi.txt --format json --output hamronized_rgi.json hamronize rgi --input_file_name rgi_report --analysis_software_version rgi_v1 --reference_database_version card_v1 data/raw_outputs/rgi/rgi.txt --format tsv --output hamronized_rgi.tsv @@ -27,9 +27,6 @@ hamronize rgi --input_file_name rgi_report --analysis_software_version rgi_v1 -- hamronize resfinder data/raw_outputs/resfinder/data_resfinder.json --format json --output hamronized_resfinder.json hamronize resfinder data/raw_outputs/resfinder/data_resfinder.json --format tsv --output hamronized_resfinder.tsv -hamronize pointfinder --input_file_name pointfinder_report --analysis_software_version resfinder_v4 --reference_database_version pointfinder_db_v1 data/raw_outputs/pointfinder/PointFinder_results.txt --format json --output hamronized_pointfinder.json -hamronize pointfinder --input_file_name pointfinder_report --analysis_software_version resfinder_v4 --reference_database_version pointfinder_db_v1 data/raw_outputs/pointfinder/PointFinder_results.txt --format tsv --output hamronized_pointfinder.tsv - hamronize srax --reference_database_name srax_default --input_file_name srax_report 
--reference_database_version srax_db_v1 --analysis_software_version srax_v1 --format json data/raw_outputs/srax/sraX_detected_ARGs.tsv --output hamronized_srax.json hamronize srax --reference_database_name srax_default --input_file_name srax_report --reference_database_version srax_db_v1 --analysis_software_version srax_v1 --format tsv data/raw_outputs/srax/sraX_detected_ARGs.tsv --output hamronized_srax.tsv diff --git a/test/test_parsing_validity.py b/test/test_parsing_validity.py index 7e81c52..74c7cf2 100644 --- a/test/test_parsing_validity.py +++ b/test/test_parsing_validity.py @@ -64,8 +64,8 @@ def test_abricate(): def test_amrfinderplus(): metadata = { - "analysis_software_version": "3.6.10", - "reference_database_version": "2019-Jul-28", + "analysis_software_version": "4.0.3", + "reference_database_version": "2024-12-18.1", "input_file_name": "Dummy", } parsed_report = hAMRonization.parse( @@ -81,10 +81,10 @@ def test_amrfinderplus(): == "multidrug efflux RND transporter periplasmic adaptor subunit OqxA" ) assert result.reference_database_name == "NCBI Reference Gene Database" - assert result.reference_database_version == "2019-Jul-28" + assert result.reference_database_version == "2024-12-18.1" assert result.reference_accession == "WP_002914189.1" assert result.analysis_software_name == "amrfinderplus" - assert result.analysis_software_version == "3.6.10" + assert result.analysis_software_version == "4.0.3" assert result.genetic_variation_type == "gene_presence_detected" # optional fields - present in dummy dataset @@ -100,8 +100,8 @@ def test_amrfinderplus(): assert result.input_protein_length == 391 # missing data in report - assert result.reference_gene_length == None - assert result.input_gene_length == None + assert result.reference_gene_length is None + assert result.input_gene_length is None assert result.coverage_depth is None assert result.coverage_ratio is None assert result.resistance_mechanism is None @@ -292,32 +292,6 @@ def 
test_kmerresistance(): assert result.reference_gene_stop is None -def test_pointfinder(): - metadata = { - "analysis_software_version": "4.1.0", - "reference_database_version": "2021-02-01", - "input_file_name": "Dummy", - } - parsed_report = hAMRonization.parse( - "data/dummy/pointfinder/PointFinder_results.txt", metadata, "pointfinder" - ) - - for result in parsed_report: - # assert mandatory fields - assert result.input_file_name == "Dummy" - assert result.gene_symbol == "gyrA" - assert result.gene_name == "gyrA" - assert result.reference_database_name == "pointfinder" - assert result.reference_database_version == "2021-02-01" - assert result.reference_accession == "gyrA p.G81D" - assert result.analysis_software_name == "pointfinder" - assert result.analysis_software_version == "4.1.0" - assert result.genetic_variation_type == "protein_variant_detected" - - assert result.drug_class == "Ciprofloxacin,Nalidixic acid,Ciprofloxacin" - assert result.nucleotide_mutation == "GGT -> GAT" - assert result.amino_acid_mutation == "p.G81D" - def test_resfinder(): metadata = { } @@ -354,11 +328,10 @@ def test_resfinder(): assert result.reference_database_version == "2.4.0" assert result.reference_accession == "EU370913" - # optional fields (13) + # optional fields (12) assert result.predicted_phenotype == "ciprofloxacin, nalidixic acid, trimethoprim, chloramphenicol" assert result.predicted_phenotype_confidence_level == "Must be in an operon with oqxB,phenotype differs based on genomic location of the operon PMID 25801572,also nitrofurantoin resistance PMID 26552976. Natural in K. pneumoniae. 
PMIDs: 18440636" assert result.coverage_percentage == 100.0 - assert result.coverage_ratio == 1.0 assert result.input_sequence_id == "contig1" assert result.input_gene_length == 1176 assert result.input_gene_start == 101 @@ -369,8 +342,9 @@ def test_resfinder(): assert result.reference_gene_stop == 1176 assert result.sequence_identity == 100.0 - # not set (12) + # not set (13) assert result.coverage_depth is None + assert result.coverage_ratio is None assert result.input_protein_length is None assert result.input_protein_start is None assert result.input_protein_stop is None @@ -400,7 +374,6 @@ def test_resfinder(): assert result.predicted_phenotype == "ampicillin" assert result.predicted_phenotype_confidence_level == "The nineteen pbp5 mutations must be present simultaneously for resistance phenotype. PMIDs: 25182648" assert result.coverage_percentage == 100.0 - assert result.coverage_ratio == 1.0 assert result.input_sequence_id == "contig2" assert result.input_gene_length == 2037 assert result.input_gene_start == 64029 @@ -411,13 +384,14 @@ def test_resfinder(): assert result.reference_gene_stop == 2037 assert result.sequence_identity == 95.34 - # mutation fields (2) + # mutation fields (3) assert result.amino_acid_mutation == "p.V24A, p.S27G, p.R34Q, p.G66E, p.A68T, p.E85D, p.E100Q, p.K144Q, p.T172A, p.L177I, p.D204G, p.A216S, p.T324A, p.N496K, p.A499T, p.E525D, p.P667S" assert result.nucleotide_mutation is None assert result.nucleotide_mutation_interpretation == "Codon changes: gta>gca agt>ggt cgg>cag gga>gaa gca>aca gaa>gat gag>cag aaa>caa aca>gca tta>ata gac>ggc gca>tcc aca>gca aat>aaa gca>aca gag>gat ccc>tcg" # not set (10) assert result.coverage_depth is None + assert result.coverage_ratio is None assert result.input_protein_length is None assert result.input_protein_start is None assert result.input_protein_stop is None @@ -428,7 +402,7 @@ def test_resfinder(): assert result.amino_acid_mutation_interpretation is None else: - assert 
result.genetic_variation_type == False # just to stop + assert result.genetic_variation_type is False # just to stop # Check that we saw all assert seen_genes == 4