-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpaper.bib
231 lines (218 loc) · 23.3 KB
/
paper.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
% Alliance of Genome Resources Consortium, Genetics 220(4), 2022.
% The author value is double-braced so the consortium name is kept as one
% corporate author instead of being split into first/last name parts.
% The pages field holds the article identifier (no page range).
@article{alliance_of_genome_resources_consortium_harmonizing_2022,
  author   = {{Alliance of Genome Resources Consortium}},
  title    = {Harmonizing model organism data in the {Alliance} of {Genome} {Resources}},
  journal  = {Genetics},
  volume   = {220},
  number   = {4},
  pages    = {iyac022},
  month    = apr,
  year     = {2022},
  issn     = {1943-2631},
  doi      = {10.1093/genetics/iyac022},
  pmid     = {35380658},
  pmcid    = {PMC8982023},
  language = {eng},
  keywords = {Alleles, Animals, biocuration, Caenorhabditis elegans, data mining, Databases, Genetic, Drosophila, gene expression, gene function, gene interaction, Gene Ontology, genome, Humans, Internet, knowledgebase, Mice, Molecular Sequence Annotation, phenotype, Rats, Saccharomycetales, variants, Zebrafish},
  abstract = {The Alliance of Genome Resources (the Alliance) is a combined effort of 7 knowledgebase projects: Saccharomyces Genome Database, WormBase, FlyBase, Mouse Genome Database, the Zebrafish Information Network, Rat Genome Database, and the Gene Ontology Resource. The Alliance seeks to provide several benefits: better service to the various communities served by these projects; a harmonized view of data for all biomedical researchers, bioinformaticians, clinicians, and students; and a more sustainable infrastructure. The Alliance has harmonized cross-organism data to provide useful comparative views of gene function, gene expression, and human disease relevance. The basis of the comparative views is shared calls of orthology relationships and the use of common ontologies. The key types of data are alleles and variants, gene function based on gene ontology annotations, phenotypes, association to human disease, gene expression, protein-protein and genetic interactions, and participation in pathways. The information is presented on uniform gene pages that allow facile summarization of information about each gene in each of the 7 organisms covered (budding yeast, roundworm Caenorhabditis elegans, fruit fly, house mouse, zebrafish, brown rat, and human). The harmonized knowledge is freely available on the alliancegenome.org portal, as downloadable files, and by APIs. We expect other existing and emerging knowledge bases to join in the effort to provide the union of useful data and features that each knowledge base currently provides.},
}
% Yates et al., Bioinformatics 31(1), 2015: the Ensembl REST API.
% Only the proper noun and acronyms in the title are brace-protected so that
% bibliography styles remain free to apply their own casing to ordinary words.
% The journal is stored under its plain name, without a catalogue place suffix.
@article{yates_ensembl_2015,
  author     = {Yates, Andrew and Beal, Kathryn and Keenan, Stephen and McLaren, William and Pignatelli, Miguel and Ritchie, Graham R S and Ruffier, Magali and Taylor, Kieron and Vullo, Alessandro and Flicek, Paul},
  title      = {The {Ensembl} {REST} {API}: {Ensembl} Data for Any Language},
  shorttitle = {The {Ensembl} {REST} {API}},
  journal    = {Bioinformatics},
  volume     = {31},
  number     = {1},
  pages      = {143--145},
  month      = jan,
  year       = {2015},
  issn       = {1367-4811},
  doi        = {10.1093/bioinformatics/btu613},
  url        = {https://europepmc.org/articles/PMC4271150},
  urldate    = {2023-02-06},
  pmid       = {25236461},
  pmcid      = {PMC4271150},
  language   = {eng},
  copyright  = {cc by},
  abstract   = {Motivation: We present a Web service to access Ensembl data using Representational State Transfer (REST). The Ensembl REST server enables the easy retrieval of a wide range of Ensembl data by most programming languages, using standard formats such as JSON and FASTA while minimizing client work. We also introduce bindings to the popular Ensembl Variant Effect Predictor tool permitting large-scale programmatic variant analysis independent of any specific programming language. Availability and implementation: The Ensembl REST API can be accessed at http://rest.ensembl.org and source code is freely available under an Apache 2.0 license from http://github.com/Ensembl/ensembl-rest.},
}
% Zerbino et al., Nucleic Acids Research 46(D1), 2018: Ensembl project update.
% Author names contain UTF-8 characters and are stored as exported.
@article{zerbino_ensembl_2018,
  author   = {Zerbino, Daniel R and Achuthan, Premanand and Akanni, Wasiu and Amode, M Ridwan and Barrell, Daniel and Bhai, Jyothish and Billis, Konstantinos and Cummins, Carla and Gall, Astrid and Girón, Carlos García and Gil, Laurent and Gordon, Leo and Haggerty, Leanne and Haskell, Erin and Hourlier, Thibaut and Izuogu, Osagie G and Janacek, Sophie H and Juettemann, Thomas and To, Jimmy Kiang and Laird, Matthew R and Lavidas, Ilias and Liu, Zhicheng and Loveland, Jane E and Maurel, Thomas and McLaren, William and Moore, Benjamin and Mudge, Jonathan and Murphy, Daniel N and Newman, Victoria and Nuhn, Michael and Ogeh, Denye and Ong, Chuang Kee and Parker, Anne and Patricio, Mateus and Riat, Harpreet Singh and Schuilenburg, Helen and Sheppard, Dan and Sparrow, Helen and Taylor, Kieron and Thormann, Anja and Vullo, Alessandro and Walts, Brandon and Zadissa, Amonida and Frankish, Adam and Hunt, Sarah E and Kostadima, Myrto and Langridge, Nicholas and Martin, Fergal J and Muffato, Matthieu and Perry, Emily and Ruffier, Magali and Staines, Dan M and Trevanion, Stephen J and Aken, Bronwen L and Cunningham, Fiona and Yates, Andrew and Flicek, Paul},
  title    = {Ensembl 2018},
  journal  = {Nucleic Acids Research},
  volume   = {46},
  number   = {D1},
  pages    = {D754--D761},
  month    = jan,
  year     = {2018},
  issn     = {0305-1048},
  doi      = {10.1093/nar/gkx1098},
  url      = {https://doi.org/10.1093/nar/gkx1098},
  urldate  = {2023-02-06},
  abstract = {The Ensembl project has been aggregating, processing, integrating and redistributing genomic datasets since the initial releases of the draft human genome, with the aim of accelerating genomics research through rapid open distribution of public data. Large amounts of raw data are thus transformed into knowledge, which is made available via a multitude of channels, in particular our browser (http://www.ensembl.org). Over time, we have expanded in multiple directions. First, our resources describe multiple fields of genomics, in particular gene annotation, comparative genomics, genetics and epigenomics. Second, we cover a growing number of genome assemblies; Ensembl Release 90 contains exactly 100. Third, our databases feed simultaneously into an array of services designed around different use cases, ranging from quick browsing to genome-wide bioinformatic analysis. We present here the latest developments of the Ensembl project, with a focus on managing an increasing number of assemblies, supporting efforts in genome interpretation and improving our browser.},
}
% McLaren et al., Genome Biology 17(1), 2016: the Ensembl Variant Effect Predictor.
% The pages field holds a single article number rather than a page range.
@article{mclaren_ensembl_2016,
  author   = {McLaren, William and Gil, Laurent and Hunt, Sarah E. and Riat, Harpreet Singh and Ritchie, Graham R. S. and Thormann, Anja and Flicek, Paul and Cunningham, Fiona},
  title    = {The {Ensembl} {Variant} {Effect} {Predictor}},
  journal  = {Genome Biology},
  volume   = {17},
  number   = {1},
  pages    = {122},
  month    = jun,
  year     = {2016},
  issn     = {1474-760X},
  doi      = {10.1186/s13059-016-0974-4},
  url      = {https://doi.org/10.1186/s13059-016-0974-4},
  urldate  = {2023-02-06},
  keywords = {Genome, NGS, SNP, Variant annotation},
  abstract = {The Ensembl Variant Effect Predictor is a powerful toolset for the analysis, annotation, and prioritization of genomic variants in coding and non-coding regions. It provides access to an extensive collection of genomic annotation, with a variety of interfaces to suit different requirements, and simple options for configuring and extending analysis. It is open source, free to use, and supports full reproducibility of results. The Ensembl Variant Effect Predictor can simplify and accelerate variant interpretation in a wide range of study designs.},
}
% Koehler et al., Nucleic Acids Research 49(D1), 2021: Human Phenotype Ontology update.
% Author names contain UTF-8 characters and are stored as exported.
@article{kohler_human_2021,
  author   = {Köhler, Sebastian and Gargano, Michael and Matentzoglu, Nicolas and Carmody, Leigh C and Lewis-Smith, David and Vasilevsky, Nicole A and Danis, Daniel and Balagura, Ganna and Baynam, Gareth and Brower, Amy M and Callahan, Tiffany J and Chute, Christopher G and Est, Johanna L and Galer, Peter D and Ganesan, Shiva and Griese, Matthias and Haimel, Matthias and Pazmandi, Julia and Hanauer, Marc and Harris, Nomi L and Hartnett, Michael J and Hastreiter, Maximilian and Hauck, Fabian and He, Yongqun and Jeske, Tim and Kearney, Hugh and Kindle, Gerhard and Klein, Christoph and Knoflach, Katrin and Krause, Roland and Lagorce, David and McMurry, Julie A and Miller, Jillian A and Munoz-Torres, Monica C and Peters, Rebecca L and Rapp, Christina K and Rath, Ana M and Rind, Shahmir A and Rosenberg, Avi Z and Segal, Michael M and Seidel, Markus G and Smedley, Damian and Talmy, Tomer and Thomas, Yarlalu and Wiafe, Samuel A and Xian, Julie and Yüksel, Zafer and Helbig, Ingo and Mungall, Christopher J and Haendel, Melissa A and Robinson, Peter N},
  title    = {The {Human} {Phenotype} {Ontology} in 2021},
  journal  = {Nucleic Acids Research},
  volume   = {49},
  number   = {D1},
  pages    = {D1207--D1217},
  month    = jan,
  year     = {2021},
  issn     = {0305-1048},
  doi      = {10.1093/nar/gkaa1043},
  url      = {https://doi.org/10.1093/nar/gkaa1043},
  urldate  = {2023-02-06},
  abstract = {The Human Phenotype Ontology (HPO, https://hpo.jax.org) was launched in 2008 to provide a comprehensive logical standard to describe and computationally analyze phenotypic abnormalities found in human disease. The HPO is now a worldwide standard for phenotype exchange. The HPO has grown steadily since its inception due to considerable contributions from clinical experts and researchers from a diverse range of disciplines. Here, we present recent major extensions of the HPO for neurology, nephrology, immunology, pulmonology, newborn screening, and other areas. For example, the seizure subontology now reflects the International League Against Epilepsy (ILAE) guidelines and these enhancements have already shown clinical validity. We present new efforts to harmonize computational definitions of phenotypic abnormalities across the HPO and multiple phenotype ontologies used for animal models of disease. These efforts will benefit software such as Exomiser by improving the accuracy and scope of cross-species phenotype matching. The computational modeling strategy used by the HPO to define disease entities and phenotypic features and distinguish between them is explained in detail. We also report on recent efforts to translate the HPO into indigenous languages. Finally, we summarize recent advances in the use of HPO in electronic health record systems.},
}
% Maglott et al., Nucleic Acids Research 35 (supplement issue), 2007: Entrez Gene.
% The underscore in the issue label is backslash-escaped so it is safe in LaTeX text.
@article{maglott_entrez_2007,
  author     = {Maglott, Donna and Ostell, Jim and Pruitt, Kim D. and Tatusova, Tatiana},
  title      = {Entrez {Gene}: gene-centered information at {NCBI}},
  shorttitle = {Entrez {Gene}},
  journal    = {Nucleic Acids Research},
  volume     = {35},
  number     = {suppl\_1},
  pages      = {D26--D31},
  month      = jan,
  year       = {2007},
  issn       = {0305-1048},
  doi        = {10.1093/nar/gkl993},
  url        = {https://doi.org/10.1093/nar/gkl993},
  urldate    = {2023-02-06},
  abstract   = {Entrez Gene is NCBI's database for gene-specific information. Entrez Gene includes records from genomes that have been completely sequenced, that have an active research community to contribute gene-specific information or that are scheduled for intense sequence analysis. The content of Entrez Gene represents the result of both curation and automated integration of data from NCBI's Reference Sequence project (RefSeq), from collaborating model organism databases and from other databases within NCBI. Records in Entrez Gene are assigned unique, stable and tracked integers as identifiers. The content (nomenclature, map location, gene products and their attributes, markers, phenotypes and links to citations, sequences, variation details, maps, expression, homologs, protein domains and external databases) is provided via interactive browsing through NCBI's Entrez system, via NCBI's Entrez programing utilities (E-Utilities), and for bulk transfer by ftp.},
}
% Seal et al., Nucleic Acids Research 51(D1), 2023: HGNC resources (genenames.org).
@article{seal_genenamesorg_2023,
  author     = {Seal, Ruth L and Braschi, Bryony and Gray, Kristian and Jones, Tamsin E M and Tweedie, Susan and Haim-Vilmovsky, Liora and Bruford, Elspeth A},
  title      = {Genenames.org: the {HGNC} resources in 2023},
  shorttitle = {Genenames.org},
  journal    = {Nucleic Acids Research},
  volume     = {51},
  number     = {D1},
  pages      = {D1003--D1009},
  month      = jan,
  year       = {2023},
  issn       = {0305-1048},
  doi        = {10.1093/nar/gkac888},
  url        = {https://doi.org/10.1093/nar/gkac888},
  urldate    = {2023-02-06},
  abstract   = {The HUGO Gene Nomenclature Committee (HGNC) assigns unique symbols and names to human genes. The HGNC database (www.genenames.org) currently contains over 43 000 approved gene symbols, over 19 200 of which are assigned to protein-coding genes, 14 000 to pseudogenes and nearly 9000 to non-coding RNA genes. The public website, www.genenames.org, displays all approved nomenclature within Symbol Reports that contain data curated by HGNC nomenclature advisors and links to related genomic, clinical, and proteomic information. Here, we describe updates to our resource, including improvements to our search facility and new download features.},
}
% Landrum et al., Nucleic Acids Research 46(D1), 2018: ClinVar.
% The database name is brace-protected in title and shorttitle to keep its mixed case.
@article{landrum_clinvar_2018,
  author     = {Landrum, Melissa J. and Lee, Jennifer M. and Benson, Mark and Brown, Garth R. and Chao, Chen and Chitipiralla, Shanmuga and Gu, Baoshan and Hart, Jennifer and Hoffman, Douglas and Jang, Wonhee and Karapetyan, Karen and Katz, Kenneth and Liu, Chunlei and Maddipatla, Zenith and Malheiro, Adriana and McDaniel, Kurt and Ovetsky, Michael and Riley, George and Zhou, George and Holmes, J. Bradley and Kattman, Brandi L. and Maglott, Donna R.},
  title      = {{ClinVar}: improving access to variant interpretations and supporting evidence},
  shorttitle = {{ClinVar}},
  journal    = {Nucleic Acids Research},
  volume     = {46},
  number     = {D1},
  pages      = {D1062--D1067},
  month      = jan,
  year       = {2018},
  issn       = {1362-4962},
  doi        = {10.1093/nar/gkx1153},
  pmid       = {29165669},
  pmcid      = {PMC5753237},
  language   = {eng},
  keywords   = {Humans, Disease, Phenotype, Databases, Nucleic Acid, Genetic Variation},
  abstract   = {ClinVar (https://www.ncbi.nlm.nih.gov/clinvar/) is a freely available, public archive of human genetic variants and interpretations of their significance to disease, maintained at the National Institutes of Health. Interpretations of the clinical significance of variants are submitted by clinical testing laboratories, research laboratories, expert panels and other groups. ClinVar aggregates data by variant-disease pairs, and by variant (or set of variants). Data aggregated by variant are accessible on the website, in an improved set of variant call format files and as a new comprehensive XML report. ClinVar recently started accepting submissions that are focused primarily on providing phenotypic information for individuals who have had genetic testing. Submissions may come from clinical providers providing their own interpretation of the variant ('provider interpretation') or from groups such as patient registries that primarily provide phenotypic information from patients ('phenotyping only'). ClinVar continues to make improvements to its search and retrieval functions. Several new fields are now indexed for more precise searching, and filters allow the user to narrow down a large set of search results.},
}
% Amberger et al., Nucleic Acids Research 43(D1), 2015: OMIM.org.
% Title and abstract contain a UTF-8 registered-trademark sign, stored as exported.
@article{amberger_omimorg_2015,
  author     = {Amberger, Joanna S. and Bocchini, Carol A. and Schiettecatte, François and Scott, Alan F. and Hamosh, Ada},
  title      = {{OMIM}.org: {Online} {Mendelian} {Inheritance} in {Man} ({OMIM}®), an online catalog of human genes and genetic disorders},
  shorttitle = {{OMIM}.org},
  journal    = {Nucleic Acids Research},
  volume     = {43},
  number     = {D1},
  pages      = {D789--D798},
  month      = jan,
  year       = {2015},
  issn       = {0305-1048},
  doi        = {10.1093/nar/gku1205},
  url        = {https://doi.org/10.1093/nar/gku1205},
  urldate    = {2023-02-06},
  abstract   = {Online Mendelian Inheritance in Man, OMIM®, is a comprehensive, authoritative and timely research resource of curated descriptions of human genes and phenotypes and the relationships between them. The new official website for OMIM, OMIM.org (http://omim.org), was launched in January 2011. OMIM is based on the published peer-reviewed biomedical literature and is used by overlapping and diverse communities of clinicians, molecular biologists and genome scientists, as well as by students and teachers of these disciplines. Genes and phenotypes are described in separate entries and are given unique, stable six-digit identifiers (MIM numbers). OMIM entries have a structured free-text format that provides the flexibility necessary to describe the complex and nuanced relationships between genes and genetic phenotypes in an efficient manner. OMIM also has a derivative table of genes and genetic phenotypes, the Morbid Map. OMIM.org has enhanced search capabilities such as genome coordinate searching and thesaurus-enhanced search term options. Phenotypic series have been created to facilitate viewing genetic heterogeneity of phenotypes. Clinical synopsis features are enhanced with UMLS, Human Phenotype Ontology and Elements of Morphology terms and image links. All OMIM data are available for FTP download and through an API. MIMmatch is a novel outreach feature to disseminate updates and encourage collaboration.},
}
% Ng and Henikoff, Genome Research 11(5), 2001: the SIFT method for predicting
% deleterious amino acid substitutions.
% Given names are stored in full so the bibliography style decides how to abbreviate them.
% NOTE(review): the expanded given names should be confirmed against the PubMed record.
@article{ng_predicting_2001,
  author   = {Ng, Pauline C. and Henikoff, Steven},
  title    = {Predicting deleterious amino acid substitutions},
  journal  = {Genome Research},
  volume   = {11},
  number   = {5},
  pages    = {863--874},
  month    = may,
  year     = {2001},
  issn     = {1088-9051},
  doi      = {10.1101/gr.176601},
  pmid     = {11337480},
  pmcid    = {PMC311071},
  language = {eng},
  keywords = {Humans, Computational Biology, Phenotype, Software, Amino Acid Sequence, Amino Acid Substitution, Bacterial Proteins, Bacteriophage T4, Conserved Sequence, Escherichia coli Proteins, Genetic Diseases, Inborn, HIV Protease, HIV-1, Lac Repressors, Lactose, Molecular Sequence Data, Muramidase, Mutation, Missense, Probability, Repressor Proteins, Sequence Alignment},
  abstract = {Many missense substitutions are identified in single nucleotide polymorphism (SNP) data and large-scale random mutagenesis projects. Each amino acid substitution potentially affects protein function. We have constructed a tool that uses sequence homology to predict whether a substitution affects protein function. SIFT, which sorts intolerant from tolerant substitutions, classifies substitutions as tolerated or deleterious. A higher proportion of substitutions predicted to be deleterious by SIFT gives an affected phenotype than substitutions predicted to be deleterious by substitution scoring matrices in three test cases. Using SIFT before mutagenesis studies could reduce the number of functional assays required and yield a higher proportion of affected phenotypes. SIFT may be used to identify plausible disease candidates among the SNPs that cause missense substitutions.},
}
% Adzhubei et al., Nature Methods 7(4), 2010: method and server for predicting
% damaging missense mutations.
% The publisher is recorded in its own field rather than in a free-text note,
% so bibliography styles do not print exporter metadata verbatim.
@article{adzhubei_method_2010,
  author    = {Adzhubei, Ivan A. and Schmidt, Steffen and Peshkin, Leonid and Ramensky, Vasily E. and Gerasimova, Anna and Bork, Peer and Kondrashov, Alexey S. and Sunyaev, Shamil R.},
  title     = {A method and server for predicting damaging missense mutations},
  journal   = {Nature Methods},
  volume    = {7},
  number    = {4},
  pages     = {248--249},
  month     = apr,
  year      = {2010},
  publisher = {Nature Publishing Group},
  issn      = {1548-7105},
  doi       = {10.1038/nmeth0410-248},
  url       = {https://www.nature.com/articles/nmeth0410-248},
  urldate   = {2023-02-13},
  language  = {en},
  copyright = {2010 Nature Publishing Group},
  keywords  = {Bioinformatics, Genetic testing, Mutation, Software},
}
% O'Leary et al., Nucleic Acids Research 44(D1), 2016: the RefSeq database at NCBI.
% Both ends of the page range carry the "D" prefix used by the database issue.
% Greater-than signs in the abstract are written with a LaTeX text macro.
@article{oleary_reference_2016,
  author     = {O'Leary, Nuala A. and Wright, Mathew W. and Brister, J. Rodney and Ciufo, Stacy and Haddad, Diana and McVeigh, Rich and Rajput, Bhanu and Robbertse, Barbara and Smith-White, Brian and Ako-Adjei, Danso and Astashyn, Alexander and Badretdin, Azat and Bao, Yiming and Blinkova, Olga and Brover, Vyacheslav and Chetvernin, Vyacheslav and Choi, Jinna and Cox, Eric and Ermolaeva, Olga and Farrell, Catherine M. and Goldfarb, Tamara and Gupta, Tripti and Haft, Daniel and Hatcher, Eneida and Hlavina, Wratko and Joardar, Vinita S. and Kodali, Vamsi K. and Li, Wenjun and Maglott, Donna and Masterson, Patrick and McGarvey, Kelly M. and Murphy, Michael R. and O'Neill, Kathleen and Pujar, Shashikant and Rangwala, Sanjida H. and Rausch, Daniel and Riddick, Lillian D. and Schoch, Conrad and Shkeda, Andrei and Storz, Susan S. and Sun, Hanzhen and Thibaud-Nissen, Francoise and Tolstoy, Igor and Tully, Raymond E. and Vatsan, Anjana R. and Wallin, Craig and Webb, David and Wu, Wendy and Landrum, Melissa J. and Kimchi, Avi and Tatusova, Tatiana and DiCuccio, Michael and Kitts, Paul and Murphy, Terence D. and Pruitt, Kim D.},
  title      = {Reference sequence ({RefSeq}) database at {NCBI}: current status, taxonomic expansion, and functional annotation},
  shorttitle = {Reference sequence ({RefSeq}) database at {NCBI}},
  journal    = {Nucleic Acids Research},
  volume     = {44},
  number     = {D1},
  pages      = {D733--D745},
  month      = jan,
  year       = {2016},
  issn       = {1362-4962},
  doi        = {10.1093/nar/gkv1189},
  pmid       = {26553804},
  pmcid      = {PMC4702849},
  language   = {eng},
  keywords   = {Animals, Cattle, Databases, Genetic, Gene Expression Profiling, Genome, Fungal, Genome, Human, Genome, Microbial, Genome, Plant, Genome, Viral, Genomics, Humans, Invertebrates, Mice, Molecular Sequence Annotation, Nematoda, Phylogeny, Rats, Reference Standards, RNA, Long Noncoding, Sequence Analysis, Protein, Sequence Analysis, RNA, Vertebrates},
  abstract   = {The RefSeq project at the National Center for Biotechnology Information (NCBI) maintains and curates a publicly available database of annotated genomic, transcript, and protein sequence records (http://www.ncbi.nlm.nih.gov/refseq/). The RefSeq project leverages the data submitted to the International Nucleotide Sequence Database Collaboration (INSDC) against a combination of computation, manual curation, and collaboration to produce a standard set of stable, non-redundant reference sequences. The RefSeq project augments these reference sequences with current knowledge including publications, functional features and informative nomenclature. The database currently represents sequences from more than 55,000 organisms ({\textgreater}4800 viruses, {\textgreater}40,000 prokaryotes and {\textgreater}10,000 eukaryotes; RefSeq release 71), ranging from a single record to complete genomes. This paper summarizes the current status of the viral, prokaryotic, and eukaryotic branches of the RefSeq project, reports on improvements to data access and details efforts to further expand the taxonomic representation of the collection. We also highlight diverse functional curation initiatives that support multiple uses of RefSeq data including taxonomic validation, genome annotation, comparative genomics, and clinical testing. We summarize our approach to utilizing available RNA-Seq and other data types in our manual curation process for vertebrate, plant, and other species, and describe a new direction for prokaryotic genomes and protein name management.},
}
% Rentzsch et al., Genome Medicine 13(1), 2021: CADD-Splice.
% The title contains a UTF-8 em dash, stored as exported.
% The pages field holds a single article number rather than a page range.
@article{rentzsch_cadd-spliceimproving_2021,
  author   = {Rentzsch, Philipp and Schubach, Max and Shendure, Jay and Kircher, Martin},
  title    = {{CADD}-{Splice}—improving genome-wide variant effect prediction using deep learning-derived splice scores},
  journal  = {Genome Medicine},
  volume   = {13},
  number   = {1},
  pages    = {31},
  month    = feb,
  year     = {2021},
  issn     = {1756-994X},
  doi      = {10.1186/s13073-021-00835-9},
  url      = {https://doi.org/10.1186/s13073-021-00835-9},
  urldate  = {2023-02-13},
  abstract = {Splicing of genomic exons into mRNAs is a critical prerequisite for the accurate synthesis of human proteins. Genetic variants impacting splicing underlie a substantial proportion of genetic disease, but are challenging to identify beyond those occurring at donor and acceptor dinucleotides. To address this, various methods aim to predict variant effects on splicing. Recently, deep neural networks (DNNs) have been shown to achieve better results in predicting splice variants than other strategies.},
}