From 361a8bb9ae7b9787c126149e41bcac9aab9e435c Mon Sep 17 00:00:00 2001 From: Jaclyn Beck Date: Wed, 18 Dec 2024 12:13:05 -0800 Subject: [PATCH] Bumped version of Uniprot mapping file and pharos class file --- config.yaml | 8 ++++---- .../preprocessing/AG-1388_ENSG_Uniprot_Mapping.ipynb | 4 +++- test_config.yaml | 8 ++++---- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/config.yaml b/config.yaml index 14cdcc78..b095e808 100644 --- a/config.yaml +++ b/config.yaml @@ -167,10 +167,10 @@ datasets: id: syn51942280.4 format: csv - name: ensg_to_uniprot_mapping - id: syn54113663.3 + id: syn54113663.5 format: tsv - name: pharos_classes - id: syn64123611.1 + id: syn64123611.2 format: csv final_format: json custom_transformations: @@ -203,8 +203,8 @@ datasets: - syn27211878.2 - *genes_biodomains_provenance - syn51942280.4 - - syn54113663.3 - - syn64123611.1 + - syn54113663.5 + - syn64123611.2 agora_rename: symbol: hgnc_symbol destination: *dest diff --git a/data_analysis/agora/notebooks/preprocessing/AG-1388_ENSG_Uniprot_Mapping.ipynb b/data_analysis/agora/notebooks/preprocessing/AG-1388_ENSG_Uniprot_Mapping.ipynb index 200019b3..d9069d98 100644 --- a/data_analysis/agora/notebooks/preprocessing/AG-1388_ENSG_Uniprot_Mapping.ipynb +++ b/data_analysis/agora/notebooks/preprocessing/AG-1388_ENSG_Uniprot_Mapping.ipynb @@ -65,7 +65,7 @@ "source": [ "ensembl_ids = preprocessing_utils.get_all_adt_ensembl_ids(\n", " config_filename=config_filename,\n", - " exclude_files=[\"gene_metadata\", \"druggability\"],\n", + " exclude_files=[\"gene_metadata\", \"ensg_to_uniprot_mapping\"],\n", ")\n", "print(\"\")\n", "print(str(len(ensembl_ids)) + \" Ensembl IDs found.\")" @@ -125,6 +125,8 @@ "mapping[\"OPTIONAL_INFORMATION\"].loc[\n", " mapping[\"RESOURCE_IDENTIFIER\"].isin(targets_df[\"ensembl_gene_id\"])\n", "] = nomination_string\n", + "\n", + "mapping = mapping.sort_values(by=\"RESOURCE_IDENTIFIER\")\n", "mapping" ] }, diff --git a/test_config.yaml b/test_config.yaml index 8372878b..6ed5754f 100644 --- a/test_config.yaml +++ b/test_config.yaml @@ -167,10 +167,10 @@ datasets: id: syn51942280.4 format: csv - name: ensg_to_uniprot_mapping - id: syn54113663.3 + id: syn54113663.5 format: tsv - name: pharos_classes - id: syn64123611.1 + id: syn64123611.2 format: csv final_format: json custom_transformations: @@ -203,8 +203,8 @@ datasets: - syn27211878.2 - *genes_biodomains_provenance - syn51942280.4 - - syn54113663.3 - - syn64123611.1 + - syn54113663.5 + - syn64123611.2 agora_rename: symbol: hgnc_symbol destination: *dest