diff --git a/sample_annotator/non_edge.py b/sample_annotator/non_edge.py
new file mode 100644
index 0000000..8f0df58
--- /dev/null
+++ b/sample_annotator/non_edge.py
@@ -0,0 +1,59 @@
+import re
+import sqlite3
+from contextlib import closing
+
+import numpy as np
+import pandas as pd
+
+
+def to_csv(db_path='biosample.db'):
+    """Dump every table of a SQLite database to ``<table_name>.csv``.
+
+    Files are written to the current working directory; the DataFrame
+    index is stored in a column labelled ``index``.
+
+    Args:
+        db_path: Path of the SQLite database to export.
+    """
+    # closing() releases the connection even when a query or write fails.
+    with closing(sqlite3.connect(db_path)) as db:
+        tables = db.execute(
+            "SELECT name FROM sqlite_master WHERE type='table';"
+        ).fetchall()
+        for (table_name,) in tables:
+            # Identifiers cannot be bound as SQL parameters, so quote the
+            # name (doubling embedded quotes) rather than interpolating it bare.
+            quoted = '"' + table_name.replace('"', '""') + '"'
+            table = pd.read_sql_query("SELECT * FROM %s" % quoted, db)
+            table.to_csv(table_name + '.csv', index_label='index')
+
+
+def _label_with_term_id(labels, term_ids):
+    """Return ``'<label> [<term_id>]'`` strings, NaN where either part is missing.
+
+    Masking on ``notna()`` replaces the earlier string patch-up: replacing
+    ``'nan[nan]'`` never matched because the concatenation yields
+    ``'nan [nan]'`` (with a space), or ``'<label> [nan]'`` for unmapped labels.
+    """
+    combined = labels.astype(str) + ' [' + term_ids.astype(str) + ']'
+    return combined.where(labels.notna() & term_ids.notna())
+
+
+to_csv()
+
+# tried dumping env_mapping into biosample instead
+env_map = pd.read_csv('new_env_mapping.csv')  # not ``map``: avoid shadowing the builtin
+main = pd.read_csv('harmonized_wide_sel_envs.csv')
+
+# Lower-case and strip the literal 'envo:' text so values can match labels.
+main['env_broad_scale'] = (
+    main['env_broad_scale'].str.lower().str.replace('envo:', '', regex=False)
+)
+merge_table = main.merge(
+    env_map, left_on='env_broad_scale', right_on='label', how='left'
+)
+merge_table['broad_scale_fixed'] = _label_with_term_id(
+    merge_table['env_broad_scale'], merge_table['term_id']
+)
+# A bare expression prints nothing when run as a script.
+print(merge_table['broad_scale_fixed'].value_counts())
\ No newline at end of file
diff --git a/sql/env_mapping.sql b/sql/env_mapping.sql
new file mode 100644
index 0000000..8c70d63
--- /dev/null
+++ b/sql/env_mapping.sql
@@ -0,0 +1,11 @@
+-- SQLite
+-- Map each ENVO term id (subject) to its rdfs:label value.
+CREATE TABLE new_env_mapping AS
+SELECT DISTINCT
+    subject AS term_id,
+    value AS label
+FROM
+    statements s
+WHERE
+    predicate = 'rdfs:label'
+    AND subject LIKE 'ENVO:%';
\ No newline at end of file
diff --git a/sql/merged_table.sql b/sql/merged_table.sql
new file mode 100644
index 0000000..90f84dc
--- /dev/null
+++ b/sql/merged_table.sql
@@ -0,0 +1,12 @@
+-- SQLite
+-- Before this I created a new table for the mapping called new_env_mapping and
+-- dumped the harmonized table into envo.db.
+-- NOTE(review): this joins on the raw env_broad_scale value, whereas
+-- non_edge.py lower-cases it and strips 'envo:' first; confirm which is intended.
+CREATE TABLE merged AS
+    SELECT *
+    FROM
+        harmonized_wide_sel_envs big_table
+        LEFT JOIN new_env_mapping AS map
+        ON
+            big_table.env_broad_scale = map.label;
\ No newline at end of file