Skip to content

Commit

Permalink
inconsistent env_broad_scale #77
Browse files Browse the repository at this point in the history
  • Loading branch information
jeaniceangelica committed Apr 28, 2022
1 parent ca76f01 commit 99e4bb8
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 0 deletions.
31 changes: 31 additions & 0 deletions sample_annotator/non_edge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import sqlite3
import pandas as pd
import numpy as np
import re

def to_csv(db_path='biosample.db', out_dir='.'):
    """Dump every table of a SQLite database to its own CSV file.

    Each table listed in ``sqlite_master`` is read in full and written to
    ``<out_dir>/<table_name>.csv`` with the DataFrame index stored in a
    column labelled ``index``.

    Args:
        db_path: Path of the SQLite database file to read. Defaults to
            ``'biosample.db'``.
        out_dir: Directory that receives the CSV files. Defaults to the
            current working directory.
    """
    # Local imports keep this function self-contained; the module's
    # top-level import block is left untouched.
    import os
    from contextlib import closing

    # closing() guarantees the connection and cursor are released even if
    # a query or a CSV write raises part-way through.
    with closing(sqlite3.connect(db_path)) as db:
        with closing(db.cursor()) as cursor:
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            table_names = [row[0] for row in cursor.fetchall()]

        for table_name in table_names:
            # Identifiers cannot be bound as SQL parameters, so quote the
            # name (doubling embedded quotes) to cope with spaces, reserved
            # words and other special characters.
            quoted_name = '"' + table_name.replace('"', '""') + '"'
            table = pd.read_sql_query('SELECT * FROM %s' % quoted_name, db)
            table.to_csv(
                os.path.join(out_dir, table_name + '.csv'),
                index_label='index',
            )
to_csv()

# Tried dumping env_mapping into biosample instead.
# Both CSV files are read from the working directory; presumably they are
# the per-table dumps written by to_csv() above -- TODO confirm.
env_map = pd.read_csv('new_env_mapping.csv')
main = pd.read_csv('harmonized_wide_sel_envs.csv')

# Normalise the raw values so they can be matched against mapping labels:
# lower-case everything and drop any literal 'envo:' prefix.
main['env_broad_scale'] = (
    main['env_broad_scale'].str.lower().str.replace('envo:', '', regex=False)
)

# Left join keeps every row of `main`; rows whose value has no matching
# label end up with a missing term_id.
# NOTE(review): only the left-hand key is lower-cased; mapping labels that
# contain capitals will not match -- confirm whether that is intended.
merge_table = main.merge(
    env_map, left_on='env_broad_scale', right_on='label', how='left'
)

# Build "<label> [<term_id>]" only where both parts are present.  The
# previous version stringified first, producing 'nan [nan]' (with a space),
# and then tried to replace 'nan[nan]' (without one), which never matched.
# Masking on the original columns leaves a real NaN instead of a string that
# contains the text 'nan'.
has_term = merge_table['env_broad_scale'].notna() & merge_table['term_id'].notna()
merge_table['broad_scale_fixed'] = (
    merge_table['env_broad_scale'].astype(str)
    + ' ['
    + merge_table['term_id'].astype(str)
    + ']'
).where(has_term)

# A bare expression shows nothing when run as a script, so print the counts.
print(merge_table['broad_scale_fixed'].value_counts())
10 changes: 10 additions & 0 deletions sql/env_mapping.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- SQLite
-- Build a lookup table pairing each ENVO subject with its rdfs:label value.
CREATE TABLE new_env_mapping AS
SELECT DISTINCT
    s.subject AS term_id,
    s.value   AS label
FROM statements AS s
WHERE s.subject LIKE 'ENVO:%'
  AND s.predicate = 'rdfs:label';
10 changes: 10 additions & 0 deletions sql/merged_table.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- SQLite
-- Prerequisites: the mapping table new_env_mapping has been created and the
-- harmonized table has been dumped into envo.db.
-- Every harmonized row is kept; mapping columns are NULL where no label
-- equals env_broad_scale.
CREATE TABLE merged AS
SELECT *
FROM harmonized_wide_sel_envs AS harmonized
LEFT JOIN new_env_mapping AS mapping
    ON harmonized.env_broad_scale = mapping.label;

0 comments on commit 99e4bb8

Please sign in to comment.