-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
bbac9f1
commit 89a951c
Showing
13 changed files
with
904 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# dwi environment | ||
|
||
|
||
name: witty | ||
channels: | ||
- conda-forge | ||
|
||
dependencies: | ||
- dipy=1.7.0 | ||
- fury=0.7.1 | ||
- jupyter | ||
- jupyterlab | ||
- matplotlib=3.5.3 | ||
- nilearn=0.7.0 | ||
- osfclient=0.0.5 | ||
- python>=3.10 | ||
- pybids | ||
|
||
# if environment does not resolve try installing everything but pybids, then `conda install conda-forge::pybids` | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# dwi environment | ||
|
||
|
||
name: wittier | ||
channels: | ||
- conda-forge | ||
|
||
dependencies: | ||
- dipy=1.7.0 | ||
- fury=0.7.1 | ||
- jupyter | ||
- jupyterlab | ||
- matplotlib=3.5.3 | ||
- nilearn=0.7.0 | ||
- numexpr=2.8.4 | ||
- osfclient=0.0.5 | ||
- python>=3.10 | ||
- pybids | ||
|
||
# if environment does not resolve try installing everything but pybids, then `conda install conda-forge::pybids` | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
## in this script, I will try to clean up the dataexport of castor on november 11th. At this thime, score repeated data, volbrain data and freesurfer data have not been uploaded yet | ||
#weirdly enough, the age_T0_MRI_months is exported incorrectly by castor. I decided to limit my next export to just the baseline_MRI form at T0 and to merge it later | ||
library(dplyr) | ||
library(writexl) #to write the tables to excel files | ||
#read in the raw data file. For now in my personal folder, will get moved around when we arrive at the final datastructure | ||
raw <- read.csv("Z:/Aida_experiment/6112024_brick_castor_data/BRICK_export_20241106.csv", sep= ";") | ||
ages <-read.csv("Z:/Aida_experiment/combined_BRICK_marjolein.csv", sep=",") #this file contains the ages and recon-all data, as I apparently cannot export calculated fields from castor | ||
volbrain <- read.csv("Z:/castor_proof_files/csv_castor/current/volbrains_castor.csv") #contains volbrain output | ||
|
||
#rename first columns in order to merge later. | ||
raw <- raw %>% | ||
rename(Participant_Id = `ï..Participant.Id`) | ||
|
||
ages <- ages %>% | ||
rename(Participant_Id = Participant.Id) | ||
|
||
volbrain <- volbrain %>% | ||
rename(Participant_Id = Participant.Id) | ||
|
||
#I decided not to clean it up. Instead. Just start with creating the tables. | ||
|
||
table1 <- raw %>% select(Participant_Id, Hydrea_at_scan_T0, brick_genotype, mothersbirth, fathersbirth) | ||
|
||
#import age in months from the ages df | ||
table1 <- table1 %>% | ||
left_join(select(ages, Participant_Id, Age_at_scan_m_T0X, gender_BRICK), by = "Participant_Id") | ||
|
||
#add a column in years. now we have the ingredients for table1 | ||
table1 <- table1 %>% | ||
mutate(Age_at_scan_years = round(Age_at_scan_m_T0X / 12, 1)) | ||
|
||
#Table 2: Lab values including HbF | ||
|
||
table2 <- raw %>% select(Participant_Id, ERY0, HB0, MCV0, HT0, reticulocyte_count_percentage_1, LEU0, TROMBO0, FE0, FERT0, TRAF0, TSAT0, ALAT0, LDH0, TBIL0, DBIL0, KREA0, SCHWARTZ_Bedsite0, FOLZ0, UREU0, NA0, K00, VITD0, ASAT0, AFOS0, GGT0, CRP0, KREA_U0, TE_U0, TE_U_KR0, ALB_U0, ALB_U_KR0, B120, HPLC_HBF_T0, HPLC_HBF_date_T0, HPLC_HbS_T0, HPLC_HbS_date_T0) | ||
|
||
#Table 3: Descriptives on radiology reports | ||
table3 <- raw %>% select(Participant_Id,Measurement_moment_QC_T1w_T0, Score_QC_T1w_T0, Exclude_Score_QC_T1w_T0, Remarks_Score_QC_T1w_T0, Screened_for_WMH_T0, WMH_observed_T0, Vasculature_examined_T0, Vascular_malformations_T0, Microbleeds_screened_T0, Microbleeds_present_T0, Incidental_finding_T0, Marjolein_T0, Remarks_MRI_T0) | ||
|
||
#Table 4: White matter hyperintensities | ||
table4 <- volbrain %>% | ||
select( | ||
Participant_Id, dl_Sex_T0, dl_Age_T0, | ||
dl_Total_lesion_count_T0, dl_Total_lesion_volume_.absolute._cm3_T0, | ||
dl_Total_lesion_volume_.normalized._._T0, dl_Total_lesion_burden_T0, | ||
dl_Periventricular_lesion_count_T0, dl_Periventricular_lesion_volume_.absolute._cm3_T0, | ||
dl_Periventricular_lesion_volume_.normalized._._T0, dl_Periventricular_lesion_burden_T0, | ||
dl_Deep_white_lesion_count_T0, dl_Deep_white_lesion_volume_.absolute._cm3_T0, | ||
dl_Deep_white_lesion_volume_.normalized._._T0, dl_Deep_white_lesion_burden_T0, | ||
dl_Juxtacortical_lesion_count_T0, dl_Juxtacortical_lesion_volume_.absolute._cm3_T0, | ||
dl_Juxtacortical_lesion_volume_.normalized._._T0, dl_Juxtacortical_lesion_burden_T0, | ||
dl_Infratentorial_lesion_count_T0, dl_Infratentorial_lesion_volume_.absolute._cm3_T0, | ||
dl_Infratentorial_lesion_volume_.normalized._._T0, dl_Infratentorial_lesion_burden_T0 | ||
) | ||
#Table 5: Recon-all volumetrics (white, grey and subcortical matter) with z-scores for 2 different kinds of growth charts. | ||
# Calculate the Total Cortical Volume and Total Cerebellar Volume | ||
table5 <- ages %>% | ||
mutate( | ||
Total_Cortical_Volume = CortexVol + Left.Cerebellum.Cortex + Right.Cerebellum.Cortex, | ||
Total_Cerebellar_Volume = Left.Cerebellum.Cortex + Right.Cerebellum.Cortex | ||
) %>% | ||
|
||
# Select relevant columns for the table and rename for clarity | ||
select( | ||
`Participant ID` = Participant_Id, | ||
`Age (years)` = Age_at_scan_y_T0X, | ||
`Gender` = gender_BRICK, | ||
|
||
# Total Volumes | ||
`Total Cortical Volume` = Total_Cortical_Volume, | ||
`Total White Matter Volume` = CerebralWhiteMatterVol, | ||
`Total Gray Matter Volume` = TotalGrayVol, | ||
`Total Cerebellar Volume` = Total_Cerebellar_Volume, | ||
|
||
# Subcortical Structures | ||
`Thalamus Left` = Left.Thalamus.Proper, | ||
`Thalamus Right` = Right.Thalamus.Proper, | ||
`Caudate Left` = Left.Caudate, | ||
`Caudate Right` = Right.Caudate, | ||
`Putamen Left` = Left.Putamen, | ||
`Putamen Right` = Right.Putamen, | ||
`Pallidum Left` = Left.Pallidum, | ||
`Pallidum Right` = Right.Pallidum, | ||
`Hippocampus Left` = Left.Hippocampus, | ||
`Hippocampus Right` = Right.Hippocampus | ||
) | ||
#Table 5_1: Neuropsychologic outcomes(WISCV&WAISIV) + Education level parents | ||
#WiscV | ||
table6_wisc <- raw %>% | ||
select( | ||
Participant_Id, | ||
scorePrimIndex_TIQ_S_1, # Full Scale IQ | ||
scorePrimIndex_VBI_IQ_1, # Verbal Comprehension Index | ||
scorePrimIndex_VRI_IQ_1, # Visual Spatial Index | ||
scorePrimIndex_FRI_IQ_1, # Fluid Reasoning Index | ||
scorePrimIndex_WgI_IQ_1, # Working Memory Index | ||
scorePrimIndex_VsI_IQ_1, # Processing Speed Index | ||
scoreSecIndex_KRI_IQ_1, # Quantitative Reasoning Index | ||
scoreSecIndex_AWI_IQ_1, # Auditory Working Memory Index | ||
scoreSecIndex_NVI_IQ_1, # Nonverbal Index | ||
scoreSecIndex_AVI_IQ_1, # General Ability Index | ||
) | ||
|
||
#waisIV | ||
|
||
table6_wais <- raw %>% | ||
select( | ||
"Participant_Id", | ||
"ScoreTIQ_1", # Full Scale IQ (FSIQ) | ||
"ScoreSomVBI_1", # Verbal Comprehension Index (VCI) | ||
"ScorePerVBI_1", # Perceptual Verbal Comprehension Index (related to FRI) | ||
"ScoreBIVBI_1", # Blocked Verbal Comprehension Index (related to FRI) | ||
"ScoreWgI_1", # Working Memory Index (WMI) | ||
"ScoreVsI_1" # Processing Speed Index (PSI) | ||
) | ||
|
||
#create a real table1 | ||
library(tableone) | ||
|
||
# Specify the categorical and continuous variables | ||
categorical_vars <- c("Hydrea_at_scan_T0", "brick_genotype", "gender_BRICK") | ||
continuous_vars <- c("Age_at_scan_years") | ||
|
||
# Specify the categorical and continuous variables for CreateTableOne | ||
vars <- c(categorical_vars, continuous_vars) | ||
|
||
# Create the table using CreateTableOne | ||
table1_summary <- CreateTableOne(vars = vars, data = combined_data, factorVars = categorical_vars) | ||
|
||
# Print the table1 summary | ||
print(table1_summary) | ||
|
||
#I want the median and IQR for HbS | ||
# Manually calculate median and IQR for 'HPLC_HbS_T0' | ||
hplc_summary <- combined_data %>% | ||
summarise( | ||
Median_HPLC_HbS_T0 = median(HPLC_HbS_T0, na.rm = TRUE), | ||
IQR_HPLC_HbS_T0 = IQR(HPLC_HbS_T0, na.rm = TRUE) | ||
) | ||
|
||
# Manually calculate mean and SD for 'HB0' | ||
hb0_summary <- combined_data %>% | ||
summarise( | ||
Mean_HB0 = mean(HB0, na.rm = TRUE), | ||
SD_HB0 = sd(HB0, na.rm = TRUE) | ||
) | ||
|
||
# Print the custom summaries for HPLC_HbS_T0 and HB0 | ||
print("HPLC_HbS_T0 - Median and IQR:") | ||
print(hplc_summary) | ||
|
||
print("HB0 - Mean and SD:") | ||
print(hb0_summary) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
library(dplyr) | ||
library(tableone) | ||
library(writexl) | ||
library(tibble) # For rownames_to_column() | ||
|
||
# Select relevant columns from table2 | ||
table2_filtered <- table2 %>% | ||
select(Participant_Id, ERY0, HB0, MCV0, HT0, reticulocyte_count_percentage_1, LEU0, | ||
TROMBO0, FE0, FERT0, TRAF0, TSAT0, ALAT0, LDH0, TBIL0, DBIL0, KREA0, | ||
FOLZ0, UREU0, NA0, K00, VITD0, ASAT0, AFOS0, GGT0, CRP0, KREA_U0, | ||
TE_U0, TE_U_KR0, ALB_U0, ALB_U_KR0, B120) | ||
|
||
|
||
# Define continuous variables | ||
continuous_vars <- c("ERY0", "HB0", "MCV0", "HT0", "reticulocyte_count_percentage_1", | ||
"LEU0", "TROMBO0", "FE0", "FERT0", "TRAF0", "TSAT0", "ALAT0", | ||
"LDH0", "TBIL0", "DBIL0", "KREA0", "FOLZ0", "UREU0", "NA0", | ||
"K00", "VITD0", "ASAT0", "AFOS0", "GGT0", "CRP0", "KREA_U0", | ||
"TE_U0", "TE_U_KR0", "ALB_U0", "ALB_U_KR0", "B120") | ||
|
||
# Create a named vector of labels for the continuous variables, with units | ||
labels <- c( | ||
"ERY0" = "Red blood cell (RBC) count (x10^12/L)", | ||
"HB0" = "Hemoglobin (g/dL)", | ||
"MCV0" = "Mean corpuscular volume (MCV) (fL)", | ||
"HT0" = "Hematocrit (HCT) (%)", | ||
"reticulocyte_count_percentage_1" = "Reticulocyte count (%)", | ||
"LEU0" = "White blood cell (WBC) count (x10^9/L)", | ||
"TROMBO0" = "Platelet count (x10^9/L)", | ||
"FE0" = "Serum iron (µg/dL)", | ||
"FERT0" = "Ferritin serum (ng/mL)", | ||
"TRAF0" = "Transferrin (mg/dL)", | ||
"TSAT0" = "Transferrin saturation (%)", | ||
"ALAT0" = "Alanine transaminase (ALT) (U/L)", | ||
"LDH0" = "Lactate Dehydrogenase (LDH) (U/L)", | ||
"TBIL0" = "Total bilirubin (mg/dL)", | ||
"DBIL0" = "Conjugated/direct bilirubin (mg/dL)", | ||
"KREA0" = "Creatinine (mg/dL)", | ||
"FOLZ0" = "Folate (ng/mL)", | ||
"UREU0" = "Urea (mg/dL)", | ||
"NA0" = "Sodium (mmol/L)", | ||
"K00" = "Potassium (mmol/L)", | ||
"VITD0" = "Vitamin D (ng/mL)", | ||
"ASAT0" = "Aspartate transaminase (AST) (U/L)", | ||
"AFOS0" = "Alkaline phosphatase (ALP) (U/L)", | ||
"GGT0" = "Gamma-glutamyl transferase (GGT) (U/L)", | ||
"CRP0" = "C-reactive protein (CRP) (mg/L)", | ||
"KREA_U0" = "Creatinine (Urine) (mg/dL)", | ||
"TE_U0" = "Iron (Urine) (µg/dL)", | ||
"TE_U_KR0" = "Iron (Urine, Kr) (µg/dL)", | ||
"ALB_U0" = "Albumin (Urine) (g/dL)", | ||
"ALB_U_KR0" = "Albumin (Urine, Kr) (g/dL)", | ||
"B120" = "Bilirubin 120 (mg/dL)" | ||
) | ||
|
||
# Create a summary table with continuous variables | ||
table2_summary <- CreateTableOne( | ||
vars = continuous_vars, | ||
data = table2_filtered, | ||
factorVars = character(0) # No factor variables | ||
) | ||
|
||
# Convert the table summary to a data frame | ||
table2_df <- as.data.frame(print(table2_summary, quote = FALSE, noSpaces = TRUE)) | ||
|
||
# Add the labels as a new column by matching with continuous_vars | ||
table2_df <- table2_df %>% | ||
rownames_to_column(var = "Variable") %>% | ||
mutate(Variable = labels[continuous_vars]) | ||
|
||
# Rename the 'Overall' column to "Mean (SD)" to indicate the values | ||
colnames(table2_df)[colnames(table2_df) == "Overall"] <- "Mean (SD)" | ||
|
||
|
||
# View the final table | ||
print(table2_df) | ||
#question: weird results, how do we impute missing values? | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
#this script is to find the gentotypes that apply to the bridge subset of brick for the ash poster 2024 | ||
library(dplyr) | ||
library(readr) | ||
library(tableone) | ||
|
||
#load dataset with genotypes | ||
genotype <-read.csv("Z:/Aida_experiment/combined_BRICK_marjolein_with_genotype.csv") | ||
|
||
# Create a new column with rounded "Age_at_scan_y_T0X" values in the genotype DataFrame. This is needed for an exact match | ||
genotype <- genotype %>% | ||
mutate(Age_at_scan_y_T0X = round(Age_at_scan_y_T0X, 2)) | ||
|
||
# Define ages for each gender | ||
ages_male <- c(9.94, 11.74, 12.73, 12.81, 13.94, 15.09, 15.22, 16.02, 16.65, 17.4, 17.48) | ||
ages_female <- c(8.05, 9.16, 10.32, 13.72, 13.77, 14.85, 16.25) | ||
|
||
# Filter genotype DataFrame based on matching ages for males and females separately | ||
matched_data_male <- genotype %>% | ||
filter(Age_at_scan_y_T0X %in% ages_male & gender_BRICK == 1) %>% | ||
select(Participant_Id, brick_genotype, Age_at_scan_y_T0X, gender_BRICK) | ||
|
||
matched_data_female <- genotype %>% | ||
filter(Age_at_scan_y_T0X %in% ages_female & gender_BRICK == 2) %>% | ||
select(Participant_Id, brick_genotype, Age_at_scan_y_T0X, gender_BRICK) | ||
|
||
# Assign to BRIDGE_male and BRIDGE_female DataFrames | ||
BRIDGE_male <- matched_data_male | ||
BRIDGE_female <- matched_data_female | ||
|
||
# Display the resulting DataFrames | ||
print("BRIDGE_male:") | ||
print(BRIDGE_male) | ||
|
||
print("BRIDGE_female:") | ||
print(BRIDGE_female) | ||
|
||
#not an exact match. let's just compare it manually | ||
|
||
genotype_male <- genotype %>% filter(gender_BRICK == 1) | ||
|
||
genotype_female <- genotype %>% filter(gender_BRICK == 2) | ||
|
||
#now some relevant extra rows are the hydrea use row, Hb and HbF that are interesting for table 1 | ||
#load the bridge subset of participants, I manually added gender, brick number and genotype to the bridge subset. I've also ran th script of 1162024Tables_paper_1.R and 1172024_Table2_Growthcurves_script | ||
#beforehand | ||
male_brick <- read.csv("Z:/Aida_experiment/ASH_poster_2024/12112024_TCV_BRICK_BRIDGE_gen_boys.csv") | ||
female_brick <- read.csv("Z:/Aida_experiment/ASH_poster_2024/12112024_TCV_BRICK_BRIDGE_gen_girls.csv") | ||
|
||
#from df table1 I need "Hydrea_at_scan_T0" and from dataframe "table2_filtered" I need HB0. The NA values need to be kicked out. merge on Participant_Id into new df for males and females | ||
|
||
# Extract the relevant columns from table1, table2_filtered, and raw for males | ||
table1_filtered_male <- table1 %>% | ||
select(Participant_Id, Hydrea_at_scan_T0) %>% | ||
filter(!is.na(Hydrea_at_scan_T0)) | ||
|
||
table2_filtered_male <- table2_filtered %>% | ||
select(Participant_Id, HB0) %>% | ||
filter(!is.na(HB0)) | ||
|
||
raw_filtered_male <- raw %>% | ||
select(Participant_Id, HPLC_HbS_T0) %>% | ||
filter(!is.na(HPLC_HbS_T0)) | ||
|
||
# Merge male data with additional columns | ||
male_data_merged <- male_brick %>% | ||
inner_join(table1_filtered_male, by = "Participant_Id") %>% | ||
inner_join(table2_filtered_male, by = "Participant_Id") %>% | ||
inner_join(raw_filtered_male, by = "Participant_Id") | ||
|
||
# Repeat the process for females | ||
table1_filtered_female <- table1 %>% | ||
select(Participant_Id, Hydrea_at_scan_T0) %>% | ||
filter(!is.na(Hydrea_at_scan_T0)) | ||
|
||
table2_filtered_female <- table2_filtered %>% | ||
select(Participant_Id, HB0) %>% | ||
filter(!is.na(HB0)) | ||
|
||
raw_filtered_female <- raw %>% | ||
select(Participant_Id, HPLC_HbS_T0) %>% | ||
filter(!is.na(HPLC_HbS_T0)) | ||
|
||
# Merge female data with additional columns | ||
female_data_merged <- female_brick %>% | ||
inner_join(table1_filtered_female, by = "Participant_Id") %>% | ||
inner_join(table2_filtered_female, by = "Participant_Id") %>% | ||
inner_join(raw_filtered_female, by = "Participant_Id") | ||
|
||
# Display the resulting data frames | ||
print("Male Data Merged:") | ||
print(male_data_merged) | ||
|
||
print("Female Data Merged:") | ||
print(female_data_merged) | ||
|
||
|
||
#now make the dfs one and create a table 1 | ||
# Merge male and female brick datasets into one, keeping the 'gender' column intact | ||
combined_data <- bind_rows(male_data_merged, female_data_merged) | ||
|
||
# Define categorical and continuous variables | ||
categorical_vars <- c("Hydrea_at_scan_T0", "genotype", "gender_brick") | ||
continuous_vars <- c("AgeChild", "HB0", "HPLC_HbS_T0") | ||
|
||
# Use the table1 package to create a summary table | ||
table1_summary <- CreateTableOne(~ Hydrea_at_scan_T0 + genotype + gender_brick + | ||
AgeChild + HB0 + HPLC_HbS_T0, | ||
data = combined_data, | ||
render.categorical = "Freq", | ||
render.continuous = c("Median", "IQR")) | ||
|
||
# Print the table1 summary | ||
print(table1_summary) | ||
|
||
|
||
|
||
|
Oops, something went wrong.