Skip to content
This repository was archived by the owner on Mar 6, 2025. It is now read-only.

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
wzbillings committed Jul 5, 2024
0 parents commit 2dcbaa2
Show file tree
Hide file tree
Showing 132 changed files with 28,267 additions and 0 deletions.
1 change: 1 addition & 0 deletions .Rprofile
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
source("renv/activate.R")
55 changes: 55 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# History files
.Rhistory
.Rapp.history

# Session Data files
.RData
.RDataTmp

# User-specific files
.Ruserdata

# Example code in package build process
*-Ex.R

# Output files from R CMD build
/*.tar.gz

# Output files from R CMD check
/*.Rcheck/

# RStudio files
.Rproj.user/

# produced vignettes
vignettes/*.html
vignettes/*.pdf

# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
.httr-oauth

# knitr and R markdown default cache directories
*_cache/
/cache/

# Temporary files created by R markdown
*.utf8.md
*.knit.md

# R Environment Variables
.Renviron

# pkgdown site
docs/

# translation temp files
po/*~

# Large files
results/largefiles/

# Temporary word files
*$*.docx

# Ignore the supplement because Quarto is making the rendered doc huge
products/manuscript/supplement.docx
13 changes: 13 additions & 0 deletions Billings-2024-HD-Heterologous.Rproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: No
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX
659 changes: 659 additions & 0 deletions LICENSE.md

Large diffs are not rendered by default.

24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# High dose inactivated influenza vaccine inconsistently improves heterologous antibody responses in an elderly human cohort

This repo contains the code, data, and results associated with the manuscript
"High dose inactivated influenza vaccine inconsistently improves heterologous antibody responses in an elderly human cohort"
by Billings et al.

## Reproducibility instructions

* You can find the reproducibility instructions in [the Supplement](./products/manuscript/supplement.pdf).
* This code was generated with R 4.4.1 and the package versions specified in
`renv`. To reproduce our results, install R 4.4.1 and run `renv::restore()` to
get the correct package versions; with other versions the code might not work.

## Licensing

All code written by us is licensed under the [GNU Affero General Public License,
version 3](./LICENSE.md).

## Contact information

Please contact Zane Billings (wesley dot billings at uga dot edu) or
Andreas Handel (ahandel at uga dot edu) for more information.

<!-- end of file -->
141 changes: 141 additions & 0 deletions code/01-Data-Processing.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
###
# Data Processing
# Zane Billings
# 2024-01-29
# Starting with the cleaned data from ahgroup/UGAFluVac repo, we need to do
# some data processing to get the data ready for our analyses.
# This script processes and filters the data.
###

# Setup ========================================================================
# Declare necessary dependencies
library(readr, include.only = NULL)
library(here, include.only = NULL)
library(dplyr, include.only = NULL)
library(forcats, include.only = NULL)

# Load the "raw" data
# These are the already-cleaned records produced by the cleaning process in
# https://github.com/ahgroup/UGAFluVac-data/ (that repo also holds the raw
# excel data files). The copy read here was pulled from that repo on
# 2024-01-29.
raw_data <-
  here::here("data", "raw", "clean-data.Rds") |>
  readr::read_rds()

# Data cleaning ================================================================
# Start by discarding the records that are out of scope for this analysis.
# The conditions are applied one after another, so a record must pass all of
# them to be kept.
dat_filtered <-
  raw_data |>
  # Keep only the SD and HD records (this removes the individuals with
  # intradermal vaccines).
  dplyr::filter(dose %in% c("SD", "HD")) |>
  # Remove records with a non-finite titerincrease (1 person who has a
  # pretiter of 0, probably a data entry error).
  dplyr::filter(is.finite(titerincrease)) |>
  # Keep only the H1N1 and H3N2 assays (this removes the flu B assays).
  dplyr::filter(strain_type %in% c("H1N1", "H3N2")) |>
  # Only individuals >= age 65 were allowed to get the HD vaccine, so records
  # from anyone below that age are removed.
  dplyr::filter(age >= 65) |>
  # The Sichuan 1989 records are mislabelled (a data cleaning error): they are
  # the same strain as Sichuan 1987, so relabel them to match. This should
  # eventually be moved to the UGAFluVac repo.
  dplyr::mutate(
    strains_fullname = forcats::fct_recode(
      strains_fullname,
      "H3N2-Sichuan-1987" = "H3N2-Sichuan-1989"
    )
  ) |>
  # Finally drop the factor levels that no longer occur after the filtering
  # and the relabelling above.
  dplyr::mutate(
    dplyr::across(
      c(dose, strain_type, strains_fullname),
      forcats::fct_drop
    )
  )

# Next, derive the variables we need, discard the ones we don't, and tidy up
# the column names.
dat_clean <-
  dat_filtered |>
  # Collapse the two vaccine strain columns into a single vaccine_name column:
  # H1 assays get the name of the H1 vaccine strain, all other (H3) assays get
  # the name of the H3 vaccine strain. The new column is placed after dose.
  dplyr::mutate(
    vaccine_name = dplyr::if_else(
      condition = strain_type == "H1N1",
      true = h1n1_vaccine_fullname,
      false = h3n2_vaccine_fullname
    ),
    .after = dose
  ) |>
  # Remove the *_vaccine_fullname columns, which are now redundant, together
  # with the variables we don't plan to use at all in this analysis.
  dplyr::select(
    -c(
      dplyr::ends_with("vaccine_fullname"),
      uniq_id, id, bmi, days_since_vac, date_vaccinated, race, gender,
      dplyr::contains("particip")
    )
  ) |>
  # Reduce the date of birth to just the birth year (its first four
  # characters). .keep = "unused" removes dateofbirth once it has been used.
  dplyr::mutate(
    birth_year = as.integer(substring(dateofbirth, 1, 4)),
    .keep = "unused"
  ) |>
  # Add the fold change, 2 ^ (titerincrease), which can make plotting easier.
  # It is placed just before titerincrease.
  dplyr::mutate(fold_change = 2^titerincrease, .before = titerincrease) |>
  # Make season an ordered factor so it behaves the way we want during
  # plotting. The levels are already alphabetical so it should be fine
  # either way, but better to be explicit.
  dplyr::mutate(season = factor(season, ordered = TRUE)) |>
  # Last, give some of the variables cleaner names (new name = old name).
  dplyr::rename(
    id = subject_id,
    strain_name = strains_fullname,
    log_pretiter = prevactiter,
    posttiter = postiter,
    log_posttiter = postvactiter,
    titer_increase = titerincrease
  )

# Now we need to make a few additional changes before we can pass the
# data to our models.
# First get the minimum birth year for making a centered/scaled/whatever version
# of the birth year variable. Note that the minimum is taken over the raw
# (unfiltered) data, not only over the records kept for the analysis.
# Missing or unparseable birth dates are skipped (na.rm = TRUE): without that,
# a single NA anywhere in the raw data would make MIN_BY NA, and every
# birth_year_c value computed from it would silently become NA as well.
MIN_BY <-
  raw_data$dateofbirth |>
  substring(1, 4) |>
  as.integer() |>
  min(na.rm = TRUE)

# Build the modeling dataset: add shifted versions of the year, birth year and
# age variables, then keep only the modeling columns in a fixed order.
dat_model <-
  dat_clean |>
  dplyr::mutate(
    # Models that contain numbers in the thousands have worse conditioning
    # problems, which can lead to numerical issues in an otherwise fine model.
    # So we make versions of the time variables that are closer to being
    # scale-free: year is shifted by 2013, the first year, and birth_year is
    # shifted by the minimum birth year.
    year_c = year - 2013,
    birth_year_c = birth_year - MIN_BY,
    # Age is centered by subtracting 65.
    age_c = age - 65
  ) |>
  # Put the variables in a fixed order. This serves no practical purpose
  # beyond being nicer to look at.
  dplyr::select(
    dplyr::all_of(c(
      "id", "study", "season", "year", "year_c", "age", "age_c",
      "birth_year", "birth_year_c", "dose", "vaccine_name", "strain_name",
      "strain_type", "pretiter", "log_pretiter", "posttiter",
      "log_posttiter", "fold_change", "titer_increase", "seroconversion",
      "seroprotection"
    ))
  )

# Save data to file ============================================================
# Write the modeling dataset twice: once as a gzip-compressed Rds file and
# once as a plain csv file, both under data/processed.
dat_model |>
  readr::write_rds(
    file = here::here("data", "processed", "model-data.Rds"),
    compress = "gz"
  )
dat_model |>
  readr::write_csv(
    file = here::here("data", "processed", "model-data.csv")
  )

# END OF FILE ==================================================================
Loading

0 comments on commit 2dcbaa2

Please sign in to comment.