Skip to content

Rpackage #10

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 58 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
becf992
fix openmp compatibility
yukt May 13, 2019
c2bf8ce
clean up warnings
yukt May 13, 2019
97afe2c
Update README.md
yukt May 13, 2019
4eda293
make OpenMP optional
yukt May 13, 2019
5e63536
Merge branch 'rpackage' of https://github.com/yukt/dap into rpackage
yukt May 13, 2019
2e940a2
clean up
yukt May 13, 2019
b2dffc4
Update README.md
yukt May 13, 2019
a835761
mute stdout running in r
yukt May 13, 2019
e3350d9
adjust stdout for R
yukt May 14, 2019
f474b5b
optimize nmodel_vec
yukt May 14, 2019
3b56bf3
separate output
yukt May 14, 2019
cf97ac1
clean up sort_snp_vec
yukt May 14, 2019
8d8fd85
setup r package
yukt May 14, 2019
9f1fd9f
clean up cluster vectors in fine map
yukt May 14, 2019
52ec3d6
finish up r-package option 0
yukt May 14, 2019
30e0ab4
add options
yukt May 14, 2019
875441e
support read in sbams in R
yukt May 14, 2019
dc1eb8a
accept data frame
yukt May 15, 2019
400a8ab
improve R log output
yukt May 15, 2019
a092019
add summary function for dap
yukt May 15, 2019
1b9df07
add summary function for dap
yukt May 15, 2019
168346d
add summary function for dap
yukt May 15, 2019
c000863
add top models to summary
yukt May 15, 2019
94d2ec9
add documentations
yukt May 16, 2019
346a321
add license and data
yukt May 16, 2019
9950750
add citations
yukt May 16, 2019
4aa9f67
add quiet mode and README
yukt May 16, 2019
7dbcf5b
add quiet mode and README
yukt May 16, 2019
b6aaf54
fix bugs
yukt May 16, 2019
35d7847
update README and CITATION
yukt May 16, 2019
fba7da8
update README
yukt May 16, 2019
84b75ac
fix bugs
yukt May 16, 2019
140b008
try add openmp flag
yukt May 23, 2019
d125115
fix openmp bug
yukt May 23, 2019
7011c5e
inactive fopenmp
yukt May 23, 2019
70e6241
update structure of return object
yukt Jul 29, 2019
4c2d17d
update structure of return object
yukt Jul 29, 2019
cdf5d84
implement option 1 --scan
yukt Jul 29, 2019
3166100
add twas option to dap function
yukt Jul 29, 2019
0f5832f
remove cump
yukt Jul 29, 2019
4450f3f
fix print.summary.dap bug
yukt Aug 9, 2019
a13c652
finish twas
yukt Aug 12, 2019
915c911
coef->weight
yukt Aug 12, 2019
c0c0631
add data
yukt Aug 12, 2019
967a37c
dap.ss
yukt Aug 12, 2019
b74d526
optimize main.cpp
yukt Aug 12, 2019
bfdd700
polish doc
yukt Aug 12, 2019
b90667e
finish dap.z
yukt Aug 13, 2019
3f13dce
merge R-package/src with /src
yukt Aug 14, 2019
1637c4d
fix bug in initialize zval
yukt Aug 14, 2019
1a228ae
fix bug in initialize zval
yukt Aug 14, 2019
70e5617
fix bug in initialize zval
yukt Aug 14, 2019
fc7195b
finish extract.sbams (--dump_summary2)
yukt Aug 14, 2019
a0b0882
add options to dap.z and dap.ss
yukt Aug 14, 2019
42e4088
small change in doc
yukt Aug 15, 2019
e3cd190
fix bug when prior file contains new snp
yukt Aug 15, 2019
ff95f97
add prior option
yukt Aug 15, 2019
27119d7
add prior example
yukt Aug 15, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions dap_src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
cmake_minimum_required(VERSION 3.2)
project (dap-g VERSION 0.0.1)

set(CMAKE_CXX_STANDARD 11)
execute_process(COMMAND date OUTPUT_VARIABLE DATE OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(COMMAND whoami OUTPUT_VARIABLE USER OUTPUT_STRIP_TRAILING_WHITESPACE)

add_definitions(-DVERSION="${PROJECT_VERSION}" -DUSER="${USER}" -DDATE="${DATE}")

find_package(OpenMP)
if (OPENMP_FOUND)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
endif()

find_package(GSL REQUIRED)

add_executable(dap-g
src/main.cc
src/MLR.h src/MLR.cc
src/parser.h src/parser.cc
src/controller.h src/controller.cc
)

target_link_libraries(dap-g PUBLIC GSL::gsl GSL::gslcblas)

install(TARGETS dap-g RUNTIME DESTINATION bin)
14 changes: 0 additions & 14 deletions dap_src/Makefile

This file was deleted.

17 changes: 17 additions & 0 deletions dap_src/R-package/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
Package: dap
Type: Package
Title: Structured Bayesian Model Selection via Adaptive DAP Algorithm
Version: 0.1.0
Author: William Wen, Ketian Yu
Maintainer: Ketian Yu <yukt@umich.edu>
Description: An implementations of structured Bayesian model selection via adaptive deterministic approximation of posteriors (DAP) algorithm.
Applications include genetic association analysis integrating genomic annotations.
These methods are designed to perform rigorous enrichment analysis, QTL discovery and multi-SNP fine-mapping analysis in a highly efficient way.
License: GPL-3
Encoding: UTF-8
LazyData: true
LinkingTo:
Rcpp
Imports:
Rcpp
RoxygenNote: 6.1.1
595 changes: 595 additions & 0 deletions dap_src/R-package/LICENSE.md

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions dap_src/R-package/NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Generated by roxygen2: do not edit by hand

S3method(print,dap)
S3method(print,summary.dap)
S3method(summary,dap)
export(dap)
export(dap.sbams)
export(dap.ss)
export(dap.z)
export(extract.sbams)
export(model.dap)
export(read.sbams)
export(scan)
export(scan.sbams)
export(twas)
importFrom(Rcpp,sourceCpp)
useDynLib(dap, .registration = TRUE)
15 changes: 15 additions & 0 deletions dap_src/R-package/R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

dap_main <- function(input_format, arg, quiet) {
.Call(`_dap_dap_main`, input_format, arg, quiet)
}

read_sbams <- function(data_file) {
.Call(`_dap_read_sbams`, data_file)
}

extract_sbams <- function(arg) {
.Call(`_dap_extract_sbams`, arg)
}

456 changes: 456 additions & 0 deletions dap_src/R-package/R/main.R

Large diffs are not rendered by default.

55 changes: 55 additions & 0 deletions dap_src/R-package/R/sbams.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#' Read a sbams file
#'
#' This function will import the sbams file, automatically impute missing values with mean, regress pheno and geno out of controlled covariants if applicable, and generate an \R data frame which can be passed into the \code{\link{dap}} function.
#'
#' @param file file path to the sbams file
#' @return a data.frame with the first colunm as the normalized phenotype, and the following columns as the normalized genotypes.
#' @details Please refer to \url{https://github.com/xqwen/dap/tree/master/dap_src} for more details.
#' @examples
#' sbams.file = system.file("sbamsdat", "sim.1.sbams.dat", package = "dap")
#' sbams.dat = read.sbams(sbams.file)
#' test.dap.sbams.dat = dap(gene~., sbams.dat)
#'
#' @useDynLib dap, .registration = TRUE
#' @importFrom Rcpp sourceCpp
#' @export
read.sbams <- function(file) {
result = .Call(`_dap_read_sbams`, PACKAGE = 'dap', file)
return(result)
}

#' Extract Sufficient Statistics from a sbams file
#'
#' The sufficient summary statistics refer to the following information: \itemize{
#' \item estimated effect size and corresponding estimated standard error from single SNP testing for each SNP
#' \item correlation matrix of SNPs
#' \item sample size
#' \item total sum of squares for the quantitative phenotype
#' }
#'
#' @param file file path to the sbams file
#' @param ens (optional) prior expected number of signals, \code{ens=1} by default.
#' @param pi1 (optional) the exchangeable prior probability, values \code{0<pi1<1} accepted. By default -1, \code{pi1=ens/p}, where \code{p} is number of SNPs in the input file.
#' @return \code{extract.sbams} returns a list containing the following components: \item{est}{a \code{data.frame} including snp names, estimated effect size and corresponding estimated standard error.}
#' \item{ld}{a \code{matrix} representing the correlation matrix of SNPs.}
#' \item{n}{sample size.}
#' \item{syy}{total sum of squares for the quantitative phenotype.}
#' \item{name}{name of the phenotype}
#' @examples
#' sbams.file = system.file("sbamsdat", "sim.1.sbams.dat", package = "dap")
#' ss = extract.sbams(sbams.file)
#' dap.ss(ss$est, ss$ld, ss$n, ss$syy, ss$name)
#'
#' @useDynLib dap, .registration = TRUE
#' @importFrom Rcpp sourceCpp
#' @export
extract.sbams <- function(file, ens=1, pi1=-1) {
params = list(data=file)

if(class(ens)=="numeric" & ens > 0) params$ens=ens
if(class(pi1)=="numeric" & pi1>0 & pi1<1) params$pi1=pi1

result = .Call(`_dap_extract_sbams`, PACKAGE = 'dap', params)
return(result)
}

89 changes: 89 additions & 0 deletions dap_src/R-package/R/scan.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#' Single-SNP Association Analysis
#'
#' \code{scan} is used perform perform single-SNP fine-mapping analysis.
#'
#' @usage
#' scan(formula, data)
#'
#' @aliases scan
#' @param formula an object of class \code{formula} (or one that can be coerced to that class): a symbolic description of the model to be fitted.
#' @param data a data frame containing the variables in the model.
#' @return \code{scan} returns a \code{data.frame} containing effect estimates for each single SNP.
#' @examples
#' set.seed(0)
#' n = 100
#' p = 1000
#'
#' a1 = rnorm(n)
#' a2 = 10*a1+rnorm(n)
#' a3 = 2*a1+9*a2+rnorm(n)
#'
#' b1 = rnorm(n)
#' b2 = 8*b1+rnorm(n)
#'
#' x = matrix(rnorm(n*(p-5)), nrow=n)
#'
#' df = data.frame(a1,a2,a3,b1,b2,x)
#' df$y = 2*a1+b1+rnorm(n)
#'
#' test.scan = scan(y~., df)
#' head(test.scan)
#'
#' @seealso \code{\link{scan.sbams}} for a different interface directly analyzing a sbams-format file.
#' @useDynLib dap, .registration = TRUE
#' @importFrom Rcpp sourceCpp
#' @export
scan = function(formula, data){
cl = match.call()
mf = match.call(expand.dots = FALSE)
m <- match(c("formula", "data"), names(mf), 0L)
mf <- mf[c(1L, m)]
mf$drop.unused.levels <- TRUE
mf[[1L]] <- quote(stats::model.frame)
mf <- eval(mf, parent.frame())
mt <- attr(mf, "terms")

y = model.response(mf, "numeric")
attr(mt, "intercept") = 0
x <- model.matrix(mt, mf)

# impute with mean
y[is.na(y)] = mean(y, na.rm = TRUE)
x = apply(x, 2, function(t) replace(t, is.na(t), mean(t, na.rm=TRUE)))

params = list(x=x,y=y,pheno_name=all.vars(cl)[1],scan=TRUE)

result = .Call(`_dap_dap_main`, PACKAGE = 'dap', 2, params, 1)

return(result[[1]])
}

#' Single-SNP Association Analysis for SBAMS-format Data
#'
#' \code{scan.sbams} is an interface built especially for SBAMS-format files, which is designed to perform single-SNP fine-mapping analysis.
#'
#' @usage
#' scan.sbams(file)
#'
#' @aliases scan.sbams
#' @param file file path to a sbams file
#' @return \code{scan.sbams} returns a \code{data.frame} containing effect estimates for each single SNP.
#' @examples \dontrun{
#'
#' sbams.file = system.file("sbamsdat", "sim.1.sbams.dat", package = "dap")
#' test.scan.sbams = scan.sbams(sbams.file)
#' head(test.scan.sbams)
#'
#' }
#' @seealso \code{\link{read.sbams}} for reading in sbams-format files as an \R \code{data.frame} which can call the general version of \code{\link{scan}} function.
#' @useDynLib dap, .registration = TRUE
#' @importFrom Rcpp sourceCpp
#' @export
scan.sbams <- function(file)
{
params = list(data=file, scan=TRUE)

result = .Call(`_dap_dap_main`, PACKAGE = 'dap', 1, params, 1)

return(result[[1]])
}
78 changes: 78 additions & 0 deletions dap_src/R-package/R/twas.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#' TWAS Builder
#'
#' \code{twas} calculates weights on variants.
#'
#' @usage
#' twas(data, dap)
#'
#' @aliases twas
#' @param data a data frame containing the variables in the model.
#' @param dap a dap object
#' @return \code{twas} returns an object of \code{"twas"}, which is a list containing the following components:
#' \item{weight}{a data frame of variants and corresponding weights.}
#' \item{ER2}{the expected R-squared.}
#' @examples
#' set.seed(0)
#' n = 100
#' p = 1000
#'
#' a1 = rnorm(n)
#' a2 = 10*a1+rnorm(n)
#' a3 = 2*a1+9*a2+rnorm(n)
#'
#' b1 = rnorm(n)
#' b2 = 8*b1+rnorm(n)
#'
#' x = matrix(rnorm(n*(p-5)), nrow=n)
#'
#' df = data.frame(a1,a2,a3,b1,b2,x)
#' df$y = 2*a1+b1+rnorm(n)
#'
#' test.dap = dap(y~., df)
#' twas(df, test.dap)
#'
#' @useDynLib dap, .registration = TRUE
#' @importFrom Rcpp sourceCpp
#' @export
twas = function(data, dap){

# sanity check
cl = match.call()
if(class(data)!="data.frame") stop(paste(deparse(cl$data),"is not a data.frame!"))
if(class(dap) !="dap") stop(paste(deparse(cl$dap)), "is not a dap object!")
if(dap$model.summary$N != nrow(data)) warning(paste("The number of samples in", deparse(cl$data), "does not match that in", deparse(cl$dap),"!"))
if(!(dap$model.summary$response %in% names(data))) stop(paste("The response variable", dap$model.summary$response, "is not found in", deparse(cl$data)))

models = dap$model.summary$model[dap$model.summary$model$size>0,]
vars = unique(unlist(sapply(models$configuration, function(x) strsplit(x,"\\+"))))
vars_not_in = !(vars%in% names(data))
if(sum(vars_not_in) > 0) stop(paste(paste(vars[vars_not_in], collapse=","), "NOT found in", deparse(cl$data)))

X = data[,vars]
y = data[,dap$model.summary$response]

# impute with mean
y[is.na(y)] = mean(y, na.rm = TRUE)
X = apply(X, 2, function(t) replace(t, is.na(t), mean(t, na.rm=TRUE)))

ymean = mean(y)
const = sum((y-ymean)^2)
coefs = rep(0, length(vars))
names(coefs) = vars
ER2 = 0

for(i in 1:nrow(models)){
this_vars = strsplit(models$configuration[i],"\\+")[[1]]
this_result = lm.fit(cbind(1,as.matrix(X[,this_vars])),y)
this_weight = models$posterior[i]
coefs[names(this_result$coef[-1])] = coefs[names(this_result$coef[-1])]+this_result$coef[-1]*this_weight
ER2 = ER2 + this_weight*(1-sum(this_result$residuals^2)/const)
}

index_sorted = sort(abs(coefs), decreasing = TRUE, index.return=TRUE)$ix
twas_df = data.frame(predictor=names(coefs), weight = coefs)[index_sorted,]
row.names(twas_df) = 1:length(coefs)
result = list(weights=twas_df, ER2=ER2)
class(result) = "twas"
return(result)
}
Loading