From f15f23e574491dd63f984eeee5c66fbe4d430ea0 Mon Sep 17 00:00:00 2001 From: Xiuwen Zheng Date: Sun, 30 Sep 2018 18:15:11 -0700 Subject: [PATCH] a new option 'useMatrix' in snpgdsIndivBeta() --- DESCRIPTION | 4 +- NEWS | 4 +- R/IBD.R | 11 +++-- README.md | 2 +- inst/unitTests/test_rel.R | 6 +++ man/snpgdsIndivBeta.Rd | 4 +- src/SNPRelate.cpp | 4 +- src/genBeta.cpp | 95 +++++++++++++++++++++++++++------------ 8 files changed, 89 insertions(+), 41 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 193b78b..e221b38 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,8 +2,8 @@ Package: SNPRelate Type: Package Title: Parallel Computing Toolset for Relatedness and Principal Component Analysis of SNP Data -Version: 1.15.5 -Date: 2018-09-23 +Version: 1.15.6 +Date: 2018-09-30 Depends: R (>= 2.15), gdsfmt (>= 1.8.3) LinkingTo: gdsfmt Imports: methods diff --git a/NEWS b/NEWS index e36970e..55e0330 100644 --- a/NEWS +++ b/NEWS @@ -2,8 +2,8 @@ CHANGES IN VERSION 1.15.1-1.15.5 ------------------------- o a new option 'useMatrix' to allow for the packed symmetric matrix using - the Matrix package in `snpgdsIBDMoM()`, `snpgdsIBDKING()` and - `snpgdsIBS()` + the Matrix package in `snpgdsIBDMoM()`, `snpgdsIBDKING()`, + `snpgdsIBS()` and `snpgdsIndivBeta()` o fix a bug of missing sample and SNP IDs in the output of `snpgdsIndInb()` diff --git a/R/IBD.R b/R/IBD.R index f067ac9..c98964d 100755 --- a/R/IBD.R +++ b/R/IBD.R @@ -833,7 +833,7 @@ snpgdsFst <- function(gdsobj, population, method=c("W&C84", "W&H02"), snpgdsIndivBeta <- function(gdsobj, sample.id=NULL, snp.id=NULL, autosome.only=TRUE, remove.monosnp=TRUE, maf=NaN, missing.rate=NaN, method=c("weighted"), inbreeding=TRUE, num.thread=1L, with.id=TRUE, - verbose=TRUE) + useMatrix=FALSE, verbose=TRUE) { # check and initialize ... method <- match.arg(method) @@ -843,13 +843,16 @@ snpgdsIndivBeta <- function(gdsobj, sample.id=NULL, snp.id=NULL, autosome.only=autosome.only, remove.monosnp=remove.monosnp, maf=maf, missing.rate=missing.rate, num.thread=num.thread, verbose=verbose) - stopifnot(is.logical(with.id)) + stopifnot(is.logical(with.id), length(with.id)==1L) + stopifnot(is.logical(useMatrix), length(useMatrix)==1L) # call GRM C function - rv <- .Call(gnrIBD_Beta, inbreeding, ws$num.thread, verbose) + rv <- .Call(gnrIBD_Beta, inbreeding, ws$num.thread, useMatrix, verbose) + if (isTRUE(useMatrix)) + rv <- .newmat(ws$n.samp, rv) # return - if (with.id) + if (isTRUE(with.id)) { rv <- list(sample.id=ws$sample.id, snp.id=ws$snp.id, inbreeding=inbreeding, beta=rv, avg_val=.Call(gnrGRM_avg_val)) diff --git a/README.md b/README.md index b524369..cccdcfa 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ Release Version: v1.14.1 [http://www.bioconductor.org/packages/release/bioc/html/SNPRelate.html](http://www.bioconductor.org/packages/release/bioc/html/SNPRelate.html) -Development Version: v1.15.5 +Development Version: v1.15.6 [http://www.bioconductor.org/packages/devel/bioc/html/SNPRelate.html](http://www.bioconductor.org/packages/devel/bioc/html/SNPRelate.html) diff --git a/inst/unitTests/test_rel.R b/inst/unitTests/test_rel.R index 6f61ef9..fbe6d54 100644 --- a/inst/unitTests/test_rel.R +++ b/inst/unitTests/test_rel.R @@ -284,6 +284,12 @@ test.IndivBeta <- function() num.thread=1, verbose=FALSE) checkEquals(beta.1$beta, valid.dta$beta, "Individual Beta (one core)") + beta.1 <- snpgdsIndivBeta(genofile, sample.id=samp.id[1:90], + num.thread=1, useMatrix=TRUE, verbose=FALSE) + beta.1$beta <- as.matrix(beta.1$beta) + dimnames(beta.1$beta) <- NULL + checkEquals(beta.1$beta, valid.dta$beta, "Individual Beta (one core, Matrix)") + # run on two cores beta.2 <- snpgdsIndivBeta(genofile, sample.id=samp.id[1:90], num.thread=2, verbose=FALSE) diff --git a/man/snpgdsIndivBeta.Rd b/man/snpgdsIndivBeta.Rd index d003ae6..d323cbf 100644 --- a/man/snpgdsIndivBeta.Rd +++ b/man/snpgdsIndivBeta.Rd @@ -11,7 +11,7 @@ using SNP genotype data. \usage{ snpgdsIndivBeta(gdsobj, sample.id=NULL, snp.id=NULL, autosome.only=TRUE, remove.monosnp=TRUE, maf=NaN, missing.rate=NaN, method=c("weighted"), - inbreeding=TRUE, num.thread=1L, with.id=TRUE, verbose=TRUE) + inbreeding=TRUE, num.thread=1L, with.id=TRUE, useMatrix=FALSE, verbose=TRUE) snpgdsIndivBetaRel(beta, beta_rel, verbose=TRUE) } \arguments{ @@ -35,6 +35,8 @@ snpgdsIndivBetaRel(beta, beta_rel, verbose=TRUE) the number of cores automatically} \item{with.id}{if \code{TRUE}, the returned value with \code{sample.id} and \code{sample.id}} + \item{useMatrix}{if \code{TRUE}, use \code{Matrix::dspMatrix} to store + the output square matrix to save memory} \item{beta}{the object returned from \code{snpgdsIndivBeta()}} \item{beta_rel}{the beta-based matrix is generated relative to \code{beta_rel}} diff --git a/src/SNPRelate.cpp b/src/SNPRelate.cpp index f5bfaeb..b478a7b 100755 --- a/src/SNPRelate.cpp +++ b/src/SNPRelate.cpp @@ -1095,7 +1095,7 @@ COREARRAY_DLL_EXPORT void R_init_SNPRelate(DllInfo *info) extern SEXP gnrGRM(SEXP, SEXP, SEXP, SEXP); extern SEXP gnrGRMMerge(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP gnrGRM_avg_val(); - extern SEXP gnrIBD_Beta(SEXP, SEXP, SEXP); + extern SEXP gnrIBD_Beta(SEXP, SEXP, SEXP, SEXP); extern SEXP gnrIBD_KING_Homo(SEXP, SEXP, SEXP); extern SEXP gnrIBD_KING_Robust(SEXP, SEXP, SEXP, SEXP); extern SEXP gnrIBD_LogLik(SEXP, SEXP, SEXP); @@ -1146,7 +1146,7 @@ COREARRAY_DLL_EXPORT void R_init_SNPRelate(DllInfo *info) CALL(gnrGRM, 4), CALL(gnrGRMMerge, 5), CALL(gnrGRM_avg_val, 0), - CALL(gnrIBD_Beta, 3), + CALL(gnrIBD_Beta, 4), CALL(gnrIBD_KING_Homo, 3), CALL(gnrIBD_KING_Robust, 4), CALL(gnrIBD_LogLik, 3), CALL(gnrIBD_LogLik_k01, 3), CALL(gnrIBD_MLE, 9), CALL(gnrIBD_MLE_Jacquard, 8), diff --git a/src/genBeta.cpp b/src/genBeta.cpp index f4526c6..fa79b0c 100755 --- a/src/genBeta.cpp +++ b/src/genBeta.cpp @@ -359,7 +359,7 @@ COREARRAY_DLL_EXPORT SEXP CalcIndivBetaGRM(int NumThread, bool Verbose) /// Compute the IBD coefficients by individual relatedness beta COREARRAY_DLL_EXPORT SEXP gnrIBD_Beta(SEXP Inbreeding, SEXP NumThread, - SEXP Verbose) + SEXP useMatrix, SEXP Verbose) { int inbreeding = Rf_asLogical(Inbreeding); if (inbreeding == NA_LOGICAL) @@ -382,37 +382,74 @@ COREARRAY_DLL_EXPORT SEXP gnrIBD_Beta(SEXP Inbreeding, SEXP NumThread, } // output variables - rv_ans = PROTECT(Rf_allocMatrix(REALSXP, n, n)); - double *pBeta = REAL(rv_ans); - TS_Beta *p = IBS.Get(); - double avg = 0; - - // for-loop, average - for (size_t i=0; i < n; i++) + if (Rf_asLogical(useMatrix) != TRUE) { - if (inbreeding) - pBeta[i*n + i] = double(p->ibscnt) / p->num - 1; - else - pBeta[i*n + i] = (0.5 * p->ibscnt) / p->num; - p ++; - for (size_t j=i+1; j < n; j++, p++) + rv_ans = PROTECT(Rf_allocMatrix(REALSXP, n, n)); + double *pBeta = REAL(rv_ans); + TS_Beta *p = IBS.Get(); + double avg = 0; + // for-loop, average + for (size_t i=0; i < n; i++) { - double s = (0.5 * p->ibscnt) / p->num; - pBeta[i*n + j] = s; - avg += s; + if (inbreeding) + pBeta[i*n + i] = double(p->ibscnt) / p->num - 1; + else + pBeta[i*n + i] = (0.5 * p->ibscnt) / p->num; + p ++; + for (size_t j=i+1; j < n; j++, p++) + { + double s = (0.5 * p->ibscnt) / p->num; + pBeta[i*n + j] = s; + avg += s; + } + } + avg /= C_Int64(n) * (n-1) / 2; + grm_avg_value = avg; + // for-loop, final update + double bt = 1.0 / (1 - avg); + for (size_t i=0; i < n; i++) + { + pBeta[i*n + i] = (pBeta[i*n + i] - avg) * bt; + for (size_t j=i+1; j < n; j++) + pBeta[i*n + j] = pBeta[j*n + i] = (pBeta[i*n + j] - avg) * bt; + } + } else { + // triangle matrix + const size_t ns = n*(n+1)/2; + rv_ans = PROTECT(NEW_NUMERIC(ns)); + double *pBeta = REAL(rv_ans); + TS_Beta *p = IBS.Get(); + double avg = 0; + // for-loop, average + for (size_t i=0; i < n; i++) + { + if (inbreeding) + *pBeta++ = double(p->ibscnt) / p->num - 1; + else + *pBeta++ = (0.5 * p->ibscnt) / p->num; + p ++; + for (size_t j=i+1; j < n; j++, p++) + { + double s = (0.5 * p->ibscnt) / p->num; + *pBeta++ = s; + avg += s; + } + } + avg /= C_Int64(n) * (n-1) / 2; + grm_avg_value = avg; + // for-loop, final update + double bt = 1.0 / (1 - avg); + pBeta = REAL(rv_ans); + for (size_t i=0; i < n; i++) + { + *pBeta = (*pBeta - avg) * bt; + pBeta++; + for (size_t j=i+1; j < n; j++) + { + *pBeta = (*pBeta - avg) * bt; + pBeta++; + } } - } - - avg /= C_Int64(n) * (n-1) / 2; - grm_avg_value = avg; - - // for-loop, final update - double bt = 1.0 / (1 - avg); - for (size_t i=0; i < n; i++) - { - pBeta[i*n + i] = (pBeta[i*n + i] - avg) * bt; - for (size_t j=i+1; j < n; j++) - pBeta[i*n + j] = pBeta[j*n + i] = (pBeta[i*n + j] - avg) * bt; } if (verbose)