From 41da432feaea9f89dcb441ace1b1ddb1f4ff33df Mon Sep 17 00:00:00 2001 From: qclayssen Date: Sun, 19 Nov 2023 19:18:20 +0100 Subject: [PATCH 1/2] Add subsampling feature to gate_interactive_chr_int Implemented a subsampling option in the gate_interactive_chr_int function. This allows users to work with a fraction or a fixed number of rows from the dataset, enhancing efficiency for large datasets. The feature includes handling both fraction and fixed number subsampling, along with a seed for reproducibility. --- R/functions_chr_int.R | 21 +++++++++++++++++++-- R/methods.R | 6 ++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/R/functions_chr_int.R b/R/functions_chr_int.R index 302cbe0..db6f99e 100644 --- a/R/functions_chr_int.R +++ b/R/functions_chr_int.R @@ -45,6 +45,7 @@ pretty_plot_chr_int = function(.data, .color = NULL, .shape = NULL, .size = NULL, + subsample = NULL, opacity = 1, is_size_fixed) { # Comply with CRAN NOTES @@ -58,8 +59,19 @@ pretty_plot_chr_int = function(.data, .shape = enquo(.shape) .size = enquo(.size) my_size_range = c(1, 3) - - + + if (!is.null(subsample)) { + if (subsample > 0 && subsample < 1) { + # Subsample as a fraction + set.seed(123) # Set seed for reproducibility + .data <- .data %>% sample_frac(subsample) + } else if (subsample >= 1) { + # Subsample as a fixed number + set.seed(123) # Set seed for reproducibility + .data <- .data %>% sample_n(subsample) + } + } + .data_formatted = .data %>% @@ -277,6 +289,7 @@ gate_interactive_chr_int <- .color = NA, .shape = NULL, .size = NULL, + subsample = NULL, opacity = 1, how_many_gates = 1, is_size_fixed, @@ -336,6 +349,8 @@ gate_interactive_chr_int <- # size can be number or column .size = !!.size, + subsample = subsample, + opacity = opacity, is_size_fixed = is_size_fixed ) @@ -427,6 +442,7 @@ gate_programmatic_chr_int <- .color = NULL, .shape = NULL, .size = NULL, + subsample = NULL, opacity = 1, how_many_gates = 1, .group_by = NULL, @@ -484,6 +500,7 @@ 
gate_programmatic_chr_int <- .color = .color, .shape = .shape, + subsample =subsample, # size can be number of column .size = .size, diff --git a/R/methods.R b/R/methods.R index 48ad11c..58538c1 100755 --- a/R/methods.R +++ b/R/methods.R @@ -73,6 +73,7 @@ gate_chr <- function(.dim1, .color = NULL, .shape = NULL, .size = NULL, + subsample = NULL, opacity = 1, how_many_gates = 1, .group_by = NULL, @@ -95,6 +96,7 @@ gate_chr.numeric = function( .dim1, .color = NULL, .shape = NULL, .size = NULL, + subsample = NULL, opacity = 1, how_many_gates = 1, .group_by = NULL, @@ -109,6 +111,7 @@ gate_chr.numeric = function( .dim1, .color = .color, .shape = .shape, .size = .size, + subsample = subsample, opacity = opacity, how_many_gates = how_many_gates, .group_by = .group_by, @@ -136,6 +139,7 @@ gate_int <- function(.dim1, .color = NULL, .shape = NULL, .size = NULL, + subsample = NULL, opacity = 1, how_many_gates = 1, .group_by = NULL, @@ -157,6 +161,7 @@ gate_int.numeric = function( .dim1, .color = NULL, .shape = NULL, .size = NULL, + subsample = NULL, opacity = 1, how_many_gates = 1, .group_by = NULL, @@ -170,6 +175,7 @@ gate_int.numeric = function( .dim1, .color = .color, .shape = .shape, .size = .size, + subsample = subsample, opacity = opacity, how_many_gates = how_many_gates, .group_by = .group_by, From 3106cc8b3b1572205219085fdbe86ef174053f00 Mon Sep 17 00:00:00 2001 From: qclayssen Date: Thu, 23 Nov 2023 01:03:41 +0100 Subject: [PATCH 2/2] add documentation for subsample --- DESCRIPTION | 2 +- R/functions_chr_int.R | 1 + R/methods.R | 1 + README.Rmd | 11 +++++++++++ README.md | 21 +++++++++++++++++++++ man/gate_chr-methods.Rd | 4 ++++ man/gate_chr.numeric.Rd | 3 +++ man/gate_int.numeric.Rd | 3 +++ man/gate_interactive_chr_int.Rd | 3 +++ 9 files changed, 48 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 22bc172..b5fb8da 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,7 +20,7 @@ Depends: R (>= 3.6.0) Encoding: UTF-8 LazyData: true -RoxygenNote: 
7.1.1 +RoxygenNote: 7.2.3 Imports: utils, graphics, diff --git a/R/functions_chr_int.R b/R/functions_chr_int.R index db6f99e..c907e2a 100644 --- a/R/functions_chr_int.R +++ b/R/functions_chr_int.R @@ -275,6 +275,7 @@ pretty_plot_chr_int = function(.data, #' @param .color A column symbol. Color of points #' @param .shape A column symbol. Shape of points #' @param .size A column symbol. Size of points +#' @param subsample A number, or NULL (the default) for no subsampling. A value between 0 and 1 (exclusive) is the fraction of rows to sample; a value of 1 or more is the number of rows to sample. Sampling uses a fixed seed for reproducibility. #' @param opacity A number between 0 and 1. The opacity level of the data points #' @param how_many_gates An integer. The number of gates to label #' @param gate_list A list of gates. It is returned by gate function as attribute \"gate\". If you want to create this list yourself, each element of the list is a data frame with x and y columns. Each row is a coordinate. The order matter. diff --git a/R/methods.R b/R/methods.R index 58538c1..1951c4d 100755 --- a/R/methods.R +++ b/R/methods.R @@ -16,6 +16,7 @@ #' @param .color A column symbol. Colour of points #' @param .shape A column symbol. Shape of points #' @param .size A column symbol. Size of points +#' @param subsample A number, or NULL (the default) for no subsampling. A value between 0 and 1 (exclusive) is the fraction of rows to sample; a value of 1 or more is the number of rows to sample. Sampling uses a fixed seed for reproducibility. #' @param opacity A number between 0 and 1. The opacity level of the data points #' @param how_many_gates An integer. The number of gates to label #' @param .group_by A column symbol. The column that is used to calculate distance (i.e., normally genes) diff --git a/README.Rmd b/README.Rmd index 22f8ff2..d70a7bd 100755 --- a/README.Rmd +++ b/README.Rmd @@ -118,4 +118,15 @@ tidygate_data %>% ``` +## Subsampling +The `subsample` parameter allows you to sample a subset of your data for analysis with a fixed seed. This can be particularly useful for large datasets or for preliminary exploratory analysis. 
Here's how you can use it: + +```{r eval=FALSE} +# For sampling 50% of your data +tidygate_data %>% mutate(gate = gate_chr(Dim1, Dim2, subsample = 0.5)) + +# For sampling 100 randomly chosen observations +tidygate_data %>% mutate(gate = gate_chr(Dim1, Dim2, subsample = 100)) + +``` \ No newline at end of file diff --git a/README.md b/README.md index 3a50358..bc110e9 100755 --- a/README.md +++ b/README.md @@ -131,3 +131,24 @@ tidygate_data %>% ## 9 adrenal 1 fibrobla… immune_c… 0.52 ACC -0.776 -0.383 0 ## 10 adrenal 1 immune_c… endothel… 1 ACC 0.980 -0.116 0 ## # … with 2,230 more rows + +## Subsampling + +The `subsample` parameter allows you to sample a subset of your data for analysis with a fixed seed. This can be particularly useful for large datasets or for preliminary exploratory analysis. Here's how you can use it: + +```r +# For sampling 50% of your data +tidygate_data %>% + mutate( gate = gate_chr( + Dim1, Dim2, subsample = 0.5 + )) + + +# For sampling 100 randomly chosen observations +tidygate_data %>% + mutate( gate = gate_chr( + Dim1, Dim2, subsample = 100 + )) + + +``` \ No newline at end of file diff --git a/man/gate_chr-methods.Rd b/man/gate_chr-methods.Rd index a8f71c1..002f6dd 100644 --- a/man/gate_chr-methods.Rd +++ b/man/gate_chr-methods.Rd @@ -12,6 +12,7 @@ gate_chr( .color = NULL, .shape = NULL, .size = NULL, + subsample = NULL, opacity = 1, how_many_gates = 1, .group_by = NULL, @@ -25,6 +26,7 @@ gate_int( .color = NULL, .shape = NULL, .size = NULL, + subsample = NULL, opacity = 1, how_many_gates = 1, .group_by = NULL, @@ -43,6 +45,8 @@ gate_int( \item{.size}{A column symbol. Size of points} +\item{subsample}{A number, or NULL (the default) for no subsampling. A value between 0 and 1 (exclusive) is the fraction of rows to sample; a value of 1 or more is the number of rows to sample. Sampling uses a fixed seed for reproducibility.} + \item{opacity}{A number between 0 and 1. The opacity level of the data points} \item{how_many_gates}{An integer. 
The number of gates to label} diff --git a/man/gate_chr.numeric.Rd b/man/gate_chr.numeric.Rd index dd6c69a..fffacc6 100644 --- a/man/gate_chr.numeric.Rd +++ b/man/gate_chr.numeric.Rd @@ -10,6 +10,7 @@ .color = NULL, .shape = NULL, .size = NULL, + subsample = NULL, opacity = 1, how_many_gates = 1, .group_by = NULL, @@ -28,6 +29,8 @@ \item{.size}{A column symbol. Size of points} +\item{subsample}{A number, or NULL (the default) for no subsampling. A value between 0 and 1 (exclusive) is the fraction of rows to sample; a value of 1 or more is the number of rows to sample. Sampling uses a fixed seed for reproducibility.} + \item{opacity}{A number between 0 and 1. The opacity level of the data points} \item{how_many_gates}{An integer. The number of gates to label} diff --git a/man/gate_int.numeric.Rd b/man/gate_int.numeric.Rd index 33426ba..97dde6d 100644 --- a/man/gate_int.numeric.Rd +++ b/man/gate_int.numeric.Rd @@ -10,6 +10,7 @@ .color = NULL, .shape = NULL, .size = NULL, + subsample = NULL, opacity = 1, how_many_gates = 1, .group_by = NULL, @@ -28,6 +29,8 @@ \item{.size}{A column symbol. Size of points} +\item{subsample}{A number, or NULL (the default) for no subsampling. A value between 0 and 1 (exclusive) is the fraction of rows to sample; a value of 1 or more is the number of rows to sample. Sampling uses a fixed seed for reproducibility.} + \item{opacity}{A number between 0 and 1. The opacity level of the data points} \item{how_many_gates}{An integer. The number of gates to label} diff --git a/man/gate_interactive_chr_int.Rd b/man/gate_interactive_chr_int.Rd index 23c828e..baafa35 100644 --- a/man/gate_interactive_chr_int.Rd +++ b/man/gate_interactive_chr_int.Rd @@ -11,6 +11,7 @@ gate_interactive_chr_int( .color = NA, .shape = NULL, .size = NULL, + subsample = NULL, opacity = 1, how_many_gates = 1, is_size_fixed, @@ -30,6 +31,8 @@ gate_interactive_chr_int( \item{.size}{A column symbol. Size of points} +\item{subsample}{A number, or NULL (the default) for no subsampling. 
A value between 0 and 1 (exclusive) is the fraction of rows to sample; a value of 1 or more is the number of rows to sample. Sampling uses a fixed seed for reproducibility.} + \item{opacity}{A number between 0 and 1. The opacity level of the data points} \item{how_many_gates}{An integer. The number of gates to label}