-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathread-tbl.Rd
129 lines (105 loc) · 4.65 KB
/
read-tbl.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read-tbl.R
\name{read-tbl}
\alias{read-tbl}
\alias{read_tbl}
\alias{read_tbl_reference}
\alias{read_tbl_alternate}
\alias{read_tbl_coverage}
\alias{read_tbl_genotype}
\alias{read_tbl_haplotype}
\alias{read_tbl_ref_alt_cov}
\title{Read MIPTools tables}
\usage{
read_tbl_reference(.tbl, ...)
read_tbl_alternate(.tbl, ...)
read_tbl_coverage(.tbl, ...)
read_tbl_genotype(.tbl, ...)
read_tbl_haplotype(.tbl, ..., .col_select = NULL)
read_tbl_ref_alt_cov(
  .tbl_ref,
  .tbl_alt,
  .tbl_cov,
  ...,
  chrom = deprecated(),
  gene = deprecated()
)
}
\arguments{
\item{.tbl}{File path to the table.}
\item{...}{<\code{\link[dplyr:dplyr_data_masking]{data-masking}}> Filtering
expressions. Expressions must return a logical value. If multiple
expressions are included, they are combined with the \code{&} operator. Only
rows for which all conditions evaluate to \code{TRUE} are kept.}
\item{.col_select}{Columns to include in the results. Columns can be selected
using one or more selection expressions as in
\code{\link[dplyr:select]{dplyr::select()}}. Use \code{c()} or \code{list()} to use more
than one expression. See \code{\link[tidyselect:language]{?tidyselect::language}}
for details on available selection options.}
\item{.tbl_ref}{File path to the reference table.}
\item{.tbl_alt}{File path to the alternate table.}
\item{.tbl_cov}{File path to the coverage table.}
\item{chrom}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} The chromosome(s) to filter
to.}
\item{gene}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} The gene(s) to filter to.}
}
\value{
A \code{\link[tibble:tibble-package]{tibble()}} subclass. Each function defines a
unique subclass to store the data. Data typically contains the sample,
associated metadata, and the value of interest.
}
\description{
The \verb{read_tbl_*()} family of functions is designed to read data tables
generated by the software program
\href{https://github.com/bailey-lab/MIPTools}{\code{MIPTools}}. Data is read lazily
using the \href{https://vroom.r-lib.org/index.html}{\code{vroom} package}. Data can be
filtered, retaining all rows that satisfy the conditions. To be retained, the
row in question must produce a value of \code{TRUE} for all conditions. Note that
when a condition evaluates to \code{NA}, the row will be dropped.
}
\section{Data structure}{
Input data must contain six rows of metadata. The metadata can vary depending
on what type of file is read, but typically contains information about the
location of a mutation. The remaining rows represent the data for each sample
sequenced.
}
\section{Useful filter functions}{
The \code{\link[dplyr:filter]{dplyr::filter()}} function is employed to subset the rows of the data by
applying the expressions in \code{...} to the column values to determine which
rows should be retained.
There are many functions and operators that are useful when constructing the
expressions used to filter the data:
\itemize{
\item \code{\link{==}}, \code{\link{>}}, \code{\link{>=}}, etc.
\item \code{\link{&}}, \code{\link{|}}, \code{\link{!}}, \code{\link[=xor]{xor()}}
\item \code{\link[=is.na]{is.na()}}
\item \code{\link[dplyr:between]{between()}}, \code{\link[dplyr:near]{near()}}
}
}
\examples{
# Get path to example file
ref_file <- miplicorn_example("reference_AA_table.csv")
alt_file <- miplicorn_example("alternate_AA_table.csv")
cov_file <- miplicorn_example("coverage_AA_table.csv")
ref_file
# Input sources -------------------------------------------------------------
# Read from a path
read_tbl_reference(ref_file)
# You can also pass a file path string directly instead of a variable
# read_tbl_alternate("alternate_AA_table.csv")
# Read entire file ----------------------------------------------------------
read_tbl_coverage(cov_file)
# Data filtering ------------------------------------------------------------
# Filtering by one criterion
read_tbl_reference(ref_file, gene == "atp6")
# Filtering by multiple criteria within a single logical expression
read_tbl_alternate(alt_file, gene == "atp6" & targeted == "Yes")
read_tbl_coverage(cov_file, gene == "atp6" | targeted == "Yes")
# When multiple expressions are used, they are combined using &
read_tbl_reference(ref_file, gene == "atp6", targeted == "Yes")
# Read multiple files together ----------------------------------------------
read_tbl_ref_alt_cov(ref_file, alt_file, cov_file)
}
\seealso{
\code{\link[vroom:vroom]{vroom::vroom()}} \code{\link[dplyr:filter]{dplyr::filter()}}
}