-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreport.Rmd
126 lines (94 loc) · 3.58 KB
/
report.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
---
title: "Report"
author: "Kyra Griffin"
date: '2022-06-13'
output: html_document
---
Use this block to source your `main.R`.
```{r setup, message=FALSE, warning=FALSE}
# Default options for every later chunk: hide warnings, messages and the chunk
# source in the rendered report.
knitr::opts_chunk$set(
  echo = FALSE,
  message = FALSE,
  warning = FALSE
)

# Run the project script so the names it defines are available below.
source("main.R")
```
### Reading the data from proteomics_ack.xlsx
```{r, warning=FALSE, message=FALSE, echo=FALSE}
# Read data/proteomics_ack.xlsx through read_data() (not defined in this
# document; presumably provided by main.R) and display the result.
proteomics_ack <- read_data("data/proteomics_ack.xlsx")
# Alternative rendering, currently disabled:
# knitr::kable(proteomics_ack)
proteomics_ack
```
```{r, warning=FALSE, message=FALSE, echo=FALSE}
# Pass the table through select_columns() (not defined in this document;
# presumably provided by main.R), overwrite it with the result and display it.
proteomics_ack <- proteomics_ack %>%
  select_columns()
proteomics_ack
```
```{r, warning=FALSE, message=FALSE, echo=FALSE}
# Build `protein_groups` from the table with id_protein_groups() (not defined
# in this document; presumably provided by main.R) and display it.
protein_groups <- proteomics_ack %>%
  id_protein_groups()
protein_groups
```
```{r, warning=FALSE, message=FALSE, echo=FALSE}
# Combine the table with `protein_groups` via consolidate_data() (not defined
# in this document; presumably provided by main.R) and display the result.
proteomics_ack <- proteomics_ack %>%
  consolidate_data(protein_groups)
proteomics_ack

# Save the consolidated table. The same path is written again by a later chunk
# once Sample3vSample1 has been converted to numeric.
write.csv(proteomics_ack, file = "data/proteomics_ack.csv")
```
```{r, warning=FALSE, message=FALSE, echo=FALSE}
# Derive `t_test_data` with form_t_test_data() (not defined in this document;
# presumably provided by main.R) and display it. Later chunks read its
# `normFoldChange` and `sampleVsample` columns.
t_test_data <- proteomics_ack %>%
  form_t_test_data()
t_test_data
```
### Data Visualization / Exploratory Findings
Summary of the samples' normalized fold change
```{r, warning=FALSE, message=FALSE, echo=FALSE}
# Summarise `t_test_data` with summary_t_test_data() and display the result.
# The result gets its own name: assigning it to `summary_t_test_data` would
# overwrite (or mask) the function of the same name, so this chunk could not be
# re-run in the same session.
t_test_summary <- summary_t_test_data(t_test_data)
t_test_summary
```
Boxplot of the samples' normalized fold change
```{r, warning=FALSE, message=FALSE, echo=FALSE}
# Produce the boxplot for `t_test_data` by calling the project helper.
# NOTE(review): the helper's name is spelled "visulaize"; it is not defined in
# this document (presumably in main.R), so any rename must be made there and
# here at the same time.
visulaize_t_test_data(t_test_data)
```
Visualizing the max abundance:
```{r, warning=FALSE, message=FALSE, echo=FALSE}
# Abundance plot, currently disabled:
# abundance_plot <- plot_abundance(proteomics_ack)
#
# abundance_plot

# Sample3vSample1 is cleaned up before being converted to numeric. Only cells
# that consist solely of "-" (presumably a missing-value placeholder; TODO
# confirm against the workbook) are turned into "0". The pattern is anchored to
# the whole cell because an unanchored "-" also rewrites the first minus sign
# of real values, e.g. "-1.5" -> "01.5" or "1e-5" -> "1e05".
proteomics_ack$Sample3vSample1 <- str_replace(
  proteomics_ack$Sample3vSample1,
  "^\\s*-\\s*$",
  "0"
)
proteomics_ack$Sample3vSample1 <- as.double(proteomics_ack$Sample3vSample1)

# Replace any remaining NA values in Sample3vSample1 with zero.
proteomics_ack <- proteomics_ack %>%
  mutate(Sample3vSample1 = ifelse(is.na(Sample3vSample1), 0, Sample3vSample1))

# Long format: one row per original row and comparison column.
# NOTE(review): `data` and `temp` are not read again in this chunk; `data` is
# reassigned by the parallel coordinate plot chunk.
data <- proteomics_ack %>%
  pivot_longer(
    cols = c("Sample2vSample1", "Sample3vSample1", "Sample3vSample2"),
    names_to = "sampleVsample",
    values_to = "normFoldChange"
  )

# Group by the `Gene Name` column. A bare string inside group_by() is treated
# as a constant value, which puts every row into one group; any_of() selects
# the column by name and simply leaves the data ungrouped if it is absent.
temp <- data %>%
  group_by(across(any_of("Gene Name")))

# Save the table again now that Sample3vSample1 is numeric.
write.csv(proteomics_ack, "data/proteomics_ack.csv")
```
## Parallel Coordinate Plot of normalized fold change for all samples
```{r, warning=FALSE, message=FALSE, echo=FALSE}
library(GGally)

# Long-format copy of the three comparison columns.
# NOTE(review): `data` is not passed to the plot below, which reads
# `proteomics_ack` directly.
data <- pivot_longer(
  dplyr::select(
    proteomics_ack,
    c("Sample2vSample1", "Sample3vSample1", "Sample3vSample2")
  ),
  everything(),
  names_to = "sampleVsample",
  values_to = "normFoldChange"
)

# Parallel coordinate plot over columns 3, 4 and 5 of `proteomics_ack`, with a
# point drawn at each value.
# NOTE(review): the columns are picked by position; presumably these are the
# three comparison columns -- confirm if the column order ever changes.
ggparcoord(
  proteomics_ack,
  columns = c(3, 4, 5),
  showPoints = TRUE
)

# Manual colour scale, currently disabled:
# scale_color_manual(c("Sample2vSample1" = "red",
#                      "Sample3vSample1" = "green",
#                      "Sample3vSample2" = "blue"))
```
### Statistical Analysis
```{r, warning=FALSE, message=FALSE, echo=FALSE}
# Run one Shapiro-Wilk normality test per comparison group on the
# `normFoldChange` values of `t_test_data`. Each subsetting expression is
# written out in full because shapiro.test() prints the expression it was given
# as the "data:" label of its result.
# NOTE(review): shapiro.test() only accepts between 3 and 5000 non-missing
# values; confirm each group stays within that range.

# Shapiro-Wilk normality test for Sample2vSample1 fold change
with(t_test_data, shapiro.test(normFoldChange[sampleVsample == "Sample2vSample1"]))
# Shapiro-Wilk normality test for Sample3vSample1 fold change
with(t_test_data, shapiro.test(normFoldChange[sampleVsample == "Sample3vSample1"]))
# Shapiro-Wilk normality test for Sample3vSample2 fold change
with(t_test_data, shapiro.test(normFoldChange[sampleVsample == "Sample3vSample2"]))
```
```{r, warning=FALSE, message=FALSE, echo=FALSE}
# ANOVA of normFoldChange by sampleVsample; anova_test() is not defined in this
# document (presumably from rstatix, loaded via main.R).
res.aov <- anova_test(t_test_data, normFoldChange ~ sampleVsample)
res.aov

# Pairwise t-tests between the sampleVsample groups, with Bonferroni-adjusted
# p-values.
pwc <- pairwise_t_test(
  t_test_data,
  normFoldChange ~ sampleVsample,
  p.adjust.method = "bonferroni"
)
pwc
```