Skip to content

Commit

Permalink
Updated files and filenames
Browse files Browse the repository at this point in the history
  • Loading branch information
Terri Porter authored and Terri Porter committed Oct 9, 2019
1 parent aed1c3f commit 4fc596d
Show file tree
Hide file tree
Showing 12 changed files with 650 additions and 370 deletions.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Teresita M. Porter, July, 8, 2019
# Teresita M. Porter, Oct. 9, 2019

library(vegan)
library(ggplot2)
Expand All @@ -10,8 +10,8 @@ library(reshape2)
library(dplyr)
library("car")
# to calculate venn counts
#source("http://www.bioconductor.org/biocLite.R")
#biocLite("limma")
source("http://www.bioconductor.org/biocLite.R")
biocLite("limma")
library(limma)
library(ggforce) # to draw circles with ggplot

Expand Down Expand Up @@ -59,12 +59,15 @@ edit_cores<-function(list, x){

########################################################

# read infile
#read infile prepared by python script
master<-read.table(file="LV2016_2.csv", head=TRUE, sep=",")

# Select phylum Arthropoda only (use this to get overall richness across all expts)
Arth_df<-master[master$phylum=="Arthropoda",]

# OTU matrix for 1C1E experiment (n=8 x 3 layers)
#A<-Arth_df[Arth_df$extractions=="1",]

###############################################################
# Create matrices for Fig 1A, bioinformatic pooling 1-15 cores
# OTU matrix for 1C3E experiment ( n=36 x 3 layers)
Expand Down Expand Up @@ -333,7 +336,8 @@ set.seed(1234)
##### Rarefy the dataset down to the 15th percentile
###################################################################

# Rarefy original ILC matrices down to 15th percentile library size to normalize read depth across samples
### Only do this for OTUs becaue too much missing data for genus and family ranks ###
#Rarefy original ILC matrices down to 15th percentile library size to normalize read depth across samples
# sites in rows, OTUs in columns, reads in cells
ILC_Arth_df<-rrarefy(ILC_Arth_notnull2,sample=ILC_Arth_15percentile)
ILC_B_df<-rrarefy(ILC_B_notnull2,sample=ILC_B_15percentile)
Expand All @@ -358,7 +362,7 @@ NZC_F_df<-rrarefy(NZC_F_notnull2,sample=NZC_F_15percentile)
NZC_G_df<-rrarefy(NZC_G_notnull2,sample=NZC_G_15percentile)

##################################################################################
# Plot richness for 1C3E bioinformatically pooled cores
# Plot richness for 1C3E bioinformatically pooled cores (not accumulation curves)
##################################################################################

# do specnum for each site and layer separately
Expand Down Expand Up @@ -438,7 +442,8 @@ p1<-ggplot(summary_bio, aes(x=cores, y=richness, fill=layer)) +
legend.position = "none")

###################################################################
##### Calculate richness for 1C3E & XC3E, increasing number manually pooled samples
##### Calculate species accumulation for 1C3E & XC3E
# pooled cores vs OTU richness
###################################################################

# do specnum for each site and layer separately
Expand Down Expand Up @@ -683,7 +688,8 @@ p2<-ggplot(summary_num, aes(x=cores, y=richness, fill=layer)) +
legend.position = "none")

###################################################################
##### Calculate richness for 1C1E and 1C3E, one vs three pooled DNA extractions
##### Calculate species accumulation for 1C1E and 1C3E
# Extractions vs Richness
###################################################################

# do specnum for each site, layer, and extraction separately
Expand Down Expand Up @@ -838,52 +844,27 @@ lay<-rbind(c(1),

g<-grid.arrange(p1, p2, p3, layout_matrix=lay)

ggsave("Fig1_richness_ESV.pdf", g, width=8, height=10, units="in")
ggsave("F2_richness_ESV.pdf", g, width=8, height=10, units="in")

#####################################
# Check for normality with Shapiro-Wilk’s test, sig result means not normal
set.seed(1234)
shapiro.test(summary_ext$richness)
# W = 0.95139, p-value = 0.001545
# W = 0.94999, p-value = 0.001259

#visual inspection, sometimes small sample sizes can pass normality tests
qqPlot(summary_ext$richness)

# Use Kruskal-Wallis test to check for any significant differences among extractions
new<-summary_ext
new$extractions<-as.factor(new$extractions)
kruskal.test(richness ~ extractions, data = new)
# Kruskal-Wallis chi-squared = 0.41817, df = 1, p-value = 0.5179

# Use multiple pairwise-comparison bewteen groups to check for specific diffs across cores just in case
# p.adjust method Benjamini & Hochberg (1995)
pairwise.wilcox.test(new$richness, new$extractions,
p.adjust.method = "BH")
# n/s p-value = 0.47

#########
# Figure out how many single cores would need to be sampled to match results from largest class of pooled cores
# p2, summary_num, pool data across sites and layers, get median richness
median(summary_num$richness[summary_num$cores=="9_15"])
# 171.5
# p1, summary_acc, pool data across sites and layer, get median richness
median(summary_bio$richness[summary_bio$cores=="1"])
# 156

# Use multiple pairwise-comparison bewteen groups to check for specific diffs betwee 9-15 pooled or single
# p.adjust method Benjamini & Hochberg (1995)

# create df for test
pooled915 <- summary_num[grepl("9_15", summary_num$cores),]
pooled915$sample <- "NotApplicable"
single <- summary_bio[grepl("1$", summary_bio$cores),]
single$extractions <- "3"
combined <- rbind(pooled915, single)

# Check for normality with Shapiro-Wilk’s test, sig result means not normal
set.seed(1234)
shapiro.test(combined$richness)
# W = 0.94287, p-value = 0.02292

#visual inspection, sometimes small sample sizes can pass normality tests
qqPlot(combined$richness)

pairwise.wilcox.test(combined$richness, combined$cores,
p.adjust.method = "BH")
# p-value = 0.73 n/s
# n/s

#################################################################
### Create venn diagrams from rarefied data
Expand Down Expand Up @@ -925,6 +906,7 @@ df.ILC.venncounts <- as.data.frame(ILC.venncounts)[-1,] %>%
y = c(-0.8, 1.2, 0, 1.2, 0, 1.4, 0.4))

# draw venn with ggplot

ILCrichness <- paste("Island Lake ESVs =",
ILC.norm.tot.richness)

Expand Down Expand Up @@ -968,6 +950,7 @@ df.NZC.venncounts <- as.data.frame(NZC.venncounts)[-1,] %>%
y = c(-0.8, 1.2, 0, 1.2, 0, 1.4, 0.4))

# draw venn with ggplot

NZCrichness <- paste("Nimitz ESVs =",
NZC.norm.tot.richness)

Expand All @@ -989,4 +972,93 @@ lay <- rbind(c(1,2),

g <- grid.arrange(v1, v2, layout_matrix = lay)

ggsave("FigS5_Venn.pdf", g)
ggsave("FigS4_Venn.pdf", g)

#################################################################
### Calc greatest richness detected including up to 15 individually collected cores versus up to 15 pooled cores

## Bioinformatically pooled
# convert to presence-absence ILC
ILC_B.combo.df_df[ILC_B.combo.df_df > 0] <- 1
# check 15 indvidually pooled cores
median(rowSums(ILC_B.combo.df_df[grepl("_15_", rownames(ILC_B.combo.df_df)),]))
# 595.5
sd(rowSums(ILC_B.combo.df_df[grepl("_15_", rownames(ILC_B.combo.df_df)),]))
# 153.1

# convert to presence-absence NZC
NZC_B.combo.df_df[NZC_B.combo.df_df > 0] <- 1
# check 15 indvidually pooled cores
median(rowSums(NZC_B.combo.df_df[grepl("_15_", rownames(NZC_B.combo.df_df)),]))
# 510
sd(rowSums(NZC_B.combo.df_df[grepl("_15_", rownames(NZC_B.combo.df_df)),]))
# 128.1

## Manually pooled
# convert to presence-absence ILC
ILC_G_df[ILC_G_df > 0] <- 1
# check 15 indvidually pooled cores
median(rowSums(ILC_G_df[grepl("_15C3E_", rownames(ILC_G_df)),]))
# 167
sd(rowSums(ILC_G_df[grepl("_15C3E_", rownames(ILC_G_df)),]))
# 74.5

# convert to presence-absence NZC
NZC_G_df[NZC_G_df > 0] <- 1
# check 15 indvidually pooled cores
median(rowSums(NZC_G_df[grepl("_15C3E_", rownames(NZC_G_df)),]))
# 126
sd(rowSums(NZC_G_df[grepl("_15C3E_", rownames(NZC_G_df)),]))
# 84.6

#########
# Figure out how many single cores would need to be sampled to match results from largest class of pooled cores
# summary_num, pool data across sites and layers, get median richness
median(summary_num$richness[summary_num$cores=="9_15" &
(summary_num$layer == "Bryophyte" |
summary_num$layer == "Organic")])
# 205.5 ESVs
sd(summary_num$richness[summary_num$cores=="9_15" &
(summary_num$layer == "Bryophyte" |
summary_num$layer == "Organic")])
# 34.0 ESVs

# summary_bio, pool data across sites and layer, get median richness
median(summary_bio$richness[summary_bio$cores=="1" &
(summary_bio$layer == "Bryophyte" |
summary_bio$layer == "Organic")])
# 177 ESVs
sd(summary_bio$richness[summary_bio$cores=="1" &
(summary_bio$layer == "Bryophyte" |
summary_bio$layer == "Organic")])
# 43.6

# Use multiple pairwise-comparison bewteen groups to check for specific diffs across cores just in case
# p.adjust method Benjamini & Hochberg (1995)

# grab data for 15 field pooled cores
composite915 <- data.frame(summary_num$richness[summary_num$cores=="9_15" &
(summary_num$layer == "Bryophyte" |
summary_num$layer == "Organic")])
composite915$type <- "composite915"
names(composite915)[1] <- "richness"

# grab richness for 15 individual cores
individual15 <- data.frame(summary_bio$richness[summary_bio$cores=="15" &
(summary_bio$layer == "Bryophyte" |
summary_bio$layer == "Organic")])
individual15$type <- "individual15"
names(individual15)[1] <- "richness"

# put into one df for statistical comparison
comp <- rbind(composite915, individual15)

pairwise.wilcox.test(comp$richness, comp$type,
p.adjust.method = "BH")
# 1.5e-06

median(composite915$richness)
# 205.5
median(individual15$richness)
# 598.5

Loading

0 comments on commit 4fc596d

Please sign in to comment.