1 Prep data

1.1 Load packages

library(phyloseq)
library(vegan)
library(ggplot2)
library(usedist)
library(car)
library(performance)
library(emmeans)
library(plyr)
library(lme4)
library(splitstackshape)
library(kableExtra)
library(ggpubr)
library(gtable)
library(grid)
library(gridExtra)
library(glmmTMB)
library(VennDiagram)
library(reshape2)
library(colorBlindness)

# Wrapper that applies the table settings used throughout, so the styling is defined in one place.
#   x         - the object to be turned into a kable
#   caption   - the caption shown with the table
#   row.names - whether to include row names (NA, the kbl default, is passed through unchanged)
kable.wrap <- function(x,caption,row.names=NA){
  # build the basic centred table first, then apply the classic theme to it
  base.table <- kbl(x,align="c",caption=caption,row.names = row.names)
  kable_classic(base.table,full_width = FALSE,html_font = "Calibri",font_size = 18)
}

1.2 Load data

  • Load metadata (called “meta”) and convert to phyloseq
  • Make sure date is in YYYY-MM-DD
# NOTE(review): absolute, machine-specific working directory; every relative file path below is resolved against it
setwd("C:/Users/Donald McKnight/Documents/other papers/blandings microbiomes/analyses/analyses_final/for paper")
# read the sample metadata table
meta <- read.csv("Supplemental.metadata.csv")

# use the sample IDs as row names before wrapping the table as phyloseq sample data
# (presumably these IDs match the sample column names of the ASV table - verify against the input files)
rownames(meta) <- meta$sample.ID
meta.phylo <- sample_data(meta)
  • Load asv table and convert to phyloseq
# function for normalizing to proportions
# x = numeric vector; returns each element divided by the vector total
# (a vector summing to 0 would give NaN - no guard is applied here)
prop.trans <- function(x){x/sum(x)}

data <- read.csv("Supplemental.asv.csv")
data.no.trans <- data #keeps an untransformed copy of data
# first column is used as the row names (presumably the ASV identifiers - confirm against the file)
rownames(data) <- data[,1]
# keep columns 2 to (last - 1): drops the ID column and the final column
# NOTE(review): the content of the final column is not visible here - confirm it is not a sample
data <- data[,2:(ncol(data)-1)]
data <- as.matrix(data)
# applied over columns (margin 2), so each column is divided by its own total
data <- apply(data,2,prop.trans) #convert to proportions
asv <- otu_table(data,taxa_are_rows = T)
  • Load taxa and convert to phyloseq
# this is the file where all possible labels for unknown have been replaced with "unknown_ ..." where "..." = the lowest taxonomic level available (e.g., "unknown_Bacteroides")
taxa <- read.csv("Supplemental.taxa.csv")
# first column becomes the row names and is then dropped from the table
rownames(taxa) <- taxa[,1]
taxa <- as.matrix(taxa[,-1])
# the 7th remaining column is renamed to "ASV"
# NOTE(review): position is hard-coded - assumes the file always has this column order
colnames(taxa)[7] <- "ASV"
taxa <- tax_table(taxa)

# load rooted tree file and trim
tree <- read_tree("Supplemental.rooted.tree.nwk")
# keep only the tips whose names appear as row names of the ASV table
tree <- prune_taxa(rownames(asv),tree)
  • Combine data into a phyloseq object
# convert to phyloseq
# combines the taxonomy table, ASV table, tree and sample metadata into the single object used downstream
phylo <- merge_phyloseq(taxa,asv,tree, meta.phylo)
  • Compress data at other taxonomic levels
# Agglomerate the ASV-level object at each higher taxonomic rank.
# Done once here because tax_glom is slow; the results are reused by the plotting functions.
phylo.fam   <- tax_glom(physeq = phylo, taxrank = "Family")
phylo.ord   <- tax_glom(physeq = phylo, taxrank = "Order")
phylo.class <- tax_glom(physeq = phylo, taxrank = "Class")
phylo.phy   <- tax_glom(physeq = phylo, taxrank = "Phylum")

# Parallel lists: element i of label.list is the rank label for element i of phylo.list
phylo.list <- list(phylo, phylo.fam, phylo.ord, phylo.class, phylo.phy)
label.list <- as.list(c("ASV", "Family", "Order", "Class", "Phylum"))

1.3 Calculate dist objects

  • Will calculate both abundance and presence/absence indexes with and without phylogenetic information
    • Bray-Curtis
    • Jaccard
    • Weighted unifrac
    • Unweighted unifrac
  • Will calculate at ASV level and at family level (with unknown families grouped by lowest known taxonomic level)
# ---- ASV level ----
# abundance-based measures use the proportion-transformed data
bray <- distance(phylo,method="bray") #calculate bray curtis dissimilarities
wuni <- distance(phylo,method="wunifrac") #calculate weighted unifrac distances

#convert data in phylo object to binary (presence/absence)
phylo.bin <- phylo
phylo.bin@otu_table[which(phylo.bin@otu_table > 0)] <- 1
# presence/absence measures must be calculated on the binary object (as is done at the family level below);
# calculating "jaccard" on the abundance data would instead give the quantitative form of the index
jaccard <- distance(phylo.bin,method="jaccard") #calculate presence/absence jaccard dissimilarity
uni <- distance(phylo.bin,method="unifrac") #calculate unweighted unifrac distances

# ---- Family level (as above) ----

bray.fam <- distance(phylo.fam,method="bray") #calculate bray curtis dissimilarities
wuni.fam <- distance(phylo.fam,method="wunifrac") #calculate weighted unifrac distances

#convert data in phylo object to binary (presence/absence)
phylo.fam.bin <- phylo.fam
phylo.fam.bin@otu_table[which(phylo.fam.bin@otu_table > 0)] <- 1
jaccard.fam <- distance(phylo.fam.bin,method="jaccard") #calculate presence/absence jaccard dissimilarity
uni.fam <- distance(phylo.fam.bin,method="unifrac") #calculate unweighted unifrac distances

#make list of dist objects
# order matters: perm.loop assumes entries 1-4 are ASV level and entries 5-8 are family level
dists <- list(bray,jaccard,wuni,uni,bray.fam,jaccard.fam,wuni.fam,uni.fam)
# NOTE(review): entry 4 was previously misspelled "Uneighted Unifrac"; update any code that looked it up by that name
names(dists) <- c("Bray-Curtis","Jaccard","Weighted Unifrac","Unweighted Unifrac","Bray-Curtis: Family","Jaccard: Family","Weighted Unifrac: Family","Unweighted Unifrac: Family")

1.4 Data summary

  • At this point, the following objects are available:
    • meta = metadata, including richness, evenness, growth, read depth, etc.
    • phylo = phyloseq object with data transformed to proportions
    • phylo.fam = phylo compressed to the family level
    • dists = A list of distance/dissimilarity objects
      • “Bray-Curtis”
      • “Jaccard”
      • “Weighted Unifrac”
      • “Unweighted Unifrac”
      • “Bray-Curtis: Family”
      • “Jaccard: Family”
      • “Weighted Unifrac: Family”
      • “Unweighted Unifrac: Family”
  • These data have already had much of the pseudoreplication removed, but some additional subsetting will be required
  • The wild samples generally seemed similar regardless of location, and the sample sizes were so small within locations, that it will generally make more sense just to have a single “wild” category than having individual categories for each site. However, there are a few cases where it will be possible to use mixed effects modeling with specific location (PW or TC) nested inside general location

1.5 Functions

1.5.1 perm.loop

  • Takes a named list of dissimilarity objects (which the formula refers to by name), runs a PERMANOVA on each, and outputs a PERMANOVA table for each test, as well as a condensed output table from all PERMANOVAs, with adjusted P values
  • P values are adjusted per column
    • It is assumed that the first 4 entries in the distance list are ASVs, and the last 4 are families, and it applies p.adjust separately within each
  • The final table (beginning “Summary (p values):”) has raw and adjusted P values for predictors
  • Input:
    • Form = (character) the full formula for the PERMANOVA, possibly including a dist_subset() command
      • To guarantee this, it is best to put “distance.list[[i]]” as the response in the formula (or “dist_subset(distance.list[[i]]…”)
    • data = the metadata
    • response = (vector of characters) the factors you want P values for
      • must be in the same order as factors in the formula, can use different names, but cannot skip or go out of order
      • e.g., if form = “x~a+b+c”, the output will be in the order a, b, c, with the names in response assigned as column names. response could be c(“a”,“b”,“c”), in which case the column called “a” holds the results for a and so on, or other names could be substituted, but the order remains the same. Thus, if response = c(“c”,“a”,“b”), then in the output, the column called “c” will actually be the results for a and so on.
    • Caption = (character) a label you want above each output table (will follow the name of the dissimilarity object being tested or “Summary (p values)”)
    • distance.list = the list of dist objects
# perm.loop: run a PERMANOVA (adonis2) on every dissimilarity object in a list and summarise the P values
#   form          - (character) full PERMANOVA formula; should use "distance.list[[i]]" as the response
#   data          - the metadata passed to adonis2
#   response      - (character vector) labels for the predictors, in the same order as in the formula
#   caption       - (character) label appended to the caption of every printed table
#   distance.list - named list of dist objects; entries 1-4 and 5-8 are Holm-adjusted as two separate blocks
# Prints one table per test plus a summary table, and returns the summary data frame
# (raw P values followed by adjusted P values, one row per dissimilarity object).
perm.loop <- function(form, data, response,caption,distance.list){
  set.seed(1234) # fixed seed so the permutation P values are reproducible
  res <- vector("list",length(distance.list))
  for(i in seq_along(distance.list)){
    # the formula is built inside the loop so that "distance.list[[i]]" in form refers to the current element
    perm.i <- adonis2(
      as.formula(form)
      ,data=data,permutations=5000)

    res[[i]] <- perm.i$`Pr(>F)`
    print(kable.wrap(perm.i, caption=paste(names(distance.list)[i],caption)))
    cat('\n')
  }

  # one row per dissimilarity object; keep only the leading entries that correspond to the predictors.
  # drop=FALSE keeps a data frame (and therefore the row names) even when there is a single predictor
  res <- do.call("rbind.data.frame",res)[,seq_along(response),drop=FALSE]
  rownames(res) <- names(distance.list)

  # raw P values for one block of rows, followed by their Holm-adjusted values (adjusted per column)
  holm.block <- function(rows){
    raw <- res[rows,,drop=FALSE]
    adjusted <- apply(raw,2,function(x){p.adjust(x,method="holm")})
    out <- cbind.data.frame(raw,adjusted)
    colnames(out) <- c(response,gsub(" ",".",paste(response,"adjusted")))
    out
  }

  # rows 1-4 (ASV level) and rows 5-8 (family level) are adjusted separately
  res <- rbind.data.frame(holm.block(1:4),holm.block(5:8))

  print(kable.wrap(res,caption=paste("Summary (p values):",caption)))

  res
}

1.5.2 split.dist

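  • Function to convert one triangle of a dist object to a long format (one row per pair of samples), with no duplication or 0s from comparing a sample to itself. x should be the square matrix form of the dist object (e.g., as.matrix(d)), with sample IDs as row and column names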
# split.dist: convert a square dissimilarity matrix to long format, one row per unique pair of samples
#   x - square matrix of pairwise dissimilarities with sample IDs as row/column names;
#       a dist object is also accepted and is converted with as.matrix
# Returns a data frame with columns:
#   row.b - the dissimilarity for the pair
#   ID1   - column name of the pair (the later sample)
#   ID2   - row name of the pair (the earlier sample)
# Pairs are ordered row by row across the upper triangle: (1,2), (1,3), ..., (2,3), ...
# Self-comparisons and duplicate pairs are not included; fewer than two samples gives zero rows.
split.dist <- function(x){
  x <- as.matrix(x)
  # walking the lower triangle column by column and swapping the indices visits
  # the upper triangle row by row, which is the order described above
  idx <- which(lower.tri(x), arr.ind = TRUE)
  first <- idx[, "col"]   # earlier sample (row of x)
  second <- idx[, "row"]  # later sample (column of x)
  data.frame(row.b = x[cbind(first, second)],
             ID1 = colnames(x)[second],
             ID2 = rownames(x)[first],
             stringsAsFactors = FALSE)
}

1.5.3 phylo.heat

  • Wrapper function for making crude heatmaps on subsets via phyloseq
  • Uses default log base 4 transformation
  • In all cases, we will convert to proportions before running
  • Inputs
    • phyloseq.object = initial phyloseq object to subset
    • data = data frame to base the subset on (metadata)
    • ID.col = (character) name of column of sample IDs in data
    • order1-4 = (character) name(s) of columns in data to order and label samples by
      • can include up to 4 (order1, order2, etc.) others should be left to NULL
      • sorts based on the order provided (e.g., if order1 = “date” and order2 = “type” it sorts by date, then by type )
    • title = (character) title of the graph
    • method = (character) see method in plot_heatmap
    • taxa.label = (character) see taxa.label in plot_heatmap
  • phylo.heat.loop = same thing, but…
    • for the phyloseq object, provide a list of phyloseq objects (e.g., at different taxonomic levels)
    • For taxa.label give a list of character labels of the same length as phyloseq.object.list (e.g. names of taxa levels)
      • The title will have the entries from taxa.label (in parentheses) at the end
    • This will loop over the list and make a plot for each
      • This could be accomplished within phyloseq, but the step of compressing the taxa is slow, so doing it externally saves time in the long run
# phylo.heat: heatmap of a subset of samples, ordered and labelled by metadata columns
#   phyloseq.object - phyloseq object to subset
#   data            - metadata data frame defining the subset (one row per sample to keep)
#   ID.col          - (character) name of the column in data holding the sample IDs (must match the sample names)
#   order1-4        - (character) names of columns in data to sort and label samples by; leave unused ones NULL
#   title           - (character) plot title
#   method          - (character) ordination method passed to plot_heatmap; "NMDS" is plot_heatmap's own default
#                     (the previous default, method = method, referred to itself and failed whenever method was omitted)
#   taxa.label      - (character) passed to plot_heatmap
# Prints the plot.
phylo.heat <- function(phyloseq.object = phylo, data, ID.col = "sample.ID", order1 = NULL, order2 = NULL, order3 = NULL, order4 = NULL, title, method = "NMDS", taxa.label = "Species"){
  phylo.temp <- prune_samples(x = phyloseq.object,samples = data[,which(colnames(data) == ID.col)]) #subset phyloseq.object
  phylo.temp <- filter_taxa(phylo.temp , function(x) mean(x) > 0, TRUE) # drop taxa absent from the subset

  # ordering/labelling column: the chosen columns pasted together
  # (an order argument left as NULL matches no column and adds nothing but a separator)
  data$order <- paste(data[,which(colnames(data) == order1)],
                      data[,which(colnames(data) == order2)],
                      data[,which(colnames(data) == order3)],
                      data[,which(colnames(data) == order4)])

  # line the metadata rows up with the samples in the pruned object, using the ID.col that was supplied
  data <- data[match(sample_names(phylo.temp),data[[ID.col]]),]

  sample_data(phylo.temp)$grouping <- data$order

  data <- data[order(data$order),] # order data

  heat <- plot_heatmap(phylo.temp,sample.order = data[,which(colnames(data) == ID.col)],sample.label = "grouping", method = method,taxa.label=taxa.label,title=title)

  print(heat)
}


# phylo.heat.loop: draw one phylo.heat heatmap per phyloseq object in a list (e.g., one per taxonomic level)
#   phyloseq.object.list - list of phyloseq objects
#   taxa.label.list      - list of taxa labels, same length and order as phyloseq.object.list;
#                          each label is also appended to the title in parentheses
#   method               - (character) ordination method passed to plot_heatmap; "NMDS" is plot_heatmap's own default
#                          (the previous default, method = method, referred to itself and failed whenever method was omitted)
#   all other arguments  - passed to phylo.heat unchanged for every plot
phylo.heat.loop <- function(phyloseq.object.list = phylo.list, data, ID.col = "sample.ID", order1 = NULL, order2 = NULL, order3 = NULL, order4 = NULL, title, method = "NMDS", taxa.label.list = label.list){

  for(i in seq_along(phyloseq.object.list)){

    taxa.label <- taxa.label.list[[i]]

    # subsetting, ordering, labelling and printing are identical to a single plot, so reuse phylo.heat
    phylo.heat(phyloseq.object = phyloseq.object.list[[i]],
               data = data,
               ID.col = ID.col,
               order1 = order1, order2 = order2, order3 = order3, order4 = order4,
               title = paste(title,gsub(" ","",paste("(",taxa.label,")"))),
               method = method,
               taxa.label = taxa.label)
  }
}

1.5.4 Venn diagrams

  • Function to make categories for venn diagrams
  • Takes a data frame (not a matrix) where each column is a group (e.g. study site) and each row is a thing that was counted (e.g. species)
  • Data should be numeric with either 0 or NA for absent and any numeric value (other than 0) for present
  • It outputs either a list or vector with the results
  • Only handles up to 4 categories
  • x = your data frame
  • make.vector = if you want to output the results in my venn.quad function, use this
  • return.unique =
    • if T then it will only return counts for unique combinations (e.g., if a species [row] is present in columns 1, 2, and 3, then it will be counted for the 1&2&3 category, but not for 1&2 or 1&3 or 2&3 or 1, etc.)
    • if F then it returns the total count for each category, even if that results in counting the same data point twice (e.g., a species present in columns 1 and 2 will be counted for column 1, for column 2, and for the 1&2 category)
    • set to false if you plan on using functions from VennDiagram
# venn.cat: count the rows (e.g., species) present in every combination of 2-4 groups (columns)
#   x             - data frame (not a matrix); one column per group, one row per thing counted;
#                   0 or NA = absent, any other number = present
#   make.vector   - if TRUE, return a plain numeric vector (the form venn.quad expects) instead of a 1-row data frame
#   return.unique - if TRUE, each row is counted only for the exact combination of groups it occurs in;
#                   if FALSE, totals are returned (a row present in groups 1 and 2 counts for 1, 2 and 1&2)
# Output order: single groups, then pairs, then triples, then the 4-way overlap (e.g., 1,2,3,12,13,23,123).
# Combination names are the column names joined by "&" with all spaces removed.
# Stops with an error if x is not a data frame or does not have 2-4 columns.
venn.cat <- function(x,make.vector = F,return.unique=F){
  if(!is.data.frame(x)){stop("x must be a data frame")}
  n.groups <- ncol(x)
  if(n.groups < 2 || n.groups > 4){stop("x must have 2, 3, or 4 columns")}

  x[is.na(x)] <- 0
  present <- as.matrix(x) != 0   # TRUE where the row occurs in the group
  n.present <- rowSums(present)  # number of groups each row occurs in
  cnames <- colnames(x)

  # every combination of groups, smallest first, in the order documented above
  combos <- unlist(lapply(seq_len(n.groups),function(k){combn(n.groups,k,simplify = FALSE)}),recursive = FALSE)

  counts <- vapply(combos,function(members){
    hit <- rowSums(present[,members,drop = FALSE]) == length(members) # present in all members
    if(isTRUE(return.unique)){hit <- hit & n.present == length(members)} # ...and in no other group
    sum(hit)
  },integer(1))

  labels <- vapply(combos,function(members){
    if(length(members) == 1){
      cnames[members]
    }else{
      gsub(" ","",paste(cnames[members],collapse = " & "))
    }
  },character(1))

  if(isTRUE(make.vector)){
    result <- as.numeric(counts)
  }else{
    result <- as.data.frame(t(counts))
    colnames(result) <- labels
  }

  if(isTRUE(return.unique)){print("Only unique counts per category are returned")}
  result
}
  • Function to draw venn diagram
  • Exact same things as the draw.quad.venn function in the VennDiagram package (which may need to be loaded), but it takes a single vector (data) rather than having to enter all 15 positions separately (order still has to be the same as the order of the original function)
  • It wants totals, not unique counts (e.g., for a simple case of two categories with 3 individuals only in C1, two only in C2, and 4 in C1 and C2, you enter c(7, 6, 4))
  • Like the original function, it makes a 4-way venn diagram
  • Category names should be entered in the same order as the column order input into venn.cat
# venn.quad: draw a 4-set Venn diagram from a single vector of counts
# Same as VennDiagram::draw.quad.venn, except that the 15 counts are supplied as one vector.
#   data     - 15 totals (not unique counts) in the order
#              area1, area2, area3, area4, n12, n13, n14, n23, n24, n34, n123, n124, n134, n234, n1234
#              (e.g., the output of venn.cat with make.vector = TRUE); not used when direct.area = TRUE
#   category - the 4 category names, in the same order as the columns given to venn.cat
#   all other arguments are as in draw.quad.venn; ... is passed on to VennDiagram::adjust.venn
# Returns the gList of grobs, and draws it when ind = TRUE.
# Differences from the function it was copied from:
#   * a length-1 category is now recycled into category (it was assigned to an unused variable)
#   * cex.prop = "lin" / "log10" now work (a misplaced stop() made every non-function value fail)
#   * the "produces negative area" messages now name the correct area for a13-a15
#   * two-part print.mode labels are split with a real line break
# NOTE(review): flog.error is assumed to be available because VennDiagram attaches futile.logger - confirm
venn.quad <- function (data, category = rep("", 4),
                       lwd = rep(2, 4), lty = rep("solid", 4), col = rep("black", 4),
                       fill = NULL, alpha = rep(0.5, 4), label.col = rep("black", 15),
                       cex = rep(1, 15), fontface = rep("plain", 15),
                       fontfamily = rep("serif", 15), cat.pos = c(-15, 15, 0, 0),
                       cat.dist = c(0.22, 0.22, 0.11, 0.11), cat.col = rep("black", 4),
                       cat.cex = rep(1, 4), cat.fontface = rep("plain", 4),
                       cat.fontfamily = rep("serif", 4), cat.just = rep(list(c(0.5, 0.5)), 4),
                       rotation.degree = 0, rotation.centre = c(0.5, 0.5), ind = TRUE,
                       cex.prop = NULL, print.mode = "raw", sigdigs = 3,
                       direct.area = FALSE, area.vector = 0, ...)
{
  # log the message through the VennDiagram logger, then stop with the same message
  fail <- function(msg) {
    flog.error(msg, name = "VennDiagramLogger")
    stop(msg)
  }

  # recycle a length-1 graphical parameter to length n; any other length except n is an error
  # (allow.empty additionally accepts length 0, which leaves the value untouched)
  check.length <- function(value, name, n, allow.empty = FALSE) {
    len <- length(value)
    if (len == 1) {
      return(rep(value, n))
    }
    if (len != n && !(allow.empty && len == 0)) {
      fail(paste0("Unexpected parameter length for '", name, "'"))
    }
    value
  }

  category <- check.length(category, "category", 4)
  lwd <- check.length(lwd, "lwd", 4)
  lty <- check.length(lty, "lty", 4)
  col <- check.length(col, "col", 4)
  label.col <- check.length(label.col, "label.col", 15)
  cex <- check.length(cex, "cex", 15)
  fontface <- check.length(fontface, "fontface", 15)
  fontfamily <- check.length(fontfamily, "fontfamily", 15)
  fill <- check.length(fill, "fill", 4, allow.empty = TRUE)
  alpha <- check.length(alpha, "alpha", 4, allow.empty = TRUE)
  cat.pos <- check.length(cat.pos, "cat.pos", 4)
  cat.dist <- check.length(cat.dist, "cat.dist", 4)
  cat.col <- check.length(cat.col, "cat.col", 4)
  cat.cex <- check.length(cat.cex, "cat.cex", 4)
  cat.fontface <- check.length(cat.fontface, "cat.fontface", 4)
  cat.fontfamily <- check.length(cat.fontfamily, "cat.fontfamily", 4)

  if (!(inherits(cat.just, "list") && length(cat.just) == 4 &&
        all(lengths(cat.just) == 2))) {
    fail("Unexpected parameter format for 'cat.just'")
  }
  cat.pos <- cat.pos + rotation.degree

  if (direct.area) {
    # the 15 region counts are supplied directly as a1..a15
    areas <- area.vector
    for (i in 1:15) {
      assign(paste("a", i, sep = ""), area.vector[i])
    }
  }  else {
    # unpack the totals; unlist() also accepts a 1-row data frame such as venn.cat's default output
    data <- as.numeric(unlist(data, use.names = FALSE))
    if (length(data) != 15) {
      stop("'data' must contain exactly 15 values")
    }
    area1 <- data[1]
    area2 <- data[2]
    area3 <- data[3]
    area4 <- data[4]
    n12 <- data[5]
    n13 <- data[6]
    n14 <- data[7]
    n23 <- data[8]
    n24 <- data[9]
    n34 <- data[10]
    n123 <- data[11]
    n124 <- data[12]
    n134 <- data[13]
    n234 <- data[14]
    n1234 <- data[15]

    # convert totals to the count unique to each of the 15 regions,
    # working outwards from the 4-way overlap (a6)
    a6 <- n1234
    a12 <- n123 - a6
    a11 <- n124 - a6
    a5 <- n134 - a6
    a7 <- n234 - a6
    a15 <- n12 - a6 - a11 - a12
    a4 <- n13 - a6 - a5 - a12
    a10 <- n14 - a6 - a5 - a11
    a13 <- n23 - a6 - a7 - a12
    a8 <- n24 - a6 - a7 - a11
    a2 <- n34 - a6 - a5 - a7
    a9 <- area1 - a4 - a5 - a6 - a10 - a11 - a12 - a15
    a14 <- area2 - a6 - a7 - a8 - a11 - a12 - a13 - a15
    a1 <- area3 - a2 - a4 - a5 - a6 - a7 - a12 - a13
    a3 <- area4 - a2 - a5 - a6 - a7 - a8 - a10 - a11
    areas <- c(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11,
               a12, a13, a14, a15)
  }

  # areas.error[i] describes how areas[i] was derived (entries must stay in a1..a15 order)
  areas.error <- c("a1  <- area3 - a2 - a4 - a5 - a6 - a7 - a12 - a13",
                   "a2  <- n34 - a6 - a5 - a7",
                   "a3  <- area4 - a2 - a5 - a6 - a7 - a8 - a10 - a11",
                   "a4  <- n13 - a6 - a5 - a12",
                   "a5  <- n134 - a6",
                   "a6  <- n1234",
                   "a7  <- n234 - a6",
                   "a8  <- n24 - a6 - a7 - a11",
                   "a9  <- area1 - a4 - a5 - a6 - a10 - a11 - a12 - a15",
                   "a10 <- n14 - a6 - a5 - a11",
                   "a11 <- n124 - a6",
                   "a12 <- n123 - a6",
                   "a13 <- n23 - a6 - a7 - a12",
                   "a14 <- area2 - a6 - a7 - a8 - a11 - a12 - a13 - a15",
                   "a15 <- n12 - a6 - a11 - a12")
  for (i in seq_along(areas)) {
    if (areas[i] < 0) {
      fail(paste("Impossible:", areas.error[i], "produces negative area"))
    }
  }

  # optionally scale each label's size by the size of its area
  if (length(cex.prop) > 0) {
    if (length(cex.prop) != 1) {
      fail("Value passed to cex.prop is not length 1")
    }
    func <- cex.prop
    if (!is.function(cex.prop)) {
      if (cex.prop == "lin") {
        func <- function(x) x
      } else if (cex.prop == "log10") {
        func <- log10
      } else {
        fail(paste0("Unknown value passed to cex.prop: ", cex.prop))
      }
    }
    maxArea <- max(areas)
    for (i in seq_along(areas)) {
      cex[i] <- cex[i] * func(areas[i])/func(maxArea)
      if (cex[i] <= 0)
        stop(paste0("Error in rescaling of area labels: the label of area ",
                    i, " is less than or equal to zero"))
    }
  }

  grob.list <- gList()

  # centre (x, y), semi-axes (a, b) and rotation of each ellipse, plus which
  # entry of the fill/alpha and lwd/lty/col vectors each ellipse uses
  ellipse.positions <- matrix(nrow = 4, ncol = 7)
  colnames(ellipse.positions) <- c("x", "y", "a", "b", "rotation",
                                   "fill.mapping", "line.mapping")
  ellipse.positions[1, ] <- c(0.65, 0.47, 0.35, 0.2, 45, 2, 2)
  ellipse.positions[2, ] <- c(0.35, 0.47, 0.35, 0.2, 135, 1, 1)
  ellipse.positions[3, ] <- c(0.5, 0.57, 0.33, 0.15, 45, 4, 4)
  ellipse.positions[4, ] <- c(0.5, 0.57, 0.35, 0.15, 135, 3, 3)

  # first pass: filled ellipses with no outline
  for (i in 1:4) {
    fill.idx <- ellipse.positions[i, "fill.mapping"]
    grob.list <- gList(grob.list, VennDiagram::ellipse(
      x = ellipse.positions[i, "x"], y = ellipse.positions[i, "y"],
      a = ellipse.positions[i, "a"], b = ellipse.positions[i, "b"],
      rotation = ellipse.positions[i, "rotation"],
      gp = gpar(lty = 0, fill = fill[fill.idx], alpha = alpha[fill.idx])))
  }
  # second pass: transparent ellipses carrying the outlines
  # (namespace-qualified like the first pass; car, loaded earlier, also has an ellipse function)
  for (i in 1:4) {
    line.idx <- ellipse.positions[i, "line.mapping"]
    grob.list <- gList(grob.list, VennDiagram::ellipse(
      x = ellipse.positions[i, "x"], y = ellipse.positions[i, "y"],
      a = ellipse.positions[i, "a"], b = ellipse.positions[i, "b"],
      rotation = ellipse.positions[i, "rotation"],
      gp = gpar(lwd = lwd[line.idx], lty = lty[line.idx], col = col[line.idx],
                fill = "transparent")))
  }

  # count and label position for each of the 15 regions
  label.matrix <- matrix(nrow = 15, ncol = 3)
  colnames(label.matrix) <- c("label", "x", "y")
  label.matrix[1, ] <- c(a1, 0.35, 0.77)
  label.matrix[2, ] <- c(a2, 0.5, 0.69)
  label.matrix[3, ] <- c(a3, 0.65, 0.77)
  label.matrix[4, ] <- c(a4, 0.31, 0.67)
  label.matrix[5, ] <- c(a5, 0.4, 0.58)
  label.matrix[6, ] <- c(a6, 0.5, 0.47)
  label.matrix[7, ] <- c(a7, 0.6, 0.58)
  label.matrix[8, ] <- c(a8, 0.69, 0.67)
  label.matrix[9, ] <- c(a9, 0.18, 0.58)
  label.matrix[10, ] <- c(a10, 0.32, 0.42)
  label.matrix[11, ] <- c(a11, 0.425, 0.38)
  label.matrix[12, ] <- c(a12, 0.575, 0.38)
  label.matrix[13, ] <- c(a13, 0.68, 0.42)
  label.matrix[14, ] <- c(a14, 0.82, 0.58)
  label.matrix[15, ] <- c(a15, 0.5, 0.28)

  # label text: raw counts, percentages, or one followed by the other in parentheses on a new line
  processedLabels <- rep("", length(label.matrix[, "label"]))
  if (print.mode[1] == "percent") {
    processedLabels <- paste(signif(label.matrix[, "label"]/sum(label.matrix[, "label"]) * 100,
                                    digits = sigdigs), "%", sep = "")
    if (isTRUE(print.mode[2] == "raw")) {
      processedLabels <- paste(processedLabels, "\n(",
                               label.matrix[, "label"], ")", sep = "")
    }
  }
  if (print.mode[1] == "raw") {
    processedLabels <- label.matrix[, "label"]
    if (isTRUE(print.mode[2] == "percent")) {
      processedLabels <- paste(processedLabels, "\n(",
                               paste(signif(label.matrix[, "label"]/sum(label.matrix[, "label"]) * 100,
                                            digits = sigdigs), "%)", sep = ""),
                               sep = "")
    }
  }
  for (i in seq_len(nrow(label.matrix))) {
    grob.list <- gList(grob.list, textGrob(label = processedLabels[i],
                                           x = label.matrix[i, "x"], y = label.matrix[i, "y"],
                                           gp = gpar(col = label.col[i], cex = cex[i], fontface = fontface[i],
                                                     fontfamily = fontfamily[i])))
  }

  # category names, placed relative to fixed anchor points
  cat.pos.x <- c(0.18, 0.82, 0.35, 0.65)
  cat.pos.y <- c(0.58, 0.58, 0.77, 0.77)
  for (i in 1:4) {
    this.cat.pos <- find.cat.pos(x = cat.pos.x[i], y = cat.pos.y[i],
                                 pos = cat.pos[i], dist = cat.dist[i])
    grob.list <- gList(grob.list, textGrob(label = category[i],
                                           x = this.cat.pos$x, y = this.cat.pos$y, just = cat.just[[i]],
                                           gp = gpar(col = cat.col[i], cex = cat.cex[i], fontface = cat.fontface[i],
                                                     fontfamily = cat.fontfamily[i])))
  }

  grob.list <- VennDiagram::adjust.venn(
    VennDiagram::rotate.venn.degrees(grob.list, rotation.degree,
                                     rotation.centre[1], rotation.centre[2]),
    ...)
  if (ind) {
    grid.draw(grob.list)
  }
  return(grob.list)
}

# Draw a three-set Venn diagram from a single vector of counts.
#
# Adapted from the triple-Venn drawing routine of the VennDiagram package; the
# set sizes and overlaps are supplied as one vector instead of seven arguments.
#
# Arguments
#   data          numeric vector read positionally as
#                 c(area1, area2, area3, n12, n13, n23, n123)
#   category      labels for the three sets (length 1 is recycled to 3)
#   lwd, lty, col, fill, alpha
#                 circle appearance, length 1 or 3 (fill/alpha may be empty)
#   label.col, cex, fontface, fontfamily
#                 appearance of the seven region labels, length 1 or 7
#   cat.pos, cat.dist, cat.col, cat.cex, cat.fontface, cat.fontfamily
#                 placement/appearance of the set labels, length 1 or 3
#   cat.just      list of three length-2 justification vectors
#   cat.default.pos  "outer" or "text"; anchor used for the set labels
#   cex.prop      NULL, "lin", "log10" or a function used to rescale label cex
#                 by region size
#   print.mode    "raw" and/or "percent"; first entry is the main label format
#   direct.area, area.vector
#                 when direct.area is TRUE the seven region counts are taken
#                 from area.vector instead of being derived from `data`
#   rotation, reverse, rotation.degree, rotation.centre, ...
#                 forwarded to the VennDiagram helpers called below
#   scaled, sep.dist, offset
#                 accepted (and partly validated) for signature compatibility;
#                 they do not influence the drawing code in this function
#   ind           if TRUE the result is drawn with grid.draw()
#
# Returns the gList of grobs making up the diagram.
venn.trip <- function(data, category = rep("", 3), rotation = 1, reverse = FALSE,
                      euler.d = TRUE, scaled = TRUE, lwd = rep(2, 3),
                      lty = rep("solid", 3), col = rep("black", 3), fill = NULL,
                      alpha = rep(0.5, 3), label.col = rep("black", 7),
                      cex = rep(1, 7), fontface = rep("plain", 7),
                      fontfamily = rep("serif", 7), cat.pos = c(-40, 40, 180),
                      cat.dist = c(0.05, 0.05, 0.025), cat.col = rep("black", 3),
                      cat.cex = rep(1, 3), cat.fontface = rep("plain", 3),
                      cat.fontfamily = rep("serif", 3),
                      cat.just = list(c(0.5, 1), c(0.5, 1), c(0.5, 0)),
                      cat.default.pos = "outer", cat.prompts = FALSE,
                      rotation.degree = 0, rotation.centre = c(0.5, 0.5),
                      ind = TRUE, sep.dist = 0.05, offset = 0, cex.prop = NULL,
                      print.mode = "raw", sigdigs = 3, direct.area = FALSE,
                      area.vector = 0, ...) {
  # Log an error through the VennDiagram logger and abort with the same text.
  fail <- function(msg) {
    flog.error(msg, name = "VennDiagramLogger")
    stop(msg, call. = FALSE)
  }

  # Recycle a length-1 argument to length n; reject any other length except
  # (optionally) length 0.
  recycle.arg <- function(value, n, arg.name, allow.empty = FALSE) {
    if (length(value) == 1) {
      return(rep(value, n))
    }
    if (length(value) != n && !(allow.empty && length(value) == 0)) {
      fail(paste0("Unexpected parameter length for '", arg.name, "'"))
    }
    value
  }

  # One of the two points where the line through (x1, y1) and (x2, y2) crosses
  # the circle centred at (x0, y0) with radius r; `sign` (+1 / -1) picks which.
  chord.circle.point <- function(x1, y1, x2, y2, x0, y0, r, sign) {
    m <- (y2 - y1) / (x2 - x1)
    b <- y1 - m * x1
    half <- sqrt(r^2 - ((y0 - m * x0 - b) / sqrt(1 + m^2))^2) / sqrt(1 + m^2)
    list(
      x = (m * y0 + x0 - m * b) / (m^2 + 1) + sign * half,
      y = (m^2 * y0 + m * x0 + b) / (m^2 + 1) + sign * m * half
    )
  }

  area1 <- data[1]
  area2 <- data[2]
  area3 <- data[3]
  n12 <- data[4]
  n13 <- data[5]
  n23 <- data[6]
  n123 <- data[7]

  # ---- argument validation / recycling ----
  # (previously the recycled category was stored in an unused variable `cat`,
  # so a single label was never expanded to three)
  category <- recycle.arg(category, 3, "category")
  lwd <- recycle.arg(lwd, 3, "lwd")
  lty <- recycle.arg(lty, 3, "lty")
  col <- recycle.arg(col, 3, "col")
  label.col <- recycle.arg(label.col, 7, "label.col")
  cex <- recycle.arg(cex, 7, "cex")
  fontface <- recycle.arg(fontface, 7, "fontface")
  fontfamily <- recycle.arg(fontfamily, 7, "fontfamily")
  fill <- recycle.arg(fill, 3, "fill", allow.empty = TRUE)
  alpha <- recycle.arg(alpha, 3, "alpha", allow.empty = TRUE)
  cat.pos <- recycle.arg(cat.pos, 3, "cat.pos")
  cat.dist <- recycle.arg(cat.dist, 3, "cat.dist")
  cat.col <- recycle.arg(cat.col, 3, "cat.col")
  cat.cex <- recycle.arg(cat.cex, 3, "cat.cex")
  cat.fontface <- recycle.arg(cat.fontface, 3, "cat.fontface")
  cat.fontfamily <- recycle.arg(cat.fontfamily, 3, "cat.fontfamily")

  if (!(inherits(cat.just, "list") && length(cat.just) == 3) ||
      !all(lengths(cat.just) == 2)) {
    fail("Unexpected parameter format for 'cat.just'")
  }
  if (euler.d == FALSE && scaled == TRUE) {
    fail("Uninterpretable parameter combination\\nPlease set both euler.d = FALSE and scaled = FALSE to force Venn diagrams.")
  }
  if (offset > 1 || offset < 0) {
    fail("'Offset' must be between 0 and 1.  Try using 'rotation.degree = 180' to achieve offsets in the opposite direction.")
  }

  cat.pos <- cat.pos + rotation.degree

  # ---- counts for the seven regions ----
  if (direct.area) {
    areas <- area.vector
  } else {
    a1 <- area1 - n12 - n13 + n123
    a2 <- n12 - n123
    a3 <- area2 - n12 - n23 + n123
    a4 <- n13 - n123
    a5 <- n123
    a6 <- n23 - n123
    a7 <- area3 - n13 - n23 + n123
    areas <- c(a1, a2, a3, a4, a5, a6, a7)
  }

  # ---- Euler special cases are delegated to VennDiagram ----
  if (euler.d) {
    special.code <- VennDiagram::decide.special.case(areas)
    if (special.code %in% c("121AO", "100", "033", "011A",
                            "021AA", "022AAOO", "011O", "112AA", "122AAOO", "010",
                            "110", "130", "001", "012AA", "120", "022AAAO", "032",
                            "111A", "023")) {
      if (special.code %in% c("022AAAO", "022AAOO", "023",
                              "032", "120", "121AO", "122AAOO", "130")) {
        f1 <- VennDiagram::draw.sp.case.scaled
      } else {
        f1 <- VennDiagram::draw.sp.case.preprocess
      }
      rst <- f1(sp.case.name = special.code, a1 = areas[1],
                a2 = areas[2], a3 = areas[3], a4 = areas[4],
                a5 = areas[5], a6 = areas[6], a7 = areas[7],
                category = category, reverse = reverse,
                cat.default.pos = cat.default.pos,
                lwd = lwd, lty = lty, col = col, label.col = label.col,
                cex = cex, fontface = fontface, fontfamily = fontfamily,
                cat.pos = cat.pos, cat.dist = cat.dist, cat.col = cat.col,
                cat.cex = cat.cex, cat.fontface = cat.fontface,
                cat.fontfamily = cat.fontfamily, cat.just = cat.just,
                cat.prompts = cat.prompts, fill = fill, alpha = alpha,
                print.mode = print.mode, sigdigs = sigdigs, ...)
      rst <- VennDiagram::adjust.venn(
        VennDiagram::rotate.venn.degrees(gList1 = rst,
                                         angle = rotation.degree,
                                         x.centre = rotation.centre[1],
                                         y.centre = rotation.centre[2]),
        ...
      )
      if (ind) {
        grid.draw(rst)
      }
      return(rst)
    }
  }

  # ---- apply the requested rotation / reversal of the three sets ----
  rotated <- VennDiagram::rotate(areas, category, lwd, lty,
                                 col, label.col, cex, fontface, fontfamily,
                                 cat.col, cat.cex, cat.fontface, cat.fontfamily,
                                 alpha, rotation, reverse, fill)
  # element-wise copy keeps the length/attributes of `areas`
  for (i in seq_along(areas)) {
    areas[i] <- rotated[[1]][i]
  }
  category <- rotated[[2]]
  lwd <- rotated$lwd
  lty <- rotated$lty
  col <- rotated$col
  label.col <- rotated$label.col
  cex <- rotated$cex
  fontface <- rotated$fontface
  fontfamily <- rotated$fontfamily
  cat.col <- rotated$cat.col
  cat.cex <- rotated$cat.cex
  cat.fontface <- rotated$cat.fontface
  cat.fontfamily <- rotated$cat.fontfamily
  fill <- rotated$fill
  alpha <- rotated$alpha

  areas.error <- c("a1 <- area1 - n12 - n13 + n123", "a2 <- n12 - n123",
                   "a3 <- area2 - n12 - n23 + n123", "a4 <- n13 - n123",
                   "a5 <- n123", "a6 <- n23 - n123",
                   "a7 <- area3 - n13 - n23 + n123")
  for (i in seq_along(areas)) {
    if (areas[i] < 0) {
      fail(paste("Impossible:", areas.error[i], "produces negative area"))
    }
  }
  # kept from the original code; `scaled` is not read again below
  if (any(areas != 0)) {
    scaled <- FALSE
  }

  # ---- optional informational messages about label placement ----
  is.defaults <- TRUE
  if (is.expression(category)) {
    is.defaults <- FALSE
  }
  if (all(cat.default.pos != "outer", cat.default.pos != "text",
          !is.defaults, cat.prompts)) {
    flog.info("No default location recognized.  Automatically changing to 'outer'",
              name = "VennDiagramLogger")
    cat.default.pos <- "outer"
  }
  if (all(cat.default.pos == "outer", !is.defaults, cat.prompts)) {
    flog.info("Placing category labels at default outer locations.  Use 'cat.pos' and 'cat.dist' to modify location.",
              name = "VennDiagramLogger")
    flog.info(paste("Current 'cat.pos':", cat.pos[1], "degrees,",
                    cat.pos[2], "degrees"), name = "VennDiagramLogger")
    flog.info(paste("Current 'cat.dist':", cat.dist[1], ",",
                    cat.dist[2]), name = "VennDiagramLogger")
  }
  if (all(cat.default.pos == "text", !is.defaults, cat.prompts)) {
    flog.info("Placing category labels at default text locations.  Use 'cat.pos' and 'cat.dist' to modify location.",
              name = "VennDiagramLogger")
    flog.info(paste("Current 'cat.pos':", cat.pos[1], "degrees,",
                    cat.pos[2], "degrees"), name = "VennDiagramLogger")
    flog.info(paste("Current 'cat.dist':", cat.dist[1], ",",
                    cat.dist[2]), name = "VennDiagramLogger")
  }

  # ---- circle sizes and separations ----
  # Unless an object called `overrideTriple` exists, three equal circles with a
  # fixed overlap are drawn regardless of the counts.
  grob.list <- gList()
  use.fixed.layout <- !exists("overrideTriple")
  if (use.fixed.layout) {
    r1 <- sqrt(100 / pi)
    r2 <- r1
    r3 <- r1
  } else {
    r1 <- sqrt(area1 / pi)
    r2 <- sqrt(area2 / pi)
    r3 <- sqrt(area3 / pi)
  }
  max.circle.size <- 0.2
  shrink.factor <- max.circle.size / r1
  r1 <- r1 * shrink.factor
  r2 <- r2 * shrink.factor
  r3 <- r3 * shrink.factor
  # a, b, c: centre-to-centre distances for pairs 1-2, 2-3 and 1-3
  if (use.fixed.layout) {
    a <- find.dist(100, 100, 40) * shrink.factor
    b <- a
    c <- a
  } else {
    a <- find.dist(area1, area2, n12) * shrink.factor
    b <- find.dist(area2, area3, n23) * shrink.factor
    c <- find.dist(area1, area3, n13) * shrink.factor
  }

  # ---- circle centres ----
  x.centres <- vector(mode = "numeric", length = 3)
  y.centres <- vector(mode = "numeric", length = 3)
  beta <- (a^2 + c^2 - b^2) / (2 * a * c)
  gamma <- sqrt(1 - beta^2)
  x.centres[1] <- (r1 - r2 - a + 1) / 2
  x.centres[3] <- x.centres[1] + c * beta
  y.centres[3] <- (r3 - r1 + 1 - c * gamma) / 2
  y.centres[1] <- y.centres[3] + c * gamma
  x.centres[2] <- x.centres[1] + a
  y.centres[2] <- y.centres[1]
  radii <- c(r1, r2, r3)

  # filled discs first, outlines on top
  for (i in 1:3) {
    grob.list <- gList(grob.list, VennDiagram::ellipse(
      x = x.centres[i], y = y.centres[i], a = radii[i], b = radii[i],
      gp = gpar(lty = 0, fill = fill[i], alpha = alpha[i])
    ))
  }
  for (i in 1:3) {
    grob.list <- gList(grob.list, VennDiagram::ellipse(
      x = x.centres[i], y = y.centres[i], a = radii[i], b = radii[i],
      gp = gpar(lwd = lwd[i], lty = lty[i], col = col[i], fill = "transparent")
    ))
  }

  # ---- positions of the seven region labels ----
  new.x.centres <- vector(mode = "numeric", length = 3)
  cell.labels <- areas
  cell.x <- vector(mode = "numeric", length = 7)
  cell.y <- vector(mode = "numeric", length = 7)

  # intersection points of circles 1 and 2 (they share a y coordinate)
  x.cept.12 <- (r1^2 - r2^2 - x.centres[1]^2 + x.centres[2]^2) /
    (2 * (x.centres[2] - x.centres[1]))
  y.cept.12.1 <- sqrt(r1^2 - (x.cept.12 - x.centres[1])^2) + y.centres[1]
  y.cept.12.2 <- -sqrt(r1^2 - (x.cept.12 - x.centres[1])^2) + y.centres[1]

  # intersection points of circles 1 and 3: solved with circle 3 placed level
  # with circle 1, then rotated by theta about circle 1
  theta <- acos((a^2 + c^2 - b^2) / (2 * a * c))
  new.x.centres[3] <- x.centres[1] + c
  l.x.cept.13 <- (r1^2 - r3^2 - x.centres[1]^2 + new.x.centres[3]^2) /
    (2 * (new.x.centres[3] - x.centres[1]))
  l.y.cept.13.1 <- sqrt(r1^2 - (l.x.cept.13 - x.centres[1])^2) + y.centres[1]
  l.y.cept.13.2 <- -sqrt(r1^2 - (l.x.cept.13 - x.centres[1])^2) + y.centres[1]
  rot <- sqrt(2 * r1^2 - 2 * r1^2 * cos(theta))
  ang.13.1 <- pi / 2 - atan((l.y.cept.13.1 - y.centres[1]) /
                              (l.x.cept.13 - x.centres[1])) + theta / 2
  ang.13.2 <- pi / 2 - atan((l.y.cept.13.2 - y.centres[1]) /
                              (l.x.cept.13 - x.centres[1])) + theta / 2
  x.cept.13.1 <- l.x.cept.13 + rot * cos(ang.13.1)
  x.cept.13.2 <- l.x.cept.13 + rot * cos(ang.13.2)
  y.cept.13.1 <- l.y.cept.13.1 - rot * sin(ang.13.1)
  y.cept.13.2 <- l.y.cept.13.2 - rot * sin(ang.13.2)

  # intersection points of circles 2 and 3: same approach, rotating about
  # circle 2 in the opposite direction
  theta <- -acos((a^2 + b^2 - c^2) / (2 * a * b))
  new.x.centres[3] <- x.centres[2] - b
  l.x.cept.23 <- (r2^2 - r3^2 - x.centres[2]^2 + new.x.centres[3]^2) /
    (2 * (new.x.centres[3] - x.centres[2]))
  l.y.cept.23.1 <- sqrt(r2^2 - (l.x.cept.23 - x.centres[2])^2) + y.centres[2]
  l.y.cept.23.2 <- -sqrt(r2^2 - (l.x.cept.23 - x.centres[2])^2) + y.centres[2]
  rot <- sqrt(2 * r2^2 - 2 * r2^2 * cos(theta))
  ang.23.1 <- pi / 2 - atan((y.centres[2] - l.y.cept.23.1) /
                              (x.centres[2] - l.x.cept.23)) + theta / 2
  ang.23.2 <- pi / 2 - atan((y.centres[2] - l.y.cept.23.2) /
                              (x.centres[2] - l.x.cept.23)) + theta / 2
  x.cept.23.1 <- l.x.cept.23 + rot * cos(ang.23.1)
  x.cept.23.2 <- l.x.cept.23 + rot * cos(ang.23.2)
  y.cept.23.1 <- l.y.cept.23.1 - rot * sin(ang.23.1)
  y.cept.23.2 <- l.y.cept.23.2 - rot * sin(ang.23.2)

  # region 5 (all three sets): where the 2-3 chord meets the 1-2 chord
  m.23 <- (y.cept.23.2 - y.cept.23.1) / (x.cept.23.2 - x.cept.23.1)
  cell.x[5] <- x.cept.12
  cell.y[5] <- m.23 * (x.cept.12 - x.cept.23.1) + y.cept.23.1

  # region 3: midpoint between a 1-3 intersection and where the 1-3 chord
  # leaves circle 2
  sect <- chord.circle.point(x.cept.13.1, y.cept.13.1, x.cept.13.2, y.cept.13.2,
                             x.centres[2], y.centres[2], r2, sign = 1)
  cell.x[3] <- (x.cept.13.1 + sect$x) / 2
  cell.y[3] <- (y.cept.13.1 + sect$y) / 2

  # region 1: midpoint between a 2-3 intersection and where the 2-3 chord
  # leaves circle 1
  sect <- chord.circle.point(x.cept.23.1, y.cept.23.1, x.cept.23.2, y.cept.23.2,
                             x.centres[1], y.centres[1], r1, sign = -1)
  cell.x[1] <- (x.cept.23.1 + sect$x) / 2
  cell.y[1] <- (y.cept.23.1 + sect$y) / 2

  # region 7: on the 1-2 chord, between its lower end and the bottom of circle 3
  y.sect <- -sqrt(r3^2 - (x.cept.12 - x.centres[3])^2) + y.centres[3]
  cell.x[7] <- x.cept.12
  cell.y[7] <- (y.cept.12.2 + y.sect) / 2

  # region 6: other end of the 2-3 chord
  sect <- chord.circle.point(x.cept.23.1, y.cept.23.1, x.cept.23.2, y.cept.23.2,
                             x.centres[1], y.centres[1], r1, sign = 1)
  cell.x[6] <- (x.cept.23.2 + sect$x) / 2
  cell.y[6] <- (y.cept.23.2 + sect$y) / 2

  # region 4: other end of the 1-3 chord
  sect <- chord.circle.point(x.cept.13.1, y.cept.13.1, x.cept.13.2, y.cept.13.2,
                             x.centres[2], y.centres[2], r2, sign = -1)
  cell.x[4] <- (x.cept.13.2 + sect$x) / 2
  cell.y[4] <- (y.cept.13.2 + sect$y) / 2

  # region 2: on the 1-2 chord, between its upper end and the top of circle 3
  y.sect <- sqrt(r3^2 - (x.cept.12 - x.centres[3])^2) + y.centres[3]
  cell.x[2] <- x.cept.12
  cell.y[2] <- (y.cept.12.1 + y.sect) / 2

  # ---- optional rescaling of label size by region size ----
  if (length(cex.prop) > 0) {
    if (length(cex.prop) != 1) {
      fail("Value passed to cex.prop is not length 1")
    }
    func <- cex.prop
    if (!is.function(cex.prop)) {
      if (cex.prop == "lin") {
        func <- function(x) x
      } else if (cex.prop == "log10") {
        func <- log10
      } else {
        # previously this stop() sat outside the else branch and therefore
        # also fired for the valid values "lin" and "log10"
        fail(paste0("Unknown value passed to cex.prop: ", cex.prop))
      }
    }
    maxArea <- max(areas)
    for (i in seq_along(areas)) {
      cex[i] <- cex[i] * func(areas[i]) / func(maxArea)
      if (cex[i] <= 0) {
        stop(paste0("Error in rescaling of area labels: the label of area ",
                    i, " is less than or equal to zero"))
      }
    }
  }

  # ---- region label text ----
  percent.labels <- signif(cell.labels / sum(cell.labels) * 100, digits = sigdigs)
  processedLabels <- rep("", length(cell.labels))
  if (print.mode[1] == "percent") {
    processedLabels <- paste0(percent.labels, "%")
    if (isTRUE(print.mode[2] == "raw")) {
      processedLabels <- paste0(processedLabels, "\\n(", cell.labels, ")")
    }
  }
  if (print.mode[1] == "raw") {
    processedLabels <- cell.labels
    if (isTRUE(print.mode[2] == "percent")) {
      processedLabels <- paste0(processedLabels, "\\n(",
                                paste0(percent.labels, "%)"))
    }
  }
  for (i in 1:7) {
    grob.list <- gList(grob.list, textGrob(
      label = processedLabels[i], x = cell.x[i], y = cell.y[i],
      gp = gpar(col = label.col[i], cex = cex[i], fontface = fontface[i],
                fontfamily = fontfamily[i])
    ))
  }

  # ---- set (category) labels ----
  # regions holding the elements unique to sets 1, 2 and 3
  text.location.mapping <- c(1, 3, 7)
  for (i in 1:3) {
    if ("outer" == cat.default.pos) {
      this.cat.pos <- find.cat.pos(x = x.centres[i], y = y.centres[i],
                                   pos = cat.pos[i], dist = cat.dist[i],
                                   r = radii[i])
    } else if ("text" == cat.default.pos) {
      this.cat.pos <- find.cat.pos(x = cell.x[text.location.mapping[i]],
                                   y = cell.y[text.location.mapping[i]],
                                   pos = cat.pos[i], dist = cat.dist[i])
    } else {
      fail("Invalid setting of cat.default.pos")
    }
    grob.list <- gList(grob.list, textGrob(
      label = category[i], x = this.cat.pos$x, y = this.cat.pos$y,
      just = cat.just[[i]],
      gp = gpar(col = cat.col[i], cex = cat.cex[i],
                fontface = cat.fontface[i], fontfamily = cat.fontfamily[i])
    ))
  }

  grob.list <- VennDiagram::adjust.venn(
    VennDiagram::rotate.venn.degrees(gList1 = grob.list,
                                     angle = rotation.degree,
                                     x.centre = rotation.centre[1],
                                     y.centre = rotation.centre[2]),
    ...
  )
  if (ind) {
    grid.draw(grob.list)
  }
  return(grob.list)
}

2 Mock communities

  • Examine results of 3 Zymo Research mock bacterial communities that were extracted and sequenced alongside the actual samples
# read-level composition of the sequenced mock communities
mock <- read.csv("Supplemental mock.csv")

# one stacked bar per mock sample, coloured by species
ggplot(data = mock,
       mapping = aes(x = sample, y = proportion.of.reads, fill = species)) +
  geom_bar(stat = "identity", position = "stack")

3 Broad beta and alpha diversity patterns

  • This section contains plots looking at broad patterns
  • The data are still pseudoreplicated, and we cannot run rigorous statistics at this level, but it provides a good visual overview

3.1 PCoAs

  • Ordination plots based on different dissimilarity measures
  • Results are shown both at the ASV and the Family level
  • The ASV plot based on Bray-Curtis was included in the manuscript
# put the metadata rows in the same sample order as the distance objects
meta <- meta[match(colnames(as.matrix(dists[[1]])), meta$sample.ID), ]

for (i in seq_along(dists)) {

  # PCoA on the i-th distance object; add = TRUE applies a correction for
  # negative eigenvalues (see the `add` argument of cmdscale)
  pcoa <- cmdscale(dists[[i]], k = 2, add = TRUE, eig = TRUE)

  # percent of variance explained by each of the first two coordinates
  pcoa.eig <- (pcoa$eig[1:2] / sum(pcoa$eig)) * 100

  # per-distance copy of the metadata carrying the ordination coordinates
  meta.i <- meta
  meta.i$x <- pcoa$points[, 1]
  meta.i$y <- pcoa$points[, 2]

  plot <- ggplot(meta.i, aes(x = x, y = y)) +
    geom_point(
      aes(shape = location.general, fill = sample.type, stroke = 1),
      color = "black",
      size = 4
    ) +
    scale_shape_manual(values = c(21, 22, 24, 25)) +
    scale_fill_manual(
      values = c(
        "#cc3a47",
        rgb(1, 0.7921569, 0.1568627),
        rgb(0.0078, 0.4392, 0.7490)
      )
    ) +
    theme_bw() +
    # axis titles show the percent variance explained
    labs(
      x = pcoa.eig[1],
      y = pcoa.eig[2],
      title = paste(names(dists)[i], "(broad patterns)")
    ) +
    # colour the fill-legend keys so the sample types are distinguishable
    guides(
      fill = guide_legend(
        override.aes = list(
          colour = c(
            cloaca = "#cc3a47",
            plastron = rgb(1, 0.7921569, 0.1568627),
            water = rgb(0.0078, 0.4392, 0.7490)
          )
        )
      )
    )

  print(plot)

}

3.2 Alpha diversity boxplots

  • These plots are included in the manuscript
# richness per sample type / location group
ggplot(
  data = meta,
  mapping = aes(x = type.location.gen.water.type, y = rich, fill = sample.type)
) +
  geom_boxplot() +
  scale_fill_manual(
    values = c(
      "#cc3a47",
      rgb(1, 0.7921569, 0.1568627),
      rgb(0.0078, 0.4392, 0.7490)
    )
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title.x = element_blank()
  ) +
  labs(title = "Richness all samples (broad patterns)", y = "Richness")

# evenness per sample type / location group
ggplot(
  data = meta,
  mapping = aes(x = type.location.gen.water.type, y = even, fill = sample.type)
) +
  geom_boxplot() +
  scale_fill_manual(
    values = c(
      "#cc3a47",
      rgb(1, 0.7921569, 0.1568627),
      rgb(0.0078, 0.4392, 0.7490)
    )
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title.x = element_blank()
  ) +
  labs(title = "Evenness all samples (broad patterns)", y = "Evenness")

3.3 Stacked barplots based on taxonomy

3.3.1 Phylum

  • Make stacked bar plots showing the proportion of reads per phylum per sample
  • Only phyla that comprised a mean of at least 0.5% of all reads (after standardizing read depths) are displayed
  • Note that for Cosley cloaca samples, the two clear groupings were driven by the 4 captive adults compared to the 5 captive juveniles
  • Note that the 5 outliers in the Cosley water samples were from the tap water (the remainder were tub samples)
# extract proportion of reads per phylum (rows) per sample (columns)
phyla.reads <- as.data.frame(otu_table(phylo.phy))

# make a column of row names to merge on
phyla.reads$names <- rownames(phyla.reads)

# extract taxonomic information and keep only the phylum label plus the key
phyla.taxa <- as.data.frame(tax_table(phylo.phy))
phyla.taxa <- cbind.data.frame(Phylum = phyla.taxa$Phylum, names = rownames(phyla.taxa))

# merge data, then drop the key column so that Phylum is the first column
phyla <- merge(phyla.taxa, phyla.reads, by = "names", all.x = TRUE, all.y = TRUE)
phyla <- phyla[, -1]

# extract and set aside the sum of the proportions of reads
phyla.sum.of.reads <- cbind.data.frame(phyla$Phylum, rowSums(phyla[, 2:ncol(phyla)]))
# divide by sum of sums (same as number of samples) to get a proportion of reads
# per phylum based on a standardized read depth per sample
phyla.sum.of.reads[, 2] <- phyla.sum.of.reads[, 2] / sum(phyla.sum.of.reads[, 2])
colnames(phyla.sum.of.reads) <- c("Phylum", "proportion.of.reads")

# convert to long format (one row per phylum x sample)
phyla <- melt(phyla, id.vars = "Phylum")
colnames(phyla)[2:3] <- c("sample.ID", "proportion.of.reads")

# merge with metadata, keeping every row of the read table
phyla <- merge(meta, phyla, by = "sample.ID", all.x = FALSE, all.y = TRUE)

# subset to phyla that comprise >= 0.5% of all reads
phyla.5 <- phyla[which(phyla$Phylum %in% phyla.sum.of.reads[phyla.sum.of.reads$proportion.of.reads >= 0.005, "Phylum"]), ]

# subset to phyla that comprise < 0.5% of all reads
phyla.not5 <- phyla[which(phyla$Phylum %in% phyla.sum.of.reads[phyla.sum.of.reads$proportion.of.reads < 0.005, "Phylum"]), ]

# calculate the total proportion comprised by phyla < 0.5% of reads, compress it
# to a single row per sample with the label "Other (<0.5%)", and combine it back
# with phyla.5 so that the proportions per sample once again sum to 1
other.samples <- unique(phyla.not5$sample.ID)
phyla.not5.list <- vector("list", length(other.samples))
# seq_along() so that nothing is added when no phylum falls below the cutoff
# (1:length() would iterate over c(1, 0) and append an all-NA "Other" row)
for (i in seq_along(other.samples)) {
  samp.i <- phyla.not5[phyla.not5$sample.ID == other.samples[i], ]
  res.i <- samp.i[1, ] # first row supplies the sample's metadata columns
  res.i$proportion.of.reads <- sum(samp.i$proportion.of.reads)
  res.i$Phylum <- "Other (<0.5%)"
  phyla.not5.list[[i]] <- res.i
}
if (length(phyla.not5.list) > 0) {
  phyla.5 <- rbind.data.frame(phyla.5, do.call("rbind.data.frame", phyla.not5.list))
}

# fix the facet order of the sample types
phyla.5$sample.type <- factor(phyla.5$sample.type, levels = c("water", "plastron", "cloaca"))

# make a plot based on phyla that comprise >= 0.5% of reads
ggplot(phyla.5, aes(x = sample.ID, y = proportion.of.reads, fill = Phylum)) +
  geom_bar(position = "stack", stat = "identity", color = "black", size = .01, width = 1) +
  facet_wrap(~sample.type + location.general, scales = "free") +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
  scale_fill_manual(values = Blue2DarkRed12Steps) +
  theme(axis.text.x = element_blank())

  • Make a table with the mean percent of reads per phylum for all phyla
# Mean percent of reads for every phylum within each level of
# type.location.gen.water.type (proportions are multiplied by 100).
phyla.sum <- dplyr::summarize(
  group_by(phyla, Phylum, type.location.gen.water.type),
  mean = mean(proportion.of.reads) * 100
)

# Reshape to wide (one row per phylum, one column per group) and print as a table.
kable.wrap(
  dcast(phyla.sum, Phylum ~ type.location.gen.water.type, value.var = "mean"),
  caption = "Mean percent of reads per sample type at the phylum level"
)
Mean percent of reads per sample type at the phylum level
Phylum cloaca Cosley cloaca Wild plastron Cosley plastron Shedd plastron Wild water Cosley tap water Cosley tub water Shedd tub water Wild marsh
Acidobacteriota 0.7885399 0.4338130 16.7949935 13.7946677 4.2060332 0.0383891 1.7190429 1.0805369 0.3559808
Actinobacteriota 3.2580510 2.1485013 4.2135910 2.9731629 4.6893745 59.5997896 13.1696882 1.3054438 48.7358021
Armatimonadota 0.0000000 0.0012044 0.0087748 0.0050078 0.0012449 0.0000000 0.1178804 0.1091083 0.0000000
Bacteroidota 37.1877316 36.3969435 9.6039663 24.6256016 31.8338176 0.0226041 20.9433178 23.3130217 10.1562631
Bdellovibrionota 0.4600345 0.0486653 0.0953862 0.0290669 0.0060548 0.0000000 0.3026449 0.3842915 0.0000000
Campylobacterota 0.0000000 0.2585087 0.0000318 0.0000000 0.1257607 0.0000000 0.0000000 0.0000000 1.7556828
Chloroflexi 5.0020590 1.1365833 3.4283898 1.8380270 0.1440052 0.0000000 0.6700357 1.1599499 0.7447344
Cloacimonadota 0.0000000 0.0048836 0.0000000 0.0000000 0.0650809 0.0000000 0.0000000 0.0000000 0.1717663
Crenarchaeota 0.0000000 0.0067977 0.0000000 0.0000000 0.0527163 0.0000000 0.1263982 0.1495346 0.5830938
Cyanobacteria 0.0396349 0.0596836 0.2317553 3.0454046 0.1833735 6.1521309 1.8017801 17.2567134 0.2637289
Deinococcota 2.9082800 0.7082070 30.1980278 18.7938453 20.9592881 0.1578330 2.3072892 1.4714044 0.2196730
Dependentiae 0.0000000 0.0000000 0.0002361 0.0006573 0.0000000 1.6748614 0.2946243 0.3594214 0.0000000
Desulfobacterota 0.0270773 0.6713648 0.0002528 0.0000000 1.8462042 0.0000000 0.0304119 0.0231519 1.7734586
Elusimicrobiota 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0531699 0.0000000
Euryarchaeota 0.0000000 0.0286463 0.0000000 0.0000000 0.4889987 0.0000000 0.0000000 0.0000000 1.5456292
Firmicutes 5.0343795 7.8573337 0.5526502 1.3188153 1.7508956 0.0000000 18.1278717 5.9789523 2.6680599
Fusobacteriota 0.7714508 0.2204275 0.0000000 0.0000000 0.0041961 0.0000000 0.0000000 0.0208755 0.2287156
Gemmatimonadota 0.0000000 0.0010651 0.0000000 0.0004080 0.0028593 0.0000000 0.0282356 0.0837352 0.0000000
Halobacterota 0.0000000 0.0117056 0.0000000 0.0000000 0.2384124 0.0000000 0.0000000 0.0000000 0.1831961
MBNT15 0.0000000 0.0022513 0.0000000 0.0000000 0.0048382 0.0000000 0.0155702 0.0071637 0.1941490
Methylomirabilota 0.0000000 0.0025071 0.0000000 0.0000000 0.0023247 0.0000000 0.0000000 0.0000000 0.2127868
Micrarchaeota 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0994859 0.0000000
Myxococcota 0.4937742 0.0054566 0.2876963 0.1600681 0.0362471 0.0000000 0.7378757 0.3802688 0.2430023
Nanoarchaeota 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0474592 0.2468601 0.0000000
Nitrospirota 0.0000000 0.0007245 0.0151905 0.0008819 0.0291142 0.0000000 0.1618656 0.0879042 0.2999831
Patescibacteria 0.1454178 4.2224626 0.0668867 0.0509309 0.9102997 0.0000000 0.8268458 5.3825370 0.0328456
Planctomycetota 0.0202156 0.0071004 0.1018539 0.1779815 0.0492763 0.0000000 2.0815838 2.7607155 0.0472545
Proteobacteria 43.6153491 29.2065092 33.8088293 32.9217412 31.6974166 32.3543919 33.9260655 35.3404011 28.3213780
SAR324_clade(Marine_group_B) 0.0000000 0.0019773 0.0001579 0.0000000 0.0147488 0.0000000 0.2752684 0.0287850 0.1035382
Spirochaetota 0.0000000 15.7352036 0.0005829 0.0017755 0.0737440 0.0000000 0.0009455 0.0000000 0.0146177
Sva0485 0.0000000 0.0037766 0.0000000 0.0000000 0.0213429 0.0000000 0.0000000 0.0000000 0.1319252
Synergistota 0.0000000 0.7259916 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
Thermoplasmatota 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0458397 0.0000000
unknown_Bacteria 0.2316072 0.0000000 0.3351585 0.1064029 0.0000000 0.0000000 0.7156768 0.2690874 0.0000000
Verrucomicrobiota 0.0032579 0.0917048 0.0730865 0.1119309 0.5582469 0.0000000 0.9771296 2.5838836 1.0127348
WPS-2 0.0131396 0.0000000 0.1825020 0.0436226 0.0040847 0.0000000 0.5944930 0.0177574 0.0000000

3.3.2 Class

  • Make stacked bar plots showing the proportion of reads per class per sample
  • Only classes that comprised a mean of at least 0.5% of all reads (after standardizing read depths) are displayed
  • Note that for Cosley cloaca samples, the two clear groupings were driven by the 4 captive adults compared to the 5 captive juveniles
  • Note that the 5 outliers in the Cosley water samples were from the tap water (the remainder were tub samples)
# Build (1) the overall proportion of reads per class and (2) a long table of
# the proportion of reads per class per sample, joined to the sample metadata.
# NOTE(review): the data frame is named `class`, which shadows the base R
# function of the same name; the name is kept because other code refers to it.

# proportion of reads per class (rows) per sample (columns)
class.reads <- as.data.frame(otu_table(phylo.class))
class.reads$names <- rownames(class.reads) # row names become the merge key

# taxonomic information for the same rows; only the Class label and key are kept
class.taxa <- as.data.frame(tax_table(phylo.class))
class.taxa <- cbind.data.frame(class = class.taxa$Class, names = rownames(class.taxa))

# join labels to reads on the key, then drop the key column
class <- merge(x = class.taxa, y = class.reads, by = "names", all = TRUE)
class$names <- NULL

# Sum the proportions per class over all samples and rescale so they sum to 1.
# Per the original note, the grand total is the same as the number of samples,
# giving a proportion of reads per class at a standardized read depth.
class.sum.of.reads <- cbind.data.frame(class$class, rowSums(class[, -1]))
colnames(class.sum.of.reads) <- c("class", "proportion.of.reads")
class.sum.of.reads$proportion.of.reads <-
  class.sum.of.reads$proportion.of.reads / sum(class.sum.of.reads$proportion.of.reads)

# optional diagnostics (left disabled): distribution of the proportions and
# the number of classes above the threshold
#ggplot(class.sum.of.reads,aes(x=proportion.of.reads))+
#  geom_histogram(boundary=0,binwidth=0.005)

#nrow(class.sum.of.reads[class.sum.of.reads$proportion.of.reads > 0.005,])

# convert to long: one row per class x sample combination
class <- melt(class, id.vars = "class",
              variable.name = "sample.ID", value.name = "proportion.of.reads")

# merge with metadata; all rows of the long table are kept
class <- merge(x = meta, y = class, by = "sample.ID", all.x = FALSE, all.y = TRUE)

# Split the long class table at a threshold of 0.5% of all reads (using the
# overall proportions in class.sum.of.reads), then collapse the rare classes
# into a single "Other (<0.5%)" row per sample so that the per-sample
# proportions once again sum to 1.

#subset to classes that comprise >= 0.5% of all reads
class.5 <- class[class$class %in% class.sum.of.reads[class.sum.of.reads$proportion.of.reads >= 0.005, "class"], ]

#subset to classes that comprise < 0.5% of all reads
class.not5 <- class[class$class %in% class.sum.of.reads[class.sum.of.reads$proportion.of.reads < 0.005, "class"], ]

# Samples that have at least one rare-class row. Computed once, and iterated
# with seq_along() so that an empty set results in zero iterations (1:length()
# would iterate over c(1, 0) and create an all-NA row).
class.not5.ids <- unique(class.not5$sample.ID)
class.not5.list <- vector("list", length(class.not5.ids))
for(i in seq_along(class.not5.ids)){
  samp.i <- class.not5[class.not5$sample.ID == class.not5.ids[i], ]
  res.i <- samp.i[1, ] # first row is reused as a template so the sample's metadata is kept
  res.i$proportion.of.reads <- sum(samp.i$proportion.of.reads) # total of all rare classes in this sample
  res.i$class <- "Other (<0.5%)"
  class.not5.list[[i]] <- res.i
}

# Append the "Other" rows (nothing to append when no class fell below the threshold).
if(length(class.not5.list) > 0){
  class.5 <- rbind.data.frame(class.5, do.call("rbind.data.frame", class.not5.list))
}

# Fix the order of sample types (used for faceting in the plot).
class.5$sample.type <- factor(class.5$sample.type, levels = c("water", "plastron", "cloaca"))

# Stacked bar plot (one bar per sample) of the proportion of reads per class,
# for classes comprising >= 0.5% of reads plus the pooled "Other" category.
# The fill palette is Blue2DarkRed12Steps extended by one extra colour at each
# end. Panels are split by sample type and general location with free scales;
# the y axis is fixed to 0-1 with no padding and the sample labels are hidden.
ggplot(data = class.5,
       mapping = aes(x = sample.ID, y = proportion.of.reads, fill = class)) +
  geom_col(color = "black", size = .01, width = 1) +
  scale_fill_manual(
    values = c(rgb(0, 0, .4313725),
               Blue2DarkRed12Steps,
               rgb(.3882353, 0.003921569, 0.003921569))
  ) +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
  facet_wrap(~ sample.type + location.general, scales = "free") +
  theme(axis.text.x = element_blank())

  • Make table with the mean per class for all classes
# Mean percent of reads for every class within each level of
# type.location.gen.water.type (proportions are multiplied by 100).
# In group_by() the first `class` is the data frame and the second is its
# `class` column.
class.sum <- dplyr::summarize(
  group_by(class, class, type.location.gen.water.type),
  mean = mean(proportion.of.reads) * 100
)

# Reshape to wide (one row per class, one column per group) and print as a table.
kable.wrap(
  dcast(class.sum, class ~ type.location.gen.water.type, value.var = "mean"),
  caption = "Mean percent of reads per sample type at the class level"
)
Mean percent of reads per sample type at the class level
class cloaca Cosley cloaca Wild plastron Cosley plastron Shedd plastron Wild water Cosley tap water Cosley tub water Shedd tub water Wild marsh
ABY1 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.1566940 0.1836471 0.0000000
Acidimicrobiia 0.4454980 0.0003358 1.1862741 0.1584714 0.0803015 0.0000000 0.0370491 0.0635431 0.0000000
Acidobacteriae 0.0000000 0.0000000 0.0000000 0.0003784 0.0000000 0.0000000 0.0000000 0.0693976 0.0000000
Actinobacteria 2.2812327 2.1458587 2.5941914 2.7123878 4.5432049 59.5997896 13.0033486 1.1790703 48.5290724
Alphaproteobacteria 5.2765395 0.7374841 20.8269807 19.4800398 13.6100849 32.0874843 13.5992552 13.5047075 3.2207856
Aminicenantia 0.0000000 0.0043588 0.0000000 0.0000000 0.0783618 0.0000000 0.0000000 0.0000000 0.0794178
Anaerolineae 0.0108738 1.0533244 0.0061875 0.0033598 0.0947681 0.0000000 0.2706901 0.1162095 0.5704308
Babeliae 0.0000000 0.0000000 0.0002361 0.0006573 0.0000000 1.6748614 0.2946243 0.3594214 0.0000000
Bacilli 0.3124523 0.5122893 0.0203203 0.0134740 0.9750415 0.0000000 2.1028210 0.7255534 0.5049120
bacteriap25 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.2430023
Bacteroidia 37.1584070 34.2109558 9.1486751 23.7849001 31.5477249 0.0169347 20.8955729 22.8870966 10.0340861
Bathyarchaeia 0.0000000 0.0067977 0.0000000 0.0000000 0.0527163 0.0000000 0.0000000 0.0000000 0.3471129
Bdellovibrionia 0.4600345 0.0486653 0.0922967 0.0241057 0.0042343 0.0000000 0.0800160 0.1344057 0.0000000
Berkelbacteria 0.0000000 0.0000000 0.0000000 0.0006929 0.0000000 0.0000000 0.0000000 0.2312316 0.0000000
Blastocatellia 0.6085147 0.0193159 16.7365826 13.7924087 0.6097093 0.0383891 1.5235519 0.7976880 0.0210084
BRH-c20a 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0479654 0.0133568 0.0000000
Campylobacteria 0.0000000 0.2585087 0.0000318 0.0000000 0.1257607 0.0000000 0.0000000 0.0000000 1.7556828
Chlamydiae 0.0000000 0.0000000 0.0004254 0.0009499 0.0000000 0.0000000 0.2266252 0.3559077 0.0000000
Chloroflexia 4.9911852 0.0832590 3.4222023 1.8346672 0.0320733 0.0000000 0.3993456 1.0437405 0.0065651
Cloacimonadia 0.0000000 0.0048836 0.0000000 0.0000000 0.0650809 0.0000000 0.0000000 0.0000000 0.1717663
Clostridia 4.7136309 6.9811739 0.5321705 1.3053413 0.5438105 0.0000000 14.4213107 5.0062308 2.0660479
Cyanobacteriia 0.0092461 0.0081772 0.1030382 2.9875562 0.1398116 0.0000000 1.0847513 16.9292046 0.0000000
Deinococci 2.9082800 0.7082070 30.1980278 18.7938453 20.9592881 0.1578330 2.3072892 1.4714044 0.2196730
Desulfobaccia 0.0000000 0.0000000 0.0000000 0.0000000 0.0181576 0.0000000 0.0000000 0.0000000 0.1738993
Desulfobacteria 0.0000000 0.0014705 0.0000000 0.0000000 0.1075577 0.0000000 0.0000000 0.0000000 0.2682852
Desulfobulbia 0.0000000 0.1166761 0.0002528 0.0000000 1.1646248 0.0000000 0.0000000 0.0000000 0.1274915
Desulfomonilia 0.0000000 0.0000000 0.0000000 0.0000000 0.0213466 0.0000000 0.0000000 0.0000000 0.0982951
Desulfovibrionia 0.0270773 0.1860315 0.0000000 0.0000000 0.1810676 0.0000000 0.0088302 0.0000000 0.0000000
Desulfuromonadia 0.0000000 0.3314621 0.0000000 0.0000000 0.0740370 0.0000000 0.0000000 0.0000000 0.0000000
Fimbriimonadia 0.0000000 0.0012044 0.0087748 0.0050078 0.0012449 0.0000000 0.1178804 0.1091083 0.0000000
Fusobacteriia 0.7714508 0.2204275 0.0000000 0.0000000 0.0041961 0.0000000 0.0000000 0.0208755 0.2287156
Gammaproteobacteria 38.3388096 28.4690251 12.9818486 13.4417015 18.0873317 0.2669075 20.3268103 21.8356937 25.1005923
Gemmatimonadetes 0.0000000 0.0010651 0.0000000 0.0004080 0.0028593 0.0000000 0.0282356 0.0837352 0.0000000
Gracilibacteria 0.1454178 3.8732000 0.0513593 0.0499750 0.7820152 0.0000000 0.4566295 2.9416458 0.0328456
Holophagae 0.0000000 0.3993626 0.0000000 0.0000000 3.2325762 0.0000000 0.0000000 0.0000000 0.0618684
Ignavibacteria 0.0000000 0.0000000 0.0000000 0.0000000 0.2105318 0.0000000 0.0000000 0.0000000 0.1221770
Kapabacteria 0.0000000 2.1859877 0.0000000 0.0046166 0.0526796 0.0000000 0.0414439 0.4091770 0.0000000
KD4-96 0.0000000 0.0000000 0.0000000 0.0000000 0.0171638 0.0000000 0.0000000 0.0000000 0.1677384
Leptospirae 0.0000000 9.4942244 0.0005445 0.0012169 0.0000000 0.0000000 0.0003889 0.0000000 0.0000000
Lineage_IIb 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0531699 0.0000000
MBNT15 0.0000000 0.0022513 0.0000000 0.0000000 0.0048382 0.0000000 0.0155702 0.0071637 0.1941490
Methanobacteria 0.0000000 0.0286463 0.0000000 0.0000000 0.4889987 0.0000000 0.0000000 0.0000000 1.5456292
Methanosarcinia 0.0000000 0.0117056 0.0000000 0.0000000 0.2384124 0.0000000 0.0000000 0.0000000 0.1831961
Methylomirabilia 0.0000000 0.0025071 0.0000000 0.0000000 0.0023247 0.0000000 0.0000000 0.0000000 0.2127868
Micrarchaeia 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0994859 0.0000000
MVP-15 0.0000000 0.0208737 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
Myxococcia 0.0000000 0.0000000 0.0000000 0.0000000 0.0330571 0.0000000 0.0208223 0.0006749 0.0000000
Nanoarchaeia 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0474592 0.2468601 0.0000000
Negativicutes 0.0082963 0.3005185 0.0001594 0.0000000 0.2320436 0.0000000 1.5557745 0.2338114 0.0971000
Nitrososphaeria 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.1263982 0.1495346 0.2359809
Nitrospiria 0.0000000 0.0000000 0.0151905 0.0008819 0.0000000 0.0000000 0.1618656 0.0879042 0.0000000
Oligoflexia 0.0000000 0.0000000 0.0030895 0.0049613 0.0018205 0.0000000 0.2226290 0.2498858 0.0000000
OM190 0.0000000 0.0000000 0.0000000 0.0006224 0.0000000 0.0000000 0.0000000 0.0576381 0.0000000
Omnitrophia 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0165602 0.0000000 0.0000000
Parcubacteria 0.0000000 0.3492627 0.0000000 0.0002629 0.0561247 0.0000000 0.2135224 1.9682745 0.0000000
Phycisphaerae 0.0000000 0.0000000 0.0000000 0.0069622 0.0000000 0.0000000 0.0014890 0.2074554 0.0000000
Pla3_lineage 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0211726 0.0000000 0.0000000
Planctomycetes 0.0202156 0.0071004 0.1018539 0.1703969 0.0492763 0.0000000 2.0589222 2.4956220 0.0472545
Polyangia 0.4937742 0.0054566 0.2876963 0.1600681 0.0031900 0.0000000 0.7170533 0.3795939 0.0000000
Rhodothermia 0.0293246 0.0000000 0.4552912 0.8360849 0.0228813 0.0056695 0.0063011 0.0167480 0.0000000
Saccharimonadia 0.0000000 0.0000000 0.0155274 0.0000000 0.0721598 0.0000000 0.0000000 0.0577379 0.0000000
SAR324_clade(Marine_group_B) 0.0000000 0.0019773 0.0001579 0.0000000 0.0147488 0.0000000 0.2752684 0.0287850 0.1035382
Sericytochromatia 0.0303888 0.0331171 0.1286577 0.0372942 0.0005178 0.0000000 0.0066040 0.0344221 0.0000000
Spirochaetia 0.0000000 6.2201055 0.0000384 0.0005586 0.0737440 0.0000000 0.0005566 0.0000000 0.0146177
Sva0485 0.0000000 0.0037766 0.0000000 0.0000000 0.0213429 0.0000000 0.0000000 0.0000000 0.1319252
Synergistia 0.0000000 0.7259916 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
Syntrophia 0.0000000 0.0241561 0.0000000 0.0000000 0.1865051 0.0000000 0.0000000 0.0000000 0.9354610
Syntrophorhabdia 0.0000000 0.0115684 0.0000000 0.0000000 0.0910626 0.0000000 0.0000000 0.0000000 0.1700264
Thermoanaerobaculia 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0812088 0.0000000
Thermodesulfovibrionia 0.0000000 0.0007245 0.0000000 0.0000000 0.0291142 0.0000000 0.0000000 0.0000000 0.2999831
Thermoleophilia 0.5313203 0.0023069 0.4331255 0.1023038 0.0658681 0.0000000 0.1292904 0.0628305 0.2067296
Thermoplasmata 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0458397 0.0000000
unknown_Bacteria 0.2316072 0.0000000 0.3351585 0.1064029 0.0000000 0.0000000 0.7156768 0.2690874 0.0000000
unknown_Desulfobacterota 0.0000000 0.0000000 0.0000000 0.0000000 0.0018453 0.0000000 0.0215818 0.0231519 0.0000000
unknown_Firmicutes 0.0000000 0.0633520 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
Vampirivibrionia 0.0000000 0.0183893 0.0000594 0.0205542 0.0430441 6.1521309 0.7104248 0.2930866 0.2637289
Verrucomicrobiae 0.0032579 0.0917048 0.0726611 0.1109810 0.5582469 0.0000000 0.7339443 2.2279760 1.0127348
Vicinamibacteria 0.1800251 0.0107756 0.0584109 0.0018806 0.2853859 0.0000000 0.1954911 0.1322425 0.1936861
WPS-2 0.0131396 0.0000000 0.1825020 0.0436226 0.0040847 0.0000000 0.5944930 0.0177574 0.0000000

3.3.3 Order

  • Make stacked bar plots showing the proportion of reads per order per sample
  • Only orders that comprised a mean of at least 2% of all reads (after standardizing read depths) are displayed
  • Note that for Cosley cloaca samples, the two clear groupings were driven by the 4 captive adults compared to the 5 captive juveniles
  • Note that the 5 outliers in the Cosley water samples were from the tap water (the remainder were tub samples)
# Build (1) the overall proportion of reads per order and (2) a long table of
# the proportion of reads per order per sample, joined to the sample metadata.
# NOTE(review): the data frame is named `order`, which shadows the base R
# function of the same name; the name is kept because other code refers to it.

# proportion of reads per order (rows) per sample (columns)
order.reads <- as.data.frame(otu_table(phylo.ord))
order.reads$names <- rownames(order.reads) # row names become the merge key

# taxonomic information for the same rows; only the Order label and key are kept
order.taxa <- as.data.frame(tax_table(phylo.ord))
order.taxa <- cbind.data.frame(order = order.taxa$Order, names = rownames(order.taxa))

# join labels to reads on the key, then drop the key column
order <- merge(x = order.taxa, y = order.reads, by = "names", all = TRUE)
order$names <- NULL

# Sum the proportions per order over all samples and rescale so they sum to 1.
# Per the original note, the grand total is the same as the number of samples,
# giving a proportion of reads per order at a standardized read depth.
order.sum.of.reads <- cbind.data.frame(order$order, rowSums(order[, -1]))
colnames(order.sum.of.reads) <- c("order", "proportion.of.reads")
order.sum.of.reads$proportion.of.reads <-
  order.sum.of.reads$proportion.of.reads / sum(order.sum.of.reads$proportion.of.reads)

# optional diagnostics (left disabled): distribution of the proportions and
# the number of orders above the threshold
#ggplot(order.sum.of.reads,aes(x=proportion.of.reads))+
#  geom_histogram(boundary=0,binwidth=0.01)

#nrow(order.sum.of.reads[order.sum.of.reads$proportion.of.reads > 0.02,])

# convert to long: one row per order x sample combination
order <- melt(order, id.vars = "order",
              variable.name = "sample.ID", value.name = "proportion.of.reads")

# merge with metadata; all rows of the long table are kept
order <- merge(x = meta, y = order, by = "sample.ID", all.x = FALSE, all.y = TRUE)

# Split the long order table at a threshold of 2% of all reads (using the
# overall proportions in order.sum.of.reads), then collapse the rare orders
# into a single "Other (<2%)" row per sample so that the per-sample
# proportions once again sum to 1.
# The object names (order1, order.not1) do not reflect the threshold; the
# cut-off applied by the code is 0.02 (2%).

#subset to orders that comprise >= 2% of all reads
order1 <- order[order$order %in% order.sum.of.reads[order.sum.of.reads$proportion.of.reads >= 0.02, "order"], ]

#subset to orders that comprise < 2% of all reads
order.not1 <- order[order$order %in% order.sum.of.reads[order.sum.of.reads$proportion.of.reads < 0.02, "order"], ]

# Samples that have at least one rare-order row. Computed once, and iterated
# with seq_along() so that an empty set results in zero iterations (1:length()
# would iterate over c(1, 0) and create an all-NA row).
order.not1.ids <- unique(order.not1$sample.ID)
order.not1.list <- vector("list", length(order.not1.ids))
for(i in seq_along(order.not1.ids)){
  samp.i <- order.not1[order.not1$sample.ID == order.not1.ids[i], ]
  res.i <- samp.i[1, ] # first row is reused as a template so the sample's metadata is kept
  res.i$proportion.of.reads <- sum(samp.i$proportion.of.reads) # total of all rare orders in this sample
  res.i$order <- "Other (<2%)"
  order.not1.list[[i]] <- res.i
}

# Append the "Other" rows (nothing to append when no order fell below the threshold).
if(length(order.not1.list) > 0){
  order1 <- rbind.data.frame(order1, do.call("rbind.data.frame", order.not1.list))
}

# Fix the order of sample types (used for faceting in the plot).
order1$sample.type <- factor(order1$sample.type, levels = c("water", "plastron", "cloaca"))

# Stacked bar plot (one bar per sample) of the proportion of reads per order,
# for orders comprising >= 2% of reads plus the pooled "Other" category.
# The fill palette is Blue2DarkRed12Steps extended by one extra colour at each
# end. Panels are split by sample type and general location with free scales;
# the y axis is fixed to 0-1 with no padding and the sample labels are hidden.
ggplot(data = order1,
       mapping = aes(x = sample.ID, y = proportion.of.reads, fill = order)) +
  geom_col(color = "black", size = .01, width = 1) +
  scale_fill_manual(
    values = c(rgb(0, 0, .4313725),
               Blue2DarkRed12Steps,
               rgb(.3882353, 0.003921569, 0.003921569))
  ) +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
  facet_wrap(~ sample.type + location.general, scales = "free") +
  theme(axis.text.x = element_blank())

  • Make table with the mean per order for all orders
# Mean percent of reads for every order within each level of
# type.location.gen.water.type (proportions are multiplied by 100).
# In group_by() the first `order` is the data frame and the second is its
# `order` column.
order.sum <- dplyr::summarize(
  group_by(order, order, type.location.gen.water.type),
  mean = mean(proportion.of.reads) * 100
)

# Reshape to wide (one row per order, one column per group) and print as a table.
kable.wrap(
  dcast(order.sum, order ~ type.location.gen.water.type, value.var = "mean"),
  caption = "Mean percent of reads per sample type at the orders level"
)
Mean percent of reads per sample type at the orders level
order cloaca Cosley cloaca Wild plastron Cosley plastron Shedd plastron Wild water Cosley tap water Cosley tub water Shedd tub water Wild marsh
0319-6G20 0.0000000 0.0000000 0.0002269 0.0000000 0.0000000 0.0000000 0.1385935 0.0683280 0.0000000
Absconditabacteriales_(SR1) 0.0000000 2.2581712 0.0000000 0.0052675 0.2884869 0.0000000 0.0000000 0.2068077 0.0328456
Acetobacterales 0.0000000 0.0000000 0.0000000 0.0281029 0.1221243 0.0000000 0.0007153 0.2604641 0.0000000
Acidaminococcales 0.0000000 0.1964710 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
Alicyclobacillales 0.0000000 0.0000000 0.0000000 0.0000000 0.0838145 0.0000000 0.0000000 0.0040868 0.0339734
Aminicenantales 0.0000000 0.0043588 0.0000000 0.0000000 0.0783618 0.0000000 0.0000000 0.0000000 0.0794178
Anaerolineales 0.0000000 1.0533244 0.0000000 0.0000000 0.0932642 0.0000000 0.0000000 0.0000000 0.5704308
Ardenticatenales 0.0000000 0.0000000 0.0003831 0.0000000 0.0000000 0.0000000 0.0078406 0.0150348 0.0000000
Babeliales 0.0000000 0.0000000 0.0002361 0.0006573 0.0000000 1.6748614 0.2946243 0.3594214 0.0000000
Bacillales 0.0000000 0.0070082 0.0122648 0.0039814 0.0489342 0.0000000 0.6704348 0.5444316 0.1970576
bacteriap25 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.2430023
Bacteriovoracales 0.0725052 0.0000000 0.0017101 0.0009530 0.0000000 0.0000000 0.0094841 0.0531371 0.0000000
Bacteroidales 3.2919301 10.5657489 0.0288480 0.0365466 0.9946360 0.0028347 14.1040509 8.1155322 0.6578766
Bathyarchaeia 0.0000000 0.0067977 0.0000000 0.0000000 0.0527163 0.0000000 0.0000000 0.0000000 0.3471129
Bdellovibrionales 0.3875293 0.0486653 0.0905867 0.0231527 0.0042343 0.0000000 0.0705319 0.0812686 0.0000000
Berkelbacteria 0.0000000 0.0000000 0.0000000 0.0006929 0.0000000 0.0000000 0.0000000 0.2312316 0.0000000
Blastocatellales 0.6085147 0.0193159 16.4967949 11.5450664 0.6034654 0.0383891 0.5971925 0.6648019 0.0210084
Blfdi19 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.6722183 0.0510043 0.0000000
Bradymonadales 0.0000000 0.3202899 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
BRH-c20a 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0479654 0.0133568 0.0000000
Bryobacterales 0.0000000 0.0000000 0.0000000 0.0003784 0.0000000 0.0000000 0.0000000 0.0693976 0.0000000
Burkholderiales 29.2493339 22.6851496 5.7223221 6.5294180 7.5298509 0.2527339 7.8772723 10.4309509 19.1714721
Caedibacterales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0207569 0.0866388 0.0000000
Caenarcaniphilales 0.0000000 0.0000000 0.0000000 0.0201250 0.0000000 0.0000000 0.0000000 0.1923761 0.0000000
Caldilineales 0.0009869 0.0000000 0.0005954 0.0004241 0.0000000 0.0000000 0.2461555 0.0504967 0.0000000
Campylobacterales 0.0000000 0.2585087 0.0000318 0.0000000 0.1257607 0.0000000 0.0000000 0.0000000 1.7556828
Candidatus_Abawacabacteria 0.0000000 0.0000000 0.0000000 0.0002985 0.0000000 0.0000000 0.0229288 0.0572008 0.0000000
Candidatus_Campbellbacteria 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0246859 0.0000000 0.0000000
Candidatus_Jorgensenbacteria 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.1813737 0.0000000
Candidatus_Lloydbacteria 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0947904 0.0000000
Candidatus_Magasanikbacteria 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.1566940 0.1291288 0.0000000
Candidatus_Nomurabacteria 0.0000000 0.3492627 0.0000000 0.0000000 0.0047440 0.0000000 0.1078504 0.2857165 0.0000000
Candidatus_Peregrinibacteria 0.0000000 0.0000000 0.0000000 0.0057844 0.0000000 0.0000000 0.0000000 0.0842727 0.0000000
Candidatus_Peribacteria 0.0000000 0.0000000 0.0000000 0.0002629 0.0000000 0.0000000 0.2894070 2.3178356 0.0000000
Candidatus_Ryanbacteria 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0876657 0.0000000
Candidatus_Uhrbacteria 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0545183 0.0000000
Candidatus_Zambryskibacteria 0.0000000 0.0000000 0.0000000 0.0002629 0.0000000 0.0000000 0.0000000 0.3310859 0.0000000
Caulobacterales 0.0129408 0.0019983 0.0297526 0.2930047 0.0735778 3.5750666 0.5649148 0.1333327 0.0000000
CCM19a 0.0000000 0.0000000 0.0002278 0.0242693 0.0000000 0.0000000 0.0801721 0.4169659 0.0000000
Chitinophagales 5.6744226 5.0930258 2.3357949 4.0307015 2.1674939 0.0000000 2.7840496 7.3192425 0.5057886
Chlamydiales 0.0000000 0.0000000 0.0004254 0.0009499 0.0000000 0.0000000 0.2266252 0.3559077 0.0000000
Chloroflexales 4.9882246 0.0832590 3.3913196 1.8342381 0.0320733 0.0000000 0.2911365 0.9787947 0.0065651
Christensenellales 0.0030820 0.5963177 0.0000000 0.0000000 0.0886512 0.0000000 0.3110221 0.0339625 0.2188009
Chromatiales 0.0000000 0.0088611 0.0000000 0.0000000 0.1721400 0.0000000 0.0000000 0.0000000 0.3951791
Chthoniobacterales 0.0000000 0.0000000 0.0000950 0.0075177 0.0000000 0.0000000 0.2958975 0.4311386 0.0000000
Cloacimonadales 0.0000000 0.0048836 0.0000000 0.0000000 0.0650809 0.0000000 0.0000000 0.0000000 0.1717663
Clostridia_vadinBB60_group 0.0000000 0.0008245 0.0000000 0.0000000 0.0000000 0.0000000 0.1108243 0.0000000 0.0000000
Clostridiales 0.3121260 0.1123946 0.3004981 0.8400839 0.0883219 0.0000000 3.1739145 1.8670469 1.0851645
Competibacterales 0.0000000 0.0000000 0.0000000 0.0000000 0.1667369 0.0000000 0.0000000 0.0000000 0.0000000
Corynebacteriales 1.0824183 0.4629737 0.4475076 0.0639382 0.0751503 59.5997896 5.4117597 0.6062980 1.7619205
Coxiellales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0546730 0.0000000 0.0000000
Cyanobacteriales 0.0083922 0.0081772 0.0953505 2.7322673 0.1375853 0.0000000 0.8055766 12.3303260 0.0000000
Cytophagales 0.1178218 0.0826103 0.6068375 0.7890047 0.1735284 0.0015971 0.5195085 2.2645678 0.4807533
Deinococcales 2.9082800 0.7082070 30.1978141 18.7822661 20.9592881 0.1578330 2.2375296 1.3444455 0.2196730
Desulfatiglandales 0.0000000 0.0014705 0.0000000 0.0000000 0.0110676 0.0000000 0.0000000 0.0000000 0.1132677
Desulfobaccales 0.0000000 0.0000000 0.0000000 0.0000000 0.0181576 0.0000000 0.0000000 0.0000000 0.1738993
Desulfobacterales 0.0000000 0.0000000 0.0000000 0.0000000 0.0964901 0.0000000 0.0000000 0.0000000 0.1550175
Desulfobulbales 0.0000000 0.1166761 0.0002528 0.0000000 1.1646248 0.0000000 0.0000000 0.0000000 0.1274915
Desulfomonilales 0.0000000 0.0000000 0.0000000 0.0000000 0.0213466 0.0000000 0.0000000 0.0000000 0.0982951
Desulfovibrionales 0.0270773 0.1860315 0.0000000 0.0000000 0.1810676 0.0000000 0.0088302 0.0000000 0.0000000
Diplorickettsiales 0.0000000 0.0000000 0.0008040 0.0000000 0.0000000 0.0000000 0.2852700 0.0000000 0.0000000
DS-100 0.0000000 0.0000000 0.0002959 0.0000000 0.0000000 0.0000000 0.0707560 0.0000000 0.0000000
EC3 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0252912 0.0000000 0.0000000
Enterobacterales 2.6720921 0.1954914 0.0167434 0.0726897 0.2251149 0.0000000 2.1275462 5.5034785 0.0343713
Erysipelotrichales 0.2013248 0.3978573 0.0018047 0.0005023 0.0000000 0.0000000 1.2800641 0.1169673 0.0318180
Eubacteriales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0574016 0.0000000 0.0000000
EV818SWSAP88 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0354283 0.0000000
Exiguobacterales 0.0000000 0.0062625 0.0055939 0.0089904 0.1126427 0.0000000 0.0280140 0.0458870 0.0144433
Fimbriimonadales 0.0000000 0.0012044 0.0087748 0.0050078 0.0012449 0.0000000 0.1178804 0.1091083 0.0000000
Flavobacteriales 27.2417076 14.6794775 4.2686531 16.4148291 27.4934266 0.0125028 2.6944581 3.6470298 7.9419877
Frankiales 0.0000000 0.0020288 0.0000000 0.0000000 0.0007576 0.0000000 0.0000000 0.1054743 10.2026125
Fusobacteriales 0.7714508 0.2204275 0.0000000 0.0000000 0.0041961 0.0000000 0.0000000 0.0208755 0.2287156
Gaiellales 0.0107953 0.0023069 0.0079445 0.0003349 0.0658681 0.0000000 0.0642745 0.0329785 0.2067296
Gastranaerophilales 0.0000000 0.0183893 0.0000000 0.0000000 0.0330851 0.0000000 0.0000000 0.0000000 0.2637289
Gemmatales 0.0191130 0.0000000 0.0465360 0.0263999 0.0037445 0.0000000 0.5922160 0.6271697 0.0000000
Gemmatimonadales 0.0000000 0.0010651 0.0000000 0.0004080 0.0028593 0.0000000 0.0282356 0.0837352 0.0000000
Geobacterales 0.0000000 0.0111723 0.0000000 0.0000000 0.0740370 0.0000000 0.0000000 0.0000000 0.0000000
Gracilibacteria 0.1454178 0.8851618 0.0513593 0.0383617 0.2413087 0.0000000 0.1442937 0.2522574 0.0000000
Haliangiales 0.0000000 0.0000000 0.0000000 0.0000000 0.0015561 0.0000000 0.0195029 0.0432916 0.0000000
HglApr721 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0194666 0.0000000 0.0000000
Holophagales 0.0000000 0.3993626 0.0000000 0.0000000 3.2325762 0.0000000 0.0000000 0.0000000 0.0618684
Holosporales 0.0000000 0.0000000 0.0000724 0.0000000 0.0000000 0.0028347 0.0695956 0.0175826 0.0000000
Ignavibacteriales 0.0000000 0.0000000 0.0000000 0.0000000 0.2105318 0.0000000 0.0000000 0.0000000 0.1221770
Isosphaerales 0.0000000 0.0000000 0.0009750 0.0140157 0.0000000 0.0000000 0.0191756 0.4638632 0.0000000
Izemoplasmatales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.2276198
JGI_0000069-P22 0.0000000 0.7298671 0.0000000 0.0000000 0.2522196 0.0000000 0.0000000 0.0232716 0.0000000
Kapabacteriales 0.0000000 2.1859877 0.0000000 0.0046166 0.0526796 0.0000000 0.0414439 0.4091770 0.0000000
KD4-96 0.0000000 0.0000000 0.0000000 0.0000000 0.0171638 0.0000000 0.0000000 0.0000000 0.1677384
Kineosporiales 0.0000000 0.3950912 0.0007482 0.0000000 1.9625882 0.0000000 0.0425148 0.0337610 0.0000000
Lachnospirales 0.0730449 0.1808881 0.0180163 0.0287919 0.0000000 0.0000000 5.6932241 1.6542550 0.2023439
Lactobacillales 0.1111275 0.0213544 0.0006568 0.0000000 0.5493013 0.0000000 0.0815992 0.0000000 0.0000000
Legionellales 0.0000000 0.0000000 0.0134219 0.0032813 0.0000000 0.0000000 0.9471159 0.3095548 0.0000000
Leptolyngbyales 0.0008539 0.0000000 0.0076877 0.2228758 0.0022263 0.0000000 0.2791746 3.5876492 0.0000000
Leptospirales 0.0000000 9.4942244 0.0005445 0.0012169 0.0000000 0.0000000 0.0003889 0.0000000 0.0000000
Lineage_IIb 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0531699 0.0000000
Marine_Group_II 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0458397 0.0000000
MBNT15 0.0000000 0.0022513 0.0000000 0.0000000 0.0048382 0.0000000 0.0155702 0.0071637 0.1941490
Methanobacteriales 0.0000000 0.0286463 0.0000000 0.0000000 0.4889987 0.0000000 0.0000000 0.0000000 1.5456292
Methanosarciniales 0.0000000 0.0117056 0.0000000 0.0000000 0.2384124 0.0000000 0.0000000 0.0000000 0.1831961
Methylacidiphilales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0512979 0.1922383 0.0000000
Methylococcales 0.0000000 0.0820920 0.0000000 0.0000000 2.3313950 0.0000000 0.0000000 0.0000000 4.8543457
Micavibrionales 0.1070198 0.0000000 0.0196695 0.0150594 0.0000000 0.0000000 0.2703092 0.0751200 0.0000000
Micrarchaeales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0994859 0.0000000
Micrococcales 0.8376226 1.0295984 1.5622084 1.7924468 2.3437542 0.0000000 7.4918196 0.2532118 36.5645395
Micromonosporales 0.0000000 0.0000000 0.0076881 0.0000000 0.0142964 0.0000000 0.0173400 0.0014999 0.0000000
Microtrichales 0.4454980 0.0003358 1.1862741 0.1584714 0.0803015 0.0000000 0.0370491 0.0635431 0.0000000
mle1-27 0.0000000 0.0000000 0.0015496 0.0052921 0.0000000 0.0000000 0.0068382 0.0055266 0.0000000
Monoglobales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0184207 0.0000000 0.0000000
MVP-15 0.0000000 0.0208737 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
Myxococcales 0.0000000 0.0000000 0.0000000 0.0000000 0.0330571 0.0000000 0.0208223 0.0006749 0.0000000
Nannocystales 0.0000000 0.0000000 0.0000000 0.0006224 0.0000000 0.0000000 0.0000000 0.1801095 0.0000000
Nitrosopumilales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.1063509 0.1495346 0.0000000
Nitrososphaerales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0200473 0.0000000 0.2359809
Nitrospirales 0.0000000 0.0000000 0.0151905 0.0008819 0.0000000 0.0000000 0.1618656 0.0879042 0.0000000
Obscuribacterales 0.0000000 0.0000000 0.0000000 0.0004292 0.0099590 6.1521309 0.6835429 0.0608278 0.0000000
Oligoflexales 0.0000000 0.0000000 0.0028625 0.0049613 0.0018205 0.0000000 0.0840354 0.1320691 0.0000000
OM190 0.0000000 0.0000000 0.0000000 0.0006224 0.0000000 0.0000000 0.0000000 0.0576381 0.0000000
Omnitrophales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0165602 0.0000000 0.0000000
Oscillospirales 0.0940270 0.3328733 0.0000506 0.0000000 0.0161342 0.0000000 1.8975671 0.5377510 0.0000000
Paracaedibacterales 0.0493037 0.1658376 0.0474597 0.1893244 0.0010356 0.0000000 0.2334239 0.0314796 0.0000000
Parcubacteria 0.0000000 0.0000000 0.0000000 0.0000000 0.0513807 0.0000000 0.0000000 0.5707977 0.0000000
Pedosphaerales 0.0000000 0.0077130 0.0000000 0.0491888 0.0692465 0.0000000 0.0851557 0.6246436 0.3149908
Peptococcales 0.0000000 0.0143054 0.0000000 0.0000000 0.0161951 0.0000000 0.0000000 0.0000000 0.0000000
Peptostreptococcales-Tissierellales 4.2302485 5.6858999 0.2136055 0.4364654 0.3031510 0.0000000 3.1040107 0.9132155 0.5597385
Phycisphaerales 0.0000000 0.0000000 0.0000000 0.0069622 0.0000000 0.0000000 0.0014890 0.2074554 0.0000000
Pirellulales 0.0011025 0.0024087 0.0441913 0.0397527 0.0133400 0.0000000 0.7025051 0.3197126 0.0216735
Piscirickettsiales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0032559 0.0421413 0.0000000
Pla3_lineage 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0211726 0.0000000 0.0000000
Planctomycetales 0.0000000 0.0046917 0.0101516 0.0902286 0.0321919 0.0000000 0.7450255 1.0848765 0.0255811
Polyangiales 0.4937742 0.0054566 0.2861466 0.1541536 0.0016339 0.0000000 0.0184939 0.0996619 0.0000000
Propionibacteriales 0.3611918 0.0039022 0.5760390 0.8560027 0.0510479 0.0000000 0.0399146 0.1788253 0.0000000
Pseudanabaenales 0.0000000 0.0000000 0.0000000 0.0324131 0.0000000 0.0000000 0.0000000 0.7513590 0.0000000
Pseudomonadales 0.2264946 2.5946515 0.6783043 1.0265778 6.9300890 0.0066144 3.3400303 1.4767163 0.6074156
Pyrinomonadales 0.0000000 0.0000000 0.0081352 0.0000000 0.0000000 0.0000000 0.7593850 0.0000000 0.0000000
R7C24 0.0000000 0.0000000 0.0000000 0.0000000 0.0354871 0.0000000 0.0000000 0.0000000 0.0120300
RBG-13-54-9 0.0000000 0.0000000 0.0017509 0.0000000 0.0000000 0.0000000 0.0149224 0.0462404 0.0000000
Reyranellales 0.0008806 0.0012044 0.0012250 0.0000000 0.0000000 0.0000000 0.4381934 0.1051382 0.0308646
RF39 0.0000000 0.0776993 0.0000000 0.0000000 0.0000000 0.0000000 0.0427089 0.0141807 0.0000000
Rhizobiales 2.2057382 0.1035884 8.1369964 4.8789521 5.6164883 14.3206690 2.1336001 2.5059009 0.9386668
Rhodobacterales 2.1681392 0.3839446 2.6784933 2.7877374 1.4688965 0.0000000 0.7102896 4.6061811 1.4471438
Rhodospirillales 0.0000000 0.0000000 0.0041376 0.0005789 0.0000000 0.0000000 0.1078559 0.1082137 0.0000000
Rhodothermales 0.0293246 0.0000000 0.4552912 0.8360849 0.0228813 0.0056695 0.0063011 0.0167480 0.0000000
Rickettsiales 0.1573699 0.0018065 0.0182472 0.0006573 0.0033590 0.0203886 1.0283353 0.7443574 0.0091361
Rokubacteriales 0.0000000 0.0025071 0.0000000 0.0000000 0.0023247 0.0000000 0.0000000 0.0000000 0.2127868
Saccharimonadales 0.0000000 0.0000000 0.0155274 0.0000000 0.0721598 0.0000000 0.0000000 0.0577379 0.0000000
Salinisphaerales 0.0069442 0.0000000 0.0208343 0.1901986 0.0000000 0.0000000 0.1169375 0.1657051 0.0000000
SAR324_clade(Marine_group_B) 0.0000000 0.0019773 0.0001579 0.0000000 0.0147488 0.0000000 0.2752684 0.0287850 0.1035382
SBR1031 0.0098869 0.0000000 0.0034581 0.0029357 0.0015039 0.0000000 0.0017717 0.0044377 0.0000000
SepB-3 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.2598703 0.0000000
Sericytochromatia 0.0303888 0.0331171 0.1286577 0.0372942 0.0005178 0.0000000 0.0066040 0.0344221 0.0000000
Silvanigrellales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0494887 0.0000000
Solirubrobacterales 0.5205250 0.0000000 0.4251810 0.1019689 0.0000000 0.0000000 0.0650159 0.0298519 0.0000000
Sphingobacteriales 0.8325250 2.9233284 1.9071194 2.4832510 0.7186401 0.0000000 0.6442347 1.4903342 0.4476799
Sphingomonadales 0.5751475 0.0791044 9.8906171 11.2765279 6.2858696 14.1685254 7.7289478 4.5454637 0.7949743
Spirochaetales 0.0000000 6.2201055 0.0000384 0.0005586 0.0737440 0.0000000 0.0005566 0.0000000 0.0146177
Staphylococcales 0.0000000 0.0021076 0.0000000 0.0000000 0.1803487 0.0000000 0.0000000 0.0000000 0.0000000
Subgroup_17 0.0000000 0.0013854 0.0000000 0.0000000 0.0269204 0.0000000 0.0000000 0.0000000 0.0959535
Sva0485 0.0000000 0.0037766 0.0000000 0.0000000 0.0213429 0.0000000 0.0000000 0.0000000 0.1319252
Synergistales 0.0000000 0.7259916 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
Syntrophales 0.0000000 0.0241561 0.0000000 0.0000000 0.1865051 0.0000000 0.0000000 0.0000000 0.9354610
Syntrophorhabdales 0.0000000 0.0115684 0.0000000 0.0000000 0.0910626 0.0000000 0.0000000 0.0000000 0.1700264
Thermales 0.0000000 0.0000000 0.0002137 0.0115792 0.0000000 0.0000000 0.0697596 0.1269589 0.0000000
Thermoanaerobaculales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0812088 0.0000000
Thermomicrobiales 0.0029606 0.0000000 0.0308827 0.0004292 0.0000000 0.0000000 0.1082091 0.0649458 0.0000000
Thiotrichales 0.0000000 0.0039141 0.0000000 0.0000000 0.1859518 0.0000000 0.0000000 0.0000000 0.0103114
Tistrellales 0.0000000 0.0000000 0.0003101 0.0039243 0.0000000 0.0000000 0.1034781 0.2555183 0.0000000
unknown_Actinobacteria 0.0000000 0.2522642 0.0000000 0.0000000 0.0956102 0.0000000 0.0000000 0.0000000 0.0000000
unknown_Alphaproteobacteria 0.0000000 0.0000000 0.0000000 0.0061704 0.0387338 0.0000000 0.1888394 0.0293164 0.0000000
unknown_Bacteria 0.2316072 0.0000000 0.3351585 0.1064029 0.0000000 0.0000000 0.7156768 0.2690874 0.0000000
unknown_Bacteroidia 0.0000000 0.8667650 0.0014223 0.0305671 0.0000000 0.0000000 0.1492711 0.0503901 0.0000000
unknown_Blastocatellia 0.0000000 0.0000000 0.2313565 2.2473424 0.0062439 0.0000000 0.0962184 0.1328861 0.0000000
unknown_Clostridia 0.0011025 0.0576704 0.0000000 0.0000000 0.0313570 0.0000000 0.0549256 0.0000000 0.0000000
unknown_Desulfobacterota 0.0000000 0.0000000 0.0000000 0.0000000 0.0018453 0.0000000 0.0215818 0.0231519 0.0000000
unknown_Firmicutes 0.0000000 0.0633520 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
unknown_Gammaproteobacteria 0.0164212 0.0027098 0.0329385 0.0288688 0.0025601 0.0000000 4.0948487 0.8334230 0.0000000
unknown_Parcubacteria 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0809860 0.4168446 0.0000000
unknown_Thermodesulfovibrionia 0.0000000 0.0007245 0.0000000 0.0000000 0.0291142 0.0000000 0.0000000 0.0000000 0.2999831
Vampirovibrionales 0.0000000 0.0000000 0.0000594 0.0000000 0.0000000 0.0000000 0.0268819 0.0398828 0.0000000
Veillonellales-Selenomonadales 0.0082963 0.1040476 0.0001594 0.0000000 0.2320436 0.0000000 1.5557745 0.2338114 0.0971000
Verrucomicrobiales 0.0032579 0.0839917 0.0725661 0.0542745 0.4890005 0.0000000 0.3015931 0.9799555 0.6977440
Vicinamibacterales 0.1800251 0.0093902 0.0584109 0.0018806 0.2584655 0.0000000 0.1954911 0.1322425 0.0977326
Woesearchaeales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0474592 0.2468601 0.0000000
WPS-2 0.0131396 0.0000000 0.1825020 0.0436226 0.0040847 0.0000000 0.5944930 0.0177574 0.0000000
Xanthomonadales 6.1675235 2.8961555 6.4962525 5.5663978 0.5080060 0.0075593 1.3549306 2.6213295 0.0154671

3.3.4 Family

  • Make stacked bar plots showing the proportion of reads per family per sample
  • Only families that comprised a mean of at least 1.5% of all reads (after standardizing read depths) are displayed
  • Note that for Cosley cloaca samples, the two clear groupings were driven by the 4 captive adults compared to the 5 captive juveniles
  • Note that the 5 outliers in the Cosley water samples were from the tap water (the remainder were tub samples)
# Build a long-format table of the proportion of reads per family per sample,
# joined to the sample metadata, plus a per-family summary of overall abundance.

# extract proportion of reads per family per sample (taxa as rows, samples as columns)
family.reads <- as.data.frame(otu_table(phylo.fam))

# keep the row names (taxon identifiers) as a column so they can be used as a merge key
family.reads$names <- rownames(family.reads)

# extract taxonomic information and keep only the family label and the taxon identifier
family.taxa <- as.data.frame(tax_table(phylo.fam))
family.taxa <- cbind.data.frame(family = family.taxa$Family, names = rownames(family.taxa))

# merge taxonomy with reads on the taxon identifier, keeping unmatched rows from both sides
family <- merge(family.taxa, family.reads, by = "names", all.x = TRUE, all.y = TRUE)

# drop the merge key; column 1 is now "family" and the remaining columns are samples
family <- family[, -1]

# extract and set aside the sum of the proportions of reads per family
# (drop = FALSE keeps a data frame even if there is only one sample column)
family.sum.of.reads <- cbind.data.frame(
  family$family,
  rowSums(family[, 2:ncol(family), drop = FALSE])
)
# divide by the sum of sums to get a proportion of reads per family based on a
# standardized read depth per sample
# (the sum of sums equals the number of samples, assuming each sample's proportions sum to 1)
family.sum.of.reads[, 2] <- family.sum.of.reads[, 2] / sum(family.sum.of.reads[, 2])
colnames(family.sum.of.reads) <- c("family", "proportion.of.reads")

#ggplot(family.sum.of.reads,aes(x=proportion.of.reads))+
#  geom_histogram(boundary=0,binwidth=0.01)

#nrow(family.sum.of.reads[family.sum.of.reads$proportion.of.reads > 0.015,])

# convert to long format: one row per family x sample
family <- melt(family, id.vars = "family")

colnames(family)[2:3] <- c("sample.ID", "proportion.of.reads")

# merge with metadata; keep every read row even if a sample has no metadata entry
family <- merge(meta, family, by = "sample.ID", all.x = FALSE, all.y = TRUE)

# subset to families that comprise >= 1.5% of all reads
family1 <- family[
  family$family %in%
    family.sum.of.reads[family.sum.of.reads$proportion.of.reads >= 0.015, "family"],
]

# subset to families that comprise < 1.5% of all reads
family.not1 <- family[
  family$family %in%
    family.sum.of.reads[family.sum.of.reads$proportion.of.reads < 0.015, "family"],
]

# calculate the total proportion comprised by families < 1.5% of reads: for each
# sample, compress those rows to a single line carrying the "Other" label and the
# summed proportion, then combine back with family1 so that there is now an
# "Other" entry and the sums per sample are once again 1
family.not1.list <- lapply(unique(family.not1$sample.ID), function(id) {
  samp <- family.not1[family.not1$sample.ID == id, ]
  res <- samp[1, ] # first row supplies the sample's metadata columns
  res$proportion.of.reads <- sum(samp$proportion.of.reads)
  res$family <- "Other (<1.5%)"
  res
})
# only append when at least one family fell below the threshold
if (length(family.not1.list) > 0) {
  family1 <- rbind.data.frame(family1, do.call("rbind.data.frame", family.not1.list))
}

# fix the facet order of the sample types
family1$sample.type <- factor(family1$sample.type, levels = c("water", "plastron", "cloaca"))

# stacked bar plot of the proportion of reads per family for every sample in
# family1, faceted by sample type and general location
ggplot(family1, aes(x = sample.ID, y = proportion.of.reads, fill = family)) +
  geom_bar(
    stat = "identity",
    position = "stack",
    color = "black",
    size = .01,
    width = 1
  ) +
  # manual fill colours: one extra colour on each side of Blue2DarkRed12Steps
  # (presumably the 12-step palette from the colorBlindness package)
  scale_fill_manual(
    values = c(
      rgb(0, 0, .4313725),
      Blue2DarkRed12Steps,
      rgb(.3882353, 0.003921569, 0.003921569)
    )
  ) +
  # bars span exactly 0-1 with no padding
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
  facet_wrap(~ sample.type + location.general, scales = "free") +
  # hide the individual sample labels on the x axis
  theme(axis.text.x = element_blank())

  • Make table with the mean per family for all families
# mean percent of reads for each family within each
# type.location.gen.water.type group (proportion * 100)
family.sum <- dplyr::summarize(
  group_by(family, family, type.location.gen.water.type),
  mean = mean(proportion.of.reads) * 100
)

# reshape to wide (one row per family, one column per group) and render as a table
kable.wrap(
  x = dcast(family.sum, family ~ type.location.gen.water.type, value.var = "mean"),
  caption = "Mean percent of reads per sample type at the familys level"
)
Mean percent of reads per sample type at the familys level
family cloaca Cosley cloaca Wild plastron Cosley plastron Shedd plastron Wild water Cosley tap water Cosley tub water Shedd tub water Wild marsh
[Eubacterium]_coprostanoligenes_group 0.0498387 0.0000000 0.0000506 0.0000000 0.0000000 0.0000000 0.2241324 0.0848106 0.0000000
0319-6G20 0.0000000 0.0000000 0.0002269 0.0000000 0.0000000 0.0000000 0.1385935 0.0683280 0.0000000
37-13 0.0000000 0.0000000 0.0000000 0.0022440 0.0000000 0.0000000 0.0000000 0.2696414 0.0000000
67-14 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0176448 0.0022412 0.0000000
A0839 0.0462577 0.0000000 2.1998370 0.5598430 0.0081694 0.0000000 0.0445027 0.1823400 0.0000000
A4b 0.0098869 0.0000000 0.0034581 0.0029357 0.0015039 0.0000000 0.0017717 0.0044377 0.0000000
AB1 0.1560660 0.0000000 0.0083943 0.0006573 0.0000000 0.0000000 0.0191537 0.0339097 0.0000000
Absconditabacteriales_(SR1) 0.0000000 2.2581712 0.0000000 0.0052675 0.2884869 0.0000000 0.0000000 0.2068077 0.0328456
Acetobacteraceae 0.0000000 0.0000000 0.0000000 0.0281029 0.1221243 0.0000000 0.0007153 0.2604641 0.0000000
Acidaminobacteraceae 0.0000000 0.0109766 0.0000000 0.0000000 0.0487003 0.0000000 0.0123718 0.0287330 0.0000000
Acidaminococcaceae 0.0000000 0.1964710 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
Aeromonadaceae 0.0195143 0.0513484 0.0061375 0.0068493 0.0110482 0.0000000 0.5166397 0.4288017 0.0343713
Akkermansiaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.6441978 0.0000000
Alcaligenaceae 5.2136049 1.6432960 0.0001382 0.0004194 0.0031069 0.0000000 0.0000000 0.0000000 0.0000000
Alicyclobacillaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0838145 0.0000000 0.0000000 0.0040868 0.0339734
Alteromonadaceae 0.0000000 0.0087315 0.0043011 0.0601245 0.0068178 0.0000000 0.3484398 4.8732191 0.0000000
Aminicenantales 0.0000000 0.0043588 0.0000000 0.0000000 0.0783618 0.0000000 0.0000000 0.0000000 0.0794178
Anaerolineaceae 0.0000000 1.0533244 0.0000000 0.0000000 0.0932642 0.0000000 0.0000000 0.0000000 0.5704308
Anaeromyxobacteraceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0330571 0.0000000 0.0000000 0.0000000 0.0000000
Anaerovoracaceae 0.0022014 0.0959868 0.0000000 0.0000000 0.0736792 0.0000000 0.2204193 0.0620242 0.2233716
Arcobacteraceae 0.0000000 0.0127885 0.0000000 0.0000000 0.0173009 0.0000000 0.0000000 0.0000000 1.1343948
Ardenticatenaceae 0.0000000 0.0000000 0.0003831 0.0000000 0.0000000 0.0000000 0.0078406 0.0150348 0.0000000
Babeliaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0590711 0.0000000 0.0000000
Babeliales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 1.6748614 0.0206307 0.1266890 0.0000000
Bacillaceae 0.0000000 0.0070082 0.0120477 0.0039814 0.0489342 0.0000000 0.5851548 0.4457478 0.1970576
bacteriap25 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.2430023
Bacteriovoracaceae 0.0725052 0.0000000 0.0017101 0.0009530 0.0000000 0.0000000 0.0094841 0.0531371 0.0000000
Bacteroidaceae 2.2664550 0.1617072 0.0017123 0.0007674 0.0000000 0.0000000 2.6314526 4.0346179 0.0000000
Bacteroidetes_vadinHA17 0.0000000 0.0565128 0.0000000 0.0000000 0.6574701 0.0000000 0.0000000 0.0000000 0.5410163
Barnesiellaceae 0.0456265 0.0026061 0.0000000 0.0000000 0.0000000 0.0000000 0.5632887 0.0000000 0.0000000
Bathyarchaeia 0.0000000 0.0067977 0.0000000 0.0000000 0.0527163 0.0000000 0.0000000 0.0000000 0.3471129
Bdellovibrionaceae 0.3875293 0.0486653 0.0905867 0.0231527 0.0042343 0.0000000 0.0705319 0.0812686 0.0000000
Beijerinckiaceae 0.0196398 0.0000000 1.0372273 1.3413174 1.2150635 0.0015971 0.0155187 0.5443109 0.0000000
Berkelbacteria 0.0000000 0.0000000 0.0000000 0.0006929 0.0000000 0.0000000 0.0000000 0.2312316 0.0000000
Blastocatellaceae 0.6085147 0.0193159 16.4967949 11.5450664 0.6034654 0.0383891 0.5971925 0.6648019 0.0210084
Blfdi19 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.6722183 0.0510043 0.0000000
Bradymonadales 0.0000000 0.3202899 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
BRH-c20a 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0479654 0.0133568 0.0000000
Bryobacteraceae 0.0000000 0.0000000 0.0000000 0.0003784 0.0000000 0.0000000 0.0000000 0.0693976 0.0000000
Burkholderiaceae 0.0000000 0.0011001 0.0000513 0.0000000 0.0017798 0.0000000 0.7492797 0.1021227 2.9976058
Butyricicoccaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0669342 0.0036330 0.0000000
Caedibacteraceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0207569 0.0866388 0.0000000
Caenarcaniphilales 0.0000000 0.0000000 0.0000000 0.0201250 0.0000000 0.0000000 0.0000000 0.1923761 0.0000000
Caldilineaceae 0.0009869 0.0000000 0.0005954 0.0004241 0.0000000 0.0000000 0.2461555 0.0504967 0.0000000
Campylobacteraceae 0.0000000 0.0702027 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
Candidatus_Abawacabacteria 0.0000000 0.0000000 0.0000000 0.0002985 0.0000000 0.0000000 0.0229288 0.0572008 0.0000000
Candidatus_Campbellbacteria 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0246859 0.0000000 0.0000000
Candidatus_Jorgensenbacteria 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.1813737 0.0000000
Candidatus_Lloydbacteria 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0947904 0.0000000
Candidatus_Magasanikbacteria 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.1566940 0.1291288 0.0000000
Candidatus_Nomurabacteria 0.0000000 0.3492627 0.0000000 0.0000000 0.0047440 0.0000000 0.1078504 0.2857165 0.0000000
Candidatus_Peregrinibacteria 0.0000000 0.0000000 0.0000000 0.0057844 0.0000000 0.0000000 0.0000000 0.0842727 0.0000000
Candidatus_Peribacteria 0.0000000 0.0000000 0.0000000 0.0002629 0.0000000 0.0000000 0.2894070 2.3178356 0.0000000
Candidatus_Ryanbacteria 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0876657 0.0000000
Candidatus_Uhrbacteria 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0545183 0.0000000
Candidatus_Zambryskibacteria 0.0000000 0.0000000 0.0000000 0.0002629 0.0000000 0.0000000 0.0000000 0.3310859 0.0000000
Caulobacteraceae 0.0129408 0.0019983 0.0297110 0.2926323 0.0659360 0.0000000 0.0501929 0.0731865 0.0000000
CCM19a 0.0000000 0.0000000 0.0002278 0.0242693 0.0000000 0.0000000 0.0801721 0.4169659 0.0000000
Cellvibrionaceae 0.0000000 0.0000000 0.0000707 0.0022060 0.0000000 0.0000000 0.0285064 0.0013497 0.0000000
CG1-02-32-21 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0359881 0.0000000
Chitinibacteraceae 0.0000000 0.0000000 0.0000000 0.0052452 0.0000000 0.0000000 0.0559692 0.3045272 0.0368505
Chitinimonadaceae 0.0000000 0.0000000 0.0000000 0.0018512 0.0000000 0.0000000 0.0157024 0.2127551 0.0000000
Chitinophagaceae 5.6696327 5.0897566 2.3187990 3.7761562 2.0774446 0.0000000 2.1682901 3.7655214 0.4834472
Chlamydiaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0181584 0.0301773 0.0000000
Chloroflexaceae 0.0000000 0.0815923 0.0000000 0.0000000 0.0186258 0.0000000 0.0000000 0.0000000 0.0000000
Christensenellaceae 0.0030820 0.5963177 0.0000000 0.0000000 0.0886512 0.0000000 0.3110221 0.0339625 0.2188009
Chromatiaceae 0.0000000 0.0088611 0.0000000 0.0000000 0.1721400 0.0000000 0.0000000 0.0000000 0.3951791
Chromobacteriaceae 0.0000000 0.0710032 0.0039249 0.0185427 0.0608394 0.0000000 1.7466553 1.9679090 0.0310627
Cloacimonadaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0255733 0.0000000 0.0000000 0.0000000 0.0385362
Cloacimonadales 0.0000000 0.0048836 0.0000000 0.0000000 0.0395076 0.0000000 0.0000000 0.0000000 0.1332301
Clostridia_vadinBB60_group 0.0000000 0.0008245 0.0000000 0.0000000 0.0000000 0.0000000 0.1108243 0.0000000 0.0000000
Clostridiaceae 0.3121260 0.1123946 0.3004981 0.8400839 0.0883219 0.0000000 3.1739145 1.8670469 1.0851645
Comamonadaceae 7.9539321 6.8192729 5.5221580 6.4146993 6.2868134 0.0960045 4.2687648 6.3919066 8.4216093
Competibacteraceae 0.0000000 0.0000000 0.0000000 0.0000000 0.1667369 0.0000000 0.0000000 0.0000000 0.0000000
Coxiellaceae 0.0000000 0.0000000 0.0000000 0.0016407 0.0000000 0.0000000 0.0273365 0.0391532 0.0000000
Crocinitomicaceae 0.0000000 0.0018299 0.0000000 0.0000000 0.0166282 0.0000000 0.0000000 0.0000000 2.3911001
cvE6 0.0000000 0.0000000 0.0000000 0.0005259 0.0000000 0.0000000 0.0000000 0.2095560 0.0000000
Cyanobacteriaceae 0.0000000 0.0000000 0.0000000 2.2913833 0.0000000 0.0000000 0.0000000 8.4252898 0.0000000
Cyanobacteriales 0.0000000 0.0000000 0.0000000 0.0075364 0.0000000 0.0000000 0.0000000 0.0845715 0.0000000
Cyclobacteriaceae 0.0000000 0.0000000 0.0000000 0.0015021 0.0000000 0.0000000 0.0000000 0.0402825 0.0000000
Cytophagaceae 0.0000000 0.0000000 0.0053244 0.0429564 0.0021785 0.0000000 0.0225798 0.1686270 0.0000000
Deinococcaceae 2.9082800 0.7082070 30.1978141 18.7822661 20.9592881 0.1578330 2.2375296 1.3444455 0.2196730
Dermatophilaceae 0.0000000 0.0078607 0.0679063 0.1244122 1.2120681 0.0000000 0.0000000 0.0000000 0.0000000
Desulfatiglandaceae 0.0000000 0.0014705 0.0000000 0.0000000 0.0110676 0.0000000 0.0000000 0.0000000 0.1132677
Desulfobaccaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0181576 0.0000000 0.0000000 0.0000000 0.1738993
Desulfobulbaceae 0.0000000 0.1104342 0.0000722 0.0000000 0.3638419 0.0000000 0.0000000 0.0000000 0.0338196
Desulfocapsaceae 0.0000000 0.0062418 0.0001806 0.0000000 0.7592550 0.0000000 0.0000000 0.0000000 0.0936718
Desulfomicrobiaceae 0.0000000 0.0137801 0.0000000 0.0000000 0.0858944 0.0000000 0.0000000 0.0000000 0.0000000
Desulfomonilaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0213466 0.0000000 0.0000000 0.0000000 0.0982951
Desulfosarcinaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0964901 0.0000000 0.0000000 0.0000000 0.1550175
Desulfovibrionaceae 0.0270773 0.1705776 0.0000000 0.0000000 0.0662049 0.0000000 0.0088302 0.0000000 0.0000000
Desulfurivibrionaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0415278 0.0000000 0.0000000 0.0000000 0.0000000
Devosiaceae 0.0104425 0.0000000 0.2449887 0.0130801 0.0434604 0.0000000 0.0006589 0.0000000 0.0000000
Dietziaceae 0.6446732 0.4542191 0.0523637 0.0169345 0.0095089 0.0000000 0.0165206 0.0054875 0.0000000
Diplorickettsiaceae 0.0000000 0.0000000 0.0008040 0.0000000 0.0000000 0.0000000 0.2852700 0.0000000 0.0000000
DS-100 0.0000000 0.0000000 0.0002959 0.0000000 0.0000000 0.0000000 0.0707560 0.0000000 0.0000000
Dysgonomonadaceae 0.4931968 3.0062097 0.0000000 0.0000000 0.0429610 0.0000000 0.1008357 0.0601130 0.0292355
EC3 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0252912 0.0000000 0.0000000
Enterobacteriaceae 0.0359004 0.0478763 0.0030909 0.0000000 0.0658970 0.0000000 1.1291560 0.0779960 0.0000000
env.OPS_17 0.2822527 0.3253767 1.1961783 0.1294467 0.0091489 0.0000000 0.3714075 0.2823585 0.0459559
Erysipelatoclostridiaceae 0.0022182 0.0000000 0.0003311 0.0000000 0.0000000 0.0000000 0.7142083 0.0153716 0.0000000
Erysipelotrichaceae 0.1991066 0.3978573 0.0014737 0.0005023 0.0000000 0.0000000 0.5658558 0.1015957 0.0318180
Eubacteriaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0574016 0.0000000 0.0000000
EV818SWSAP88 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0354283 0.0000000
Exiguobacteraceae 0.0000000 0.0062625 0.0055939 0.0089904 0.1126427 0.0000000 0.0280140 0.0458870 0.0144433
Family_XI 0.4488662 1.5227540 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
Fimbriimonadaceae 0.0000000 0.0012044 0.0087748 0.0050078 0.0012449 0.0000000 0.1178804 0.1091083 0.0000000
Flavobacteriaceae 5.2259906 3.3634608 0.0996735 0.2293809 0.9772717 0.0000000 1.9966435 2.8345647 5.5338183
Fusibacteraceae 0.0000000 2.3317112 0.0000000 0.0000000 0.0240476 0.0000000 0.0000000 0.0000000 0.0496410
Fusobacteriaceae 0.7714508 0.2204275 0.0000000 0.0000000 0.0041961 0.0000000 0.0000000 0.0208755 0.2287156
Gastranaerophilales 0.0000000 0.0183893 0.0000000 0.0000000 0.0330851 0.0000000 0.0000000 0.0000000 0.2637289
Geminicoccaceae 0.0000000 0.0000000 0.0003101 0.0039243 0.0000000 0.0000000 0.1034781 0.2555183 0.0000000
Gemmataceae 0.0191130 0.0000000 0.0465360 0.0263999 0.0037445 0.0000000 0.5922160 0.6271697 0.0000000
Gemmatimonadaceae 0.0000000 0.0010651 0.0000000 0.0004080 0.0028593 0.0000000 0.0282356 0.0837352 0.0000000
Geobacteraceae 0.0000000 0.0111723 0.0000000 0.0000000 0.0740370 0.0000000 0.0000000 0.0000000 0.0000000
Gloeocapsaceae 0.0000000 0.0000000 0.0000000 0.1225183 0.0000000 0.0000000 0.0000000 0.6329031 0.0000000
Gracilibacteria 0.1454178 0.8851618 0.0513593 0.0383617 0.2413087 0.0000000 0.1442937 0.2522574 0.0000000
Hafniaceae 0.0072782 0.0120435 0.0000000 0.0000000 0.0510585 0.0000000 0.0000000 0.0000000 0.0000000
Haliangiaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0015561 0.0000000 0.0195029 0.0432916 0.0000000
Halieaceae 0.0000000 0.0000000 0.0003351 0.0575649 0.0000000 0.0000000 0.0007094 0.5745681 0.0120300
Halomonadaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0272343 0.0000000 0.0000000
Herpetosiphonaceae 4.9882246 0.0016667 3.3875903 0.0005190 0.0134475 0.0000000 0.0954140 0.0012253 0.0065651
HglApr721 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0194666 0.0000000 0.0000000
Holophagaceae 0.0000000 0.3993626 0.0000000 0.0000000 3.2325762 0.0000000 0.0000000 0.0000000 0.0618684
Holosporaceae 0.0000000 0.0000000 0.0000724 0.0000000 0.0000000 0.0028347 0.0695956 0.0175826 0.0000000
Hungateiclostridiaceae 0.0085228 0.3254034 0.0000000 0.0000000 0.0161342 0.0000000 0.0000000 0.0000000 0.0000000
Hydrogenophilaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0400687 0.0000000 0.0000000 0.0000000 0.1865051
Hymenobacteraceae 0.0000000 0.0000000 0.0271402 0.1994963 0.0000000 0.0000000 0.0268195 0.0555482 0.0000000
Hyphomicrobiaceae 0.0212908 0.0102341 0.1980537 0.2214862 0.1492868 0.5730747 0.4175595 0.1975822 0.0000000
Hyphomonadaceae 0.0000000 0.0000000 0.0000416 0.0003724 0.0076418 3.5750666 0.1680148 0.0601462 0.0000000
Iamiaceae 0.0000000 0.0000000 0.0012608 0.0000000 0.0107775 0.0000000 0.0070263 0.0000000 0.0000000
Ilumatobacteraceae 0.3509521 0.0000000 0.0201545 0.0038299 0.0695240 0.0000000 0.0014323 0.0284811 0.0000000
Intrasporangiaceae 0.0729286 0.5132814 1.1790389 1.6558698 0.9464732 0.0000000 0.1160527 0.2011487 0.0000000
Isosphaeraceae 0.0000000 0.0000000 0.0009750 0.0140157 0.0000000 0.0000000 0.0191756 0.4638632 0.0000000
JG30-KF-CM45 0.0029606 0.0000000 0.0308827 0.0004292 0.0000000 0.0000000 0.1082091 0.0649458 0.0000000
JGI_0000069-P22 0.0000000 0.7298671 0.0000000 0.0000000 0.2522196 0.0000000 0.0000000 0.0232716 0.0000000
Kapabacteriales 0.0000000 2.1859877 0.0000000 0.0046166 0.0526796 0.0000000 0.0414439 0.4091770 0.0000000
KD3-93 0.4431369 0.0027708 0.7105665 2.1776630 0.0000000 0.0000000 0.2183870 0.3417911 0.0000000
KD4-96 0.0000000 0.0000000 0.0000000 0.0000000 0.0171638 0.0000000 0.0000000 0.0000000 0.1677384
Kineosporiaceae 0.0000000 0.3950912 0.0007482 0.0000000 1.9625882 0.0000000 0.0425148 0.0337610 0.0000000
Labraceae 0.0008539 0.0000000 0.0056356 0.0084077 0.0000000 0.0000000 0.0247797 0.1959260 0.0000000
Lachnospiraceae 0.0730449 0.1808881 0.0180163 0.0287919 0.0000000 0.0000000 5.6932241 1.6542550 0.2023439
Legionellaceae 0.0000000 0.0000000 0.0134219 0.0000000 0.0000000 0.0000000 0.9471159 0.2312485 0.0000000
Lentimicrobiaceae 0.0000000 2.5951809 0.0000000 0.0000000 0.7045116 0.0000000 0.0000000 0.0000000 0.3938459
Leptolyngbyaceae 0.0008539 0.0000000 0.0076877 0.2228758 0.0022263 0.0000000 0.2791746 3.5876492 0.0000000
Leptospiraceae 0.0000000 9.4942244 0.0005445 0.0012169 0.0000000 0.0000000 0.0003889 0.0000000 0.0000000
Lineage_IIb 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0531699 0.0000000
Listeriaceae 0.0000000 0.0213544 0.0006568 0.0000000 0.5493013 0.0000000 0.0034469 0.0000000 0.0000000
Marine_Group_II 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0458397 0.0000000
MBNT15 0.0000000 0.0022513 0.0000000 0.0000000 0.0048382 0.0000000 0.0155702 0.0071637 0.1941490
Melioribacteraceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0462099 0.0000000 0.0000000 0.0000000 0.0000000
Methanobacteriaceae 0.0000000 0.0286463 0.0000000 0.0000000 0.4889987 0.0000000 0.0000000 0.0000000 1.5456292
Methanosaetaceae 0.0000000 0.0117056 0.0000000 0.0000000 0.1802523 0.0000000 0.0000000 0.0000000 0.1831961
Methanosarcinaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0581601 0.0000000 0.0000000 0.0000000 0.0000000
Methylacidiphilaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0512979 0.1922383 0.0000000
Methylococcaceae 0.0000000 0.0236223 0.0000000 0.0000000 0.1140752 0.0000000 0.0000000 0.0000000 1.2634884
Methyloligellaceae 0.0000000 0.0031404 0.0001242 0.0000000 0.0090913 0.0000000 0.0172261 0.0000000 0.3441095
Methylomonadaceae 0.0000000 0.0584697 0.0000000 0.0000000 2.2173198 0.0000000 0.0000000 0.0000000 3.5908573
Methylophilaceae 0.0000000 0.0871631 0.0000000 0.0000000 0.0403284 0.0000000 0.0000000 0.0000000 1.7802246
Microbacteriaceae 0.0660927 0.5084563 0.0711935 0.0121648 0.1360200 0.0000000 7.2985329 0.0520631 36.5645395
Microcystaceae 0.0000000 0.0000000 0.0000000 0.0007030 0.0000000 0.0000000 0.0000000 0.0954183 0.0000000
Micromonosporaceae 0.0000000 0.0000000 0.0076881 0.0000000 0.0142964 0.0000000 0.0173400 0.0014999 0.0000000
Microscillaceae 0.0000000 0.0003719 0.0404285 0.0302050 0.0000000 0.0000000 0.1546786 0.0976492 0.0000000
Microtrichaceae 0.0945459 0.0003358 1.1648588 0.1546415 0.0000000 0.0000000 0.0285906 0.0350620 0.0000000
mle1-27 0.0000000 0.0000000 0.0015496 0.0052921 0.0000000 0.0000000 0.0068382 0.0055266 0.0000000
Monoglobaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0184207 0.0000000 0.0000000
Moraxellaceae 0.2247991 2.4987901 0.6697131 0.9525357 5.5645225 0.0066144 0.5617835 0.2488246 0.5917311
Morganellaceae 2.0583581 0.0651113 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
MVP-15 0.0000000 0.0208737 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
Mycobacteriaceae 0.1411416 0.0087546 0.3661555 0.0000000 0.0529858 59.5997896 5.3259215 0.2056494 1.7619205
Myxococcaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0208223 0.0006749 0.0000000
Nannocystaceae 0.0000000 0.0000000 0.0000000 0.0006224 0.0000000 0.0000000 0.0000000 0.1801095 0.0000000
Neisseriaceae 13.5517912 2.9687544 0.0003142 0.0062208 0.0000000 0.0000000 0.2616800 0.2464486 0.0000000
Nitrosomonadaceae 0.1177684 0.0000000 0.1927695 0.0000000 0.0000000 0.0000000 0.0445190 0.0000000 0.0000000
Nitrosopumilaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.1063509 0.1495346 0.0000000
Nitrososphaeraceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0200473 0.0000000 0.2359809
Nitrospiraceae 0.0000000 0.0000000 0.0151905 0.0008819 0.0000000 0.0000000 0.1618656 0.0879042 0.0000000
Nocardiaceae 0.2966035 0.0000000 0.0255217 0.0470037 0.0126557 0.0000000 0.0693176 0.3951611 0.0000000
Nocardioidaceae 0.3611918 0.0039022 0.5760390 0.8517653 0.0510479 0.0000000 0.0399146 0.0586955 0.0000000
Nostocaceae 0.0031779 0.0049106 0.0090446 0.1499490 0.0572323 0.0000000 0.1719596 1.0140530 0.0000000
NS11-12_marine_group 0.0000000 0.0000000 0.0000516 0.0000000 0.0049795 0.0000000 0.0531710 0.8181828 0.0000000
NS9_marine_group 0.0000000 0.0000000 0.0000000 0.0058897 0.0025601 0.0000000 0.0000000 0.0352927 0.0000000
Obscuribacteraceae 0.0000000 0.0000000 0.0000000 0.0004292 0.0099590 6.1521309 0.6835429 0.0608278 0.0000000
Oligoflexaceae 0.0000000 0.0000000 0.0028625 0.0049613 0.0018205 0.0000000 0.0840354 0.1320691 0.0000000
OM190 0.0000000 0.0000000 0.0000000 0.0006224 0.0000000 0.0000000 0.0000000 0.0576381 0.0000000
Omnitrophaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0165602 0.0000000 0.0000000
Oscillospiraceae 0.0115966 0.0028857 0.0000000 0.0000000 0.0000000 0.0000000 0.9836735 0.2778532 0.0000000
Oscillospirales 0.0011025 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0537056 0.0125033 0.0000000
Oxalobacteraceae 0.0000000 0.0000000 0.0001259 0.0000000 0.0713373 0.0000000 0.4984843 0.0952806 0.0000000
Paludibacteraceae 0.0047808 4.0196316 0.0000000 0.0000000 0.1461925 0.0000000 0.0000000 0.0000000 0.0000000
Paracaedibacteraceae 0.0493037 0.1658376 0.0474597 0.1893244 0.0010356 0.0000000 0.2334239 0.0314796 0.0000000
Parachlamydiaceae 0.0000000 0.0000000 0.0004254 0.0004241 0.0000000 0.0000000 0.2012202 0.0497449 0.0000000
Parcubacteria 0.0000000 0.0000000 0.0000000 0.0000000 0.0513807 0.0000000 0.0000000 0.5707977 0.0000000
Parvularculaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.3467072 0.0000000 0.0000000
Pasteurellaceae 0.5352466 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0050995 0.0000000 0.0000000
Pedosphaeraceae 0.0000000 0.0077130 0.0000000 0.0491888 0.0692465 0.0000000 0.0851557 0.6246436 0.3149908
Peptococcaceae 0.0000000 0.0143054 0.0000000 0.0000000 0.0161951 0.0000000 0.0000000 0.0000000 0.0000000
Peptostreptococcaceae 3.7454844 1.6860201 0.2136055 0.4364654 0.1531734 0.0000000 2.8607780 0.8037825 0.2867259
Peptostreptococcales-Tissierellales 0.0181693 0.0164343 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
Phaselicystidaceae 0.0061488 0.0000000 0.0157611 0.0594176 0.0000000 0.0000000 0.0000000 0.0026628 0.0000000
Phormidiaceae 0.0008806 0.0000000 0.0285485 0.1428435 0.0000000 0.0000000 0.0020151 0.9886237 0.0000000
PHOS-HE36 0.0000000 0.0000000 0.0000000 0.0000000 0.1643219 0.0000000 0.0000000 0.0000000 0.1221770
Phycisphaeraceae 0.0000000 0.0000000 0.0000000 0.0069622 0.0000000 0.0000000 0.0014890 0.2074554 0.0000000
Pirellulaceae 0.0011025 0.0024087 0.0441913 0.0397527 0.0133400 0.0000000 0.7025051 0.3197126 0.0216735
Piscirickettsiaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0032559 0.0421413 0.0000000
Pla3_lineage 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0211726 0.0000000 0.0000000
Planococcaceae 0.0000000 0.0000000 0.0002172 0.0000000 0.0000000 0.0000000 0.0852800 0.0986838 0.0000000
Pleomorphomonadaceae 0.5698818 0.0020612 1.0219186 0.9093043 0.0129904 0.0000000 0.0395082 0.0578001 0.0000000
Polyangiaceae 0.3265675 0.0039512 0.1275735 0.0158023 0.0000000 0.0000000 0.0083538 0.0809573 0.0000000
Prolixibacteraceae 0.0000000 0.0149397 0.0000000 0.0000000 0.0883797 0.0000000 0.0000000 0.0000000 0.0355007
Propionibacteriaceae 0.0000000 0.0000000 0.0000000 0.0042375 0.0000000 0.0000000 0.0000000 0.1201298 0.0000000
Pseudanabaenaceae 0.0000000 0.0000000 0.0000000 0.0324131 0.0000000 0.0000000 0.0000000 0.7513590 0.0000000
Pseudomonadaceae 0.0016956 0.0958614 0.0081854 0.0142712 1.3655665 0.0000000 2.7217968 0.6519739 0.0036544
Pyrinomonadaceae 0.0000000 0.0000000 0.0081352 0.0000000 0.0000000 0.0000000 0.7593850 0.0000000 0.0000000
R7C24 0.0000000 0.0000000 0.0000000 0.0000000 0.0354871 0.0000000 0.0000000 0.0000000 0.0120300
RBG-13-54-9 0.0000000 0.0000000 0.0017509 0.0000000 0.0000000 0.0000000 0.0149224 0.0462404 0.0000000
Reyranellaceae 0.0008806 0.0012044 0.0012250 0.0000000 0.0000000 0.0000000 0.4381934 0.1051382 0.0308646
RF39 0.0000000 0.0776993 0.0000000 0.0000000 0.0000000 0.0000000 0.0427089 0.0141807 0.0000000
Rhizobiaceae 0.1289922 0.0016490 0.4208089 0.1885761 0.2388174 0.0000000 0.3544048 0.1446163 0.0591918
Rhodanobacteraceae 0.1820373 0.0019287 4.0982097 1.2194865 0.0244972 0.0000000 0.0665300 0.3423285 0.0000000
Rhodobacteraceae 2.1681392 0.3839446 2.6784933 2.7877374 1.4688965 0.0000000 0.7102896 4.6061811 1.4471438
Rhodocyclaceae 0.1194107 5.7843649 0.0000000 0.0000000 0.4940541 0.1567294 0.0651943 0.0667856 5.6918356
Rhodospirillaceae 0.0000000 0.0000000 0.0041376 0.0005789 0.0000000 0.0000000 0.0580442 0.1082137 0.0000000
Rhodothermaceae 0.0293246 0.0000000 0.4552912 0.8360849 0.0228813 0.0056695 0.0063011 0.0167480 0.0000000
Rickettsiaceae 0.0013039 0.0000000 0.0041642 0.0000000 0.0000000 0.0000000 0.1889759 0.2969174 0.0000000
Rickettsiales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.1482409 0.0000000 0.0000000
Rikenellaceae 0.0000000 0.7124088 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
Rokubacteriales 0.0000000 0.0025071 0.0000000 0.0000000 0.0023247 0.0000000 0.0000000 0.0000000 0.2127868
Roseiflexaceae 0.0000000 0.0000000 0.0037293 1.8337190 0.0000000 0.0000000 0.1957225 0.9775694 0.0000000
Rubinisphaeraceae 0.0000000 0.0046917 0.0028127 0.0397004 0.0304049 0.0000000 0.3097806 0.6998056 0.0255811
Rubritaleaceae 0.0000000 0.0628673 0.0000000 0.0012077 0.3779603 0.0000000 0.0000000 0.0108645 0.6254357
Ruminococcaceae 0.0229663 0.0045842 0.0000000 0.0000000 0.0000000 0.0000000 0.4777664 0.1359052 0.0000000
Saccharimonadaceae 0.0000000 0.0000000 0.0155274 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
Saccharimonadales 0.0000000 0.0000000 0.0000000 0.0000000 0.0721598 0.0000000 0.0000000 0.0577379 0.0000000
Sandaracinaceae 0.1610580 0.0015054 0.1428121 0.0789338 0.0016339 0.0000000 0.0101401 0.0160418 0.0000000
Saprospiraceae 0.0047899 0.0000000 0.0169958 0.2463106 0.0762475 0.0000000 0.2206770 2.9810578 0.0223414
SAR324_clade(Marine_group_B) 0.0000000 0.0019773 0.0001579 0.0000000 0.0147488 0.0000000 0.2752684 0.0287850 0.1035382
SB-5 0.0000000 0.0000000 0.0000000 0.0000000 0.0588751 0.0000000 0.0000000 0.0000000 0.0412649
Sedimentibacteraceae 0.0155271 0.0006277 0.0000000 0.0000000 0.0000000 0.0000000 0.0104416 0.0186758 0.0000000
Selenomonadaceae 0.0025725 0.0041225 0.0000616 0.0000000 0.0000000 0.0000000 0.6606327 0.1304632 0.0000000
SepB-3 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.2598703 0.0000000
Sericytochromatia 0.0303888 0.0331171 0.1286577 0.0372942 0.0005178 0.0000000 0.0066040 0.0344221 0.0000000
Silvanigrellaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0494887 0.0000000
Simkaniaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0072465 0.0664296 0.0000000
SM2D12 0.0000000 0.0018065 0.0056886 0.0000000 0.0033590 0.0203886 0.6454019 0.4125697 0.0091361
Smithellaceae 0.0000000 0.0180364 0.0000000 0.0000000 0.0819259 0.0000000 0.0000000 0.0000000 0.4806272
Solimonadaceae 0.0069442 0.0000000 0.0208343 0.1901986 0.0000000 0.0000000 0.1169375 0.1657051 0.0000000
Solirubrobacteraceae 0.5205250 0.0000000 0.4178810 0.1019689 0.0000000 0.0000000 0.0473710 0.0276107 0.0000000
Sphingobacteriaceae 0.1071354 0.0000000 0.0003229 0.1761413 0.0000000 0.0000000 0.0012693 0.0480018 0.0078782
Sphingomonadaceae 0.5751475 0.0791044 9.8906171 11.2765279 6.2858696 14.1685254 7.7289478 4.5454637 0.7949743
Spirochaetaceae 0.0000000 6.2201055 0.0000384 0.0005586 0.0737440 0.0000000 0.0005566 0.0000000 0.0146177
Spirosomaceae 0.1178218 0.0822384 0.5339444 0.5032072 0.1713499 0.0015971 0.3154306 1.8428122 0.4807533
Sporichthyaceae 0.0000000 0.0020288 0.0000000 0.0000000 0.0007576 0.0000000 0.0000000 0.1054743 10.2026125
Sporomusaceae 0.0057237 0.0999251 0.0000978 0.0000000 0.2320436 0.0000000 0.7682321 0.0460227 0.0971000
Staphylococcaceae 0.0000000 0.0021076 0.0000000 0.0000000 0.1803487 0.0000000 0.0000000 0.0000000 0.0000000
Streptococcaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0745757 0.0000000 0.0000000
Subgroup_17 0.0000000 0.0013854 0.0000000 0.0000000 0.0269204 0.0000000 0.0000000 0.0000000 0.0959535
Sulfurimonadaceae 0.0000000 0.0153382 0.0000318 0.0000000 0.0665650 0.0000000 0.0000000 0.0000000 0.2040128
Sulfurovaceae 0.0000000 0.0416488 0.0000000 0.0000000 0.0364555 0.0000000 0.0000000 0.0000000 0.0206228
Sutterellaceae 0.0000000 0.0000000 0.0015040 0.0824395 0.0131803 0.0000000 0.1682878 1.0432154 0.0257785
Sva0485 0.0000000 0.0037766 0.0000000 0.0000000 0.0213429 0.0000000 0.0000000 0.0000000 0.1319252
Synergistaceae 0.0000000 0.7259916 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
Syntrophaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0425171 0.0000000 0.0000000 0.0000000 0.0977326
Syntrophorhabdaceae 0.0000000 0.0115684 0.0000000 0.0000000 0.0910626 0.0000000 0.0000000 0.0000000 0.1700264
T34 2.2928266 4.6381047 0.0013362 0.0000000 0.2424435 0.0000000 0.0027355 0.0000000 0.0000000
Tannerellaceae 0.4818709 2.2842402 0.0271357 0.0357793 0.0007576 0.0028347 10.8084740 4.0208014 0.0108592
Terrimicrobiaceae 0.0000000 0.0000000 0.0000950 0.0075177 0.0000000 0.0000000 0.2958975 0.4311386 0.0000000
Thermaceae 0.0000000 0.0000000 0.0002137 0.0115792 0.0000000 0.0000000 0.0697596 0.1269589 0.0000000
Thermoanaerobaculaceae 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0812088 0.0000000
Thiotrichaceae 0.0000000 0.0039141 0.0000000 0.0000000 0.1859518 0.0000000 0.0000000 0.0000000 0.0103114
Tsukamurellaceae 0.0000000 0.0000000 0.0034668 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
UBA12409 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0018863 0.0169425 0.0000000
unknown_Actinobacteria 0.0000000 0.2522642 0.0000000 0.0000000 0.0956102 0.0000000 0.0000000 0.0000000 0.0000000
unknown_Alphaproteobacteria 0.0000000 0.0000000 0.0000000 0.0061704 0.0387338 0.0000000 0.1888394 0.0293164 0.0000000
unknown_Babeliales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0146497 0.1484925 0.0000000
unknown_Bacteria 0.2316072 0.0000000 0.3351585 0.1064029 0.0000000 0.0000000 0.7156768 0.2690874 0.0000000
unknown_Bacteroidia 0.0000000 0.8667650 0.0014223 0.0305671 0.0000000 0.0000000 0.1492711 0.0503901 0.0000000
unknown_Blastocatellia 0.0000000 0.0000000 0.2313565 2.2473424 0.0062439 0.0000000 0.0962184 0.1328861 0.0000000
unknown_Burkholderiales 0.0000000 0.6720904 0.0000000 0.0000000 0.2758991 0.0000000 0.0000000 0.0000000 0.0000000
unknown_Campylobacterales 0.0000000 0.1185305 0.0000000 0.0000000 0.0054394 0.0000000 0.0000000 0.0000000 0.3966524
unknown_Chitinophagales 0.0000000 0.0032692 0.0000000 0.0059908 0.0138018 0.0000000 0.3950825 0.3030219 0.0000000
unknown_Clostridia 0.0011025 0.0576704 0.0000000 0.0000000 0.0313570 0.0000000 0.0549256 0.0000000 0.0000000
unknown_Cyanobacteriales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0597030 0.0000000
unknown_Cytophagales 0.0000000 0.0000000 0.0000000 0.0116376 0.0000000 0.0000000 0.0000000 0.0596488 0.0000000
unknown_Desulfobacterota 0.0000000 0.0000000 0.0000000 0.0000000 0.0018453 0.0000000 0.0215818 0.0231519 0.0000000
unknown_Desulfovibrionales 0.0000000 0.0016737 0.0000000 0.0000000 0.0289683 0.0000000 0.0000000 0.0000000 0.0000000
unknown_Enterobacterales 0.0157945 0.0103803 0.0000000 0.0057159 0.0000000 0.0000000 0.0577501 0.1234616 0.0000000
unknown_Firmicutes 0.0000000 0.0633520 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
unknown_Gaiellales 0.0107953 0.0023069 0.0079445 0.0003349 0.0658681 0.0000000 0.0642745 0.0329785 0.2067296
unknown_Gammaproteobacteria 0.0164212 0.0027098 0.0329385 0.0288688 0.0025601 0.0000000 4.0948487 0.8334230 0.0000000
unknown_Izemoplasmatales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.2276198
unknown_Micavibrionales 0.1070198 0.0000000 0.0196695 0.0150594 0.0000000 0.0000000 0.2703092 0.0751200 0.0000000
unknown_Micrarchaeales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0634977 0.0000000
unknown_Micrococcales 0.6986013 0.0000000 0.2440698 0.0000000 0.0491929 0.0000000 0.0772339 0.0000000 0.0000000
unknown_Oscillospirales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0913550 0.0230456 0.0000000
unknown_Parcubacteria 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0809860 0.4168446 0.0000000
unknown_Peptostreptococcales-Tissierellales 0.0000000 0.0213893 0.0000000 0.0000000 0.0035504 0.0000000 0.0000000 0.0000000 0.0000000
unknown_Planctomycetales 0.0000000 0.0000000 0.0073390 0.0505281 0.0017870 0.0000000 0.4352448 0.3850709 0.0000000
unknown_Rhizobiales 1.4083794 0.0739187 2.9981127 1.6369374 3.7612766 2.2049334 0.7730887 1.0637993 0.4906827
unknown_Rhodospirillales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0498117 0.0000000 0.0000000
unknown_Rickettsiales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0265628 0.0009605 0.0000000
unknown_Solirubrobacterales 0.0000000 0.0000000 0.0073000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
unknown_Syntrophales 0.0000000 0.0061197 0.0000000 0.0000000 0.0620621 0.0000000 0.0000000 0.0000000 0.3571012
unknown_Thermodesulfovibrionia 0.0000000 0.0007245 0.0000000 0.0000000 0.0291142 0.0000000 0.0000000 0.0000000 0.2999831
unknown_Vampirovibrionales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0398828 0.0000000
unknown_Veillonellales-Selenomonadales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.1269098 0.0573255 0.0000000
unknown_Vicinamibacterales 0.0000000 0.0093902 0.0000000 0.0000000 0.1725491 0.0000000 0.0299907 0.0360540 0.0977326
unknown_Woesearchaeales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0705110 0.0000000
Vagococcaceae 0.1111275 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0035765 0.0000000 0.0000000
Vampirovibrionaceae 0.0000000 0.0000000 0.0000594 0.0000000 0.0000000 0.0000000 0.0268819 0.0000000 0.0000000
Vermiphilaceae 0.0000000 0.0000000 0.0002361 0.0006573 0.0000000 0.0000000 0.1983865 0.0672974 0.0000000
Verrucomicrobiaceae 0.0032579 0.0211244 0.0725661 0.0530668 0.1110402 0.0000000 0.3015931 0.3248932 0.0723083
Vicinamibacteraceae 0.1800251 0.0000000 0.0584109 0.0018806 0.0859163 0.0000000 0.1655004 0.0961885 0.0000000
Weeksellaceae 22.0157170 11.3141867 4.1689795 16.1795585 26.4969666 0.0125028 0.6978146 0.7771724 0.0170693
Williamwhitmaniaceae 0.0000000 0.3074925 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
Woesearchaeales 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0474592 0.1763490 0.0000000
WPS-2 0.0131396 0.0000000 0.1825020 0.0436226 0.0040847 0.0000000 0.5944930 0.0177574 0.0000000
Xanthobacteraceae 0.0000000 0.0125850 0.0102896 0.0000000 0.1783325 11.5410638 0.4463527 0.1195262 0.0446828
Xanthomonadaceae 5.9854862 2.8942268 2.3980428 4.3469114 0.4835088 0.0075593 1.2884006 2.2790010 0.0154671
Xenococcaceae 0.0043337 0.0032666 0.0577574 0.0173338 0.0803529 0.0000000 0.6316020 1.0297636 0.0000000
Yersiniaceae 0.0000000 0.0000000 0.0032139 0.0000000 0.0902933 0.0000000 0.0704610 0.0000000 0.0000000

3.4 Heatmaps using all data

  • Make heatmaps at different taxonomic levels based on all data
  • The heatmap based on ASVs was included in the manuscript
# Heatmaps built from all samples in `meta`; the four metadata columns below
# are handed to phylo.heat.loop as its order1-order4 arguments.
phylo.heat.loop(
  phylo.list,
  data = meta,
  ID.col = "sample.ID",
  order1 = "captive.wild",
  order2 = "sample.type",
  order3 = "location",
  order4 = "age",
  title = "All data",
  method = "NMDS",
  taxa.label = label.list
)

3.5 Venn diagrams using broad data

3.5.1 Comparisons of water and plastron samples for captive juveniles based on ASVs

# Make row sums for each subcategory (the actual numbers are irrelevant; only
# presence/absence matters). Each vector is the per-ASV sum across the samples
# whose metadata match the stated filter.

# juveniles at Cosley, plastron
j.cos.plas <- rowSums(asv[, which(colnames(asv) %in% meta[
  which(
    meta$age == "juvenile" &
      meta$location == "Cosley" &
      meta$sample.type == "plastron"
  ),
  "sample.ID"
])])

# juveniles at Shedd, plastron
j.shedd.plas <- rowSums(asv[, which(colnames(asv) %in% meta[
  which(
    meta$age == "juvenile" &
      meta$location == "Shedd" &
      meta$sample.type == "plastron"
  ),
  "sample.ID"
])])

# tub water at the Shedd
w.shedd <- rowSums(asv[, which(colnames(asv) %in% meta[
  which(
    meta$water.type == "tub" &
      meta$location == "Shedd" &
      meta$sample.type == "water"
  ),
  "sample.ID"
])])

# tub water at Cosley
w.cos <- rowSums(asv[, which(colnames(asv) %in% meta[
  which(
    meta$water.type == "tub" &
      meta$location == "Cosley" &
      meta$sample.type == "water"
  ),
  "sample.ID"
])])

# Column order here must match the `category` labels passed to venn.quad below
j.water.input <- cbind.data.frame(j.cos.plas, w.shedd, j.shedd.plas, w.cos)

# TRUE/FALSE spelled out: T and F are ordinary variables that can be reassigned
j.water.input <- venn.cat(j.water.input, TRUE, FALSE)

# NOTE(review): the empty ggplot is printed before the Venn grob, presumably to
# start a clean page for grid.draw — confirm
ggplot() + theme_bw()
grid.draw(venn.quad(
  j.water.input,
  category = c("Cosley plastron", "Shedd water", "Shedd plastron", "Cosley water")
))

3.5.2 Comparisons of water and plastron samples for captive juveniles based on families

# Make row sums for each subcategory (the actual numbers are irrelevant; only
# presence/absence matters), this time on the table extracted from phylo.fam.
fam <- otu_table(phylo.fam)

# juveniles at Cosley, plastron
j.cos.plas.f <- rowSums(fam[, which(colnames(fam) %in% meta[
  which(
    meta$age == "juvenile" &
      meta$location == "Cosley" &
      meta$sample.type == "plastron"
  ),
  "sample.ID"
])])

# juveniles at Shedd, plastron
j.shedd.plas.f <- rowSums(fam[, which(colnames(fam) %in% meta[
  which(
    meta$age == "juvenile" &
      meta$location == "Shedd" &
      meta$sample.type == "plastron"
  ),
  "sample.ID"
])])

# tub water at the Shedd
w.shedd.f <- rowSums(fam[, which(colnames(fam) %in% meta[
  which(
    meta$water.type == "tub" &
      meta$location == "Shedd" &
      meta$sample.type == "water"
  ),
  "sample.ID"
])])

# tub water at Cosley
w.cos.f <- rowSums(fam[, which(colnames(fam) %in% meta[
  which(
    meta$water.type == "tub" &
      meta$location == "Cosley" &
      meta$sample.type == "water"
  ),
  "sample.ID"
])])

# Column order here must match the `category` labels passed to venn.quad below
j.water.input.f <- cbind.data.frame(j.cos.plas.f, w.shedd.f, j.shedd.plas.f, w.cos.f)

# TRUE/FALSE spelled out: T and F are ordinary variables that can be reassigned
j.water.input.f <- venn.cat(j.water.input.f, TRUE, FALSE)

# NOTE(review): the empty ggplot is printed before the Venn grob, presumably to
# start a clean page for grid.draw — confirm
ggplot() + theme_bw()
grid.draw(venn.quad(
  j.water.input.f,
  category = c("Cosley plastron", "Shedd water", "Shedd plastron", "Cosley water")
))

3.5.3 Comparisons of plastron samples for captive and wild turtles based on ASVs

  • Captive turtles were juveniles
  • Wild turtles were adults
  • This was included in the manuscript
# Make row sums for each subcategory (the actual numbers are irrelevant; only
# presence/absence matters)

# wild turtles, plastron
a.wild.plas <- rowSums(asv[, which(colnames(asv) %in% meta[
  which(meta$captive.wild == "wild" & meta$sample.type == "plastron"),
  "sample.ID"
])])

# j.cos.plas and j.shedd.plas come from the captive-juvenile ASV section above;
# column order must match the `category` labels passed to venn.trip below
plas.input <- cbind.data.frame(j.cos.plas, a.wild.plas, j.shedd.plas)

# TRUE/FALSE spelled out: T and F are ordinary variables that can be reassigned
plas.input <- venn.cat(plas.input, TRUE, FALSE)

# NOTE(review): the empty ggplot is printed before the Venn grob, presumably to
# start a clean page for grid.draw — confirm
ggplot() + theme_bw()
grid.draw(venn.trip(plas.input, category = c("Cosley", "Wild", "Shedd")))

3.5.4 Comparisons of plastron samples for captive and wild turtles based on families

  • Captive turtles were juveniles
  • Wild turtles were adults
# Make row sums for each subcategory (the actual numbers are irrelevant; only
# presence/absence matters)

# wild turtles, plastron (family-level table)
a.wild.plas.f <- rowSums(fam[, which(colnames(fam) %in% meta[
  which(meta$captive.wild == "wild" & meta$sample.type == "plastron"),
  "sample.ID"
])])

# j.cos.plas.f and j.shedd.plas.f come from the captive-juvenile family section
# above; column order must match the `category` labels passed to venn.trip below
plas.input.f <- cbind.data.frame(j.cos.plas.f, a.wild.plas.f, j.shedd.plas.f)

# TRUE/FALSE spelled out: T and F are ordinary variables that can be reassigned
plas.input.f <- venn.cat(plas.input.f, TRUE, FALSE)

# NOTE(review): the empty ggplot is printed before the Venn grob, presumably to
# start a clean page for grid.draw — confirm
ggplot() + theme_bw()
grid.draw(venn.trip(plas.input.f, category = c("Cosley", "Wild", "Shedd")))

3.5.5 Comparisons of water and plastron samples for wild turtles based on ASVs

  • Wild turtles were adults
# Make row sums for each subcategory (the actual numbers are irrelevant; only
# presence/absence matters)

# wild water samples (the filter is sample.type == "water", not plastron)
w.wild <- rowSums(asv[, which(colnames(asv) %in% meta[
  which(meta$captive.wild == "wild" & meta$sample.type == "water"),
  "sample.ID"
])])

# w.shedd, a.wild.plas and w.cos come from earlier ASV sections; column order
# must match the `category` labels passed to venn.quad below
a.water.input <- cbind.data.frame(w.shedd, a.wild.plas, w.cos, w.wild)

# TRUE/FALSE spelled out: T and F are ordinary variables that can be reassigned
a.water.input <- venn.cat(a.water.input, TRUE, FALSE)

# NOTE(review): the empty ggplot is printed before the Venn grob, presumably to
# start a clean page for grid.draw — confirm
ggplot() + theme_bw()
grid.draw(venn.quad(
  a.water.input,
  category = c("Shedd water", "Wild adult", "Cosley water", "Wild water")
))

3.5.6 Comparisons of water and plastron samples for wild turtles based on families

  • Wild turtles were adults
# Make row sums for each subcategory (the actual numbers are irrelevant; only
# presence/absence matters)

# wild water samples, family-level table (the filter is sample.type == "water",
# not plastron)
w.wild.f <- rowSums(fam[, which(colnames(fam) %in% meta[
  which(meta$captive.wild == "wild" & meta$sample.type == "water"),
  "sample.ID"
])])

# w.shedd.f, a.wild.plas.f and w.cos.f come from earlier family sections; column
# order must match the `category` labels passed to venn.quad below
a.water.input.f <- cbind.data.frame(w.shedd.f, a.wild.plas.f, w.cos.f, w.wild.f)

# TRUE/FALSE spelled out: T and F are ordinary variables that can be reassigned
a.water.input.f <- venn.cat(a.water.input.f, TRUE, FALSE)

# NOTE(review): the empty ggplot is printed before the Venn grob, presumably to
# start a clean page for grid.draw — confirm
ggplot() + theme_bw()
grid.draw(venn.quad(
  a.water.input.f,
  category = c("Shedd water", "Wild adult", "Cosley water", "Wild water")
))

3.5.7 Comparisons of cloaca samples based on ASVs

  • Wild turtles were adults
# Make row sums for each subcategory (the actual numbers are irrelevant; only
# presence/absence matters)

# juveniles at Cosley, cloaca
j.cos.clo <- rowSums(asv[, which(colnames(asv) %in% meta[
  which(
    meta$age == "juvenile" &
      meta$location == "Cosley" &
      meta$sample.type == "cloaca"
  ),
  "sample.ID"
])])

# adults at Cosley, cloaca
a.cos.clo <- rowSums(asv[, which(colnames(asv) %in% meta[
  which(
    meta$age == "adult" &
      meta$location == "Cosley" &
      meta$sample.type == "cloaca"
  ),
  "sample.ID"
])])

# wild turtles, cloaca
a.wild.clo <- rowSums(asv[, which(colnames(asv) %in% meta[
  which(meta$captive.wild == "wild" & meta$sample.type == "cloaca"),
  "sample.ID"
])])

# w.cos comes from the captive-juvenile ASV section above; column order must
# match the `category` labels passed to venn.quad below
clo.input <- cbind.data.frame(a.wild.clo, w.cos, a.cos.clo, j.cos.clo)

# TRUE/FALSE spelled out: T and F are ordinary variables that can be reassigned
clo.input <- venn.cat(clo.input, TRUE, FALSE)

# NOTE(review): the empty ggplot is printed before the Venn grob, presumably to
# start a clean page for grid.draw — confirm
ggplot() + theme_bw()
grid.draw(venn.quad(
  clo.input,
  category = c("Wild adult", "Cosley water", "Cosley adult", "Cosley juvenile")
))

3.5.8 Comparisons of cloaca samples based on families

  • Wild turtles were adults
# Make row sums for each subcategory (the actual numbers are irrelevant; only
# presence/absence matters)

# juveniles at Cosley, cloaca
j.cos.clo.f <- rowSums(fam[, which(colnames(fam) %in% meta[
  which(
    meta$age == "juvenile" &
      meta$location == "Cosley" &
      meta$sample.type == "cloaca"
  ),
  "sample.ID"
])])

# adults at Cosley, cloaca
a.cos.clo.f <- rowSums(fam[, which(colnames(fam) %in% meta[
  which(
    meta$age == "adult" &
      meta$location == "Cosley" &
      meta$sample.type == "cloaca"
  ),
  "sample.ID"
])])

# wild turtles, cloaca
a.wild.clo.f <- rowSums(fam[, which(colnames(fam) %in% meta[
  which(meta$captive.wild == "wild" & meta$sample.type == "cloaca"),
  "sample.ID"
])])

# w.cos.f comes from the captive-juvenile family section above; column order
# must match the `category` labels passed to venn.quad below
clo.input.f <- cbind.data.frame(a.wild.clo.f, w.cos.f, a.cos.clo.f, j.cos.clo.f)

# TRUE/FALSE spelled out: T and F are ordinary variables that can be reassigned
clo.input.f <- venn.cat(clo.input.f, TRUE, FALSE)

# NOTE(review): the empty ggplot is printed before the Venn grob, presumably to
# start a clean page for grid.draw — confirm
ggplot() + theme_bw()
grid.draw(venn.quad(
  clo.input.f,
  category = c("Wild adult", "Cosley water", "Cosley adult", "Cosley juvenile")
))

4 Water

4.1 Subset

  • Make subset of water samples
# Metadata rows for water samples only
water <- meta[meta$sample.type == "water", ]

# Same subset with the tap-water rows dropped
water.no.tap <- water[water$water.type != "tap", ]

4.2 Heatmaps: sites

  • Make heatmaps of water samples
  • Tub and marsh water (tap water was removed)
  • Data are pseudoreplicated (repeated samples per tub)
  • Note: the ASV and Family graphs are labeled “OTU” on the Y axis. This is due to a bug in phyloseq that I have been unable to isolate.
# Heatmaps of the tub and marsh water samples (tap removed), with
# location.general passed as the single ordering column.
# NOTE(review): the other phylo.heat.loop calls in this file spell the last
# argument `taxa.label`; confirm which name matches the function's formal.
phylo.heat.loop(
  phylo.list,
  data = water.no.tap,
  ID.col = "sample.ID",
  order1 = "location.general",
  title = "Water locations",
  method = "NMDS",
  taxa.label.list = label.list
)

  • Differences between wild and tub samples remain obvious all the way through Class, and are still apparent at the Phylum level
  • The two sites are largely similar by the Class level, but different before then

4.3 Alpha diversity: sites

4.3.1 Richness

Compare richness among Cosley, Shedd, and wild (no tap)

  • Run a linear model on richness where
    • location.general = Shedd, Cosley, or wild marshes
    • specific.wild = tub# or swamp (PW or TC)
    • specific is nested in location
    • Date is a random effect (categorical)
    • Tap water was not included

Run model

# Mixed model for richness: log10 read depth as a covariate, specific.wild
# nested within location.general, and a random intercept for date.
wrm1 <- lmer(
  rich ~ log10(read.depth) + location.general / specific.wild + (1 | date),
  data = water.no.tap
)
# Diagnostic plots requested by name: "qq" and "ncv"
check_model(wrm1, check = c("qq", "ncv"))

  • Model fit looks ok, run ANOVA
# ANOVA table for the richness mixed model, rendered with the shared table wrapper
kable.wrap(
  Anova(wrm1),
  caption = "Result: Mixed effects ANOVA comparing mean richness among water samples at different locaitons"
)
Result: Mixed effects ANOVA comparing mean richness among water samples at different locaitons
Chisq Df Pr(>Chisq)
log10(read.depth) 66.01709 1 0.0000000
location.general 88.81393 2 0.0000000
location.general:specific.wild 12.84350 3 0.0049876
  • All factors were significant
  • Run post hoc test
# Pairwise contrasts among the location.general levels of the richness model
kable.wrap(
  pairs(emmeans(wrm1, "location.general")),
  caption = "Result: Post hoc test of mixed effects ANOVA comparing mean richness among water samples at different locaitons"
)
Result: Post hoc test of mixed effects ANOVA comparing mean richness among water samples at different locaitons
contrast estimate SE df t.ratio p.value
Cosley - Shedd -35.17123 17.41127 13.67654 -2.020027 0.1447243
Cosley - Wild 159.00412 21.87357 20.65879 7.269235 0.0000012
Shedd - Wild 194.17534 23.48132 16.25875 8.269355 0.0000009
  • Tub water richness did not differ significantly between Shedd and Cosley
  • Tubs at both the Shedd and Cosley had significantly higher richness than the marshes

4.3.2 Evenness

Compare evenness among Cosley, Shedd, and wild (no tap)

  • Run a linear model on evenness where
    • location.general = Shedd, Cosley, or wild marshes
    • specific.wild = tub# or swamp (PW or TC)
    • specific is nested in location
    • Date is a random effect (categorical)
    • Tap water was not included

Run model

# Mixed model for evenness: log10 read depth as a covariate, specific.wild
# nested within location.general, and a random intercept for date.
wem1 <- lmer(
  even ~ log10(read.depth) + location.general / specific.wild + (1 | date),
  data = water.no.tap
)
# Diagnostic plots requested by name: "qq" and "ncv"
check_model(wem1, check = c("qq", "ncv"))

  • Model fit looks acceptable, run ANOVA
# ANOVA table for the evenness mixed model, rendered with the shared table wrapper
kable.wrap(
  Anova(wem1),
  caption = "Result: Mixed effects ANOVA comparing mean evenness among water samples at different locaitons"
)
Result: Mixed effects ANOVA comparing mean evenness among water samples at different locaitons
Chisq Df Pr(>Chisq)
log10(read.depth) 2.555057 1 0.1099419
location.general 3.353942 2 0.1869393
location.general:specific.wild 6.171513 3 0.1035574

4.4 Beta diversity: sites

  • PCoAs (PERMANOVAs cannot be run accurately due to complex data structure)
# For every distance matrix in `dists`, run a two-axis PCoA on the water
# samples and print a scatter plot of the first two coordinates.
# seq_along() is used so an empty `dists` skips the loop instead of iterating
# over c(1, 0).
for (i in seq_along(dists)) {

  meta.i <- water
  # PCoA with a correction for negative eigenvalues (look up the `add` argument
  # of cmdscale); the distance matrix is subset to the water sample IDs
  pcoa <- cmdscale(
    dist_subset(dists[[i]], meta.i$sample.ID),
    k = 2,
    add = TRUE,
    eig = TRUE
  )
  # Percent of the summed eigenvalues accounted for by each of the first two axes
  pcoa.eig <- (pcoa$eig[1:2] / sum(pcoa$eig)) * 100
  meta.i$x <- pcoa$points[, 1] # x coordinates
  meta.i$y <- pcoa$points[, 2] # y coordinates

  # Axis labels are the raw percentages computed above
  plot <- ggplot(meta.i, aes(x = x, y = y)) +
    geom_point(
      aes(shape = location.general, fill = water.type, stroke = 1),
      color = "black",
      size = 4
    ) +
    scale_shape_manual(values = c(21, 22, 24, 25)) +
    scale_fill_manual(values = c("lightblue", "blue", "darkblue")) +
    theme_bw() +
    xlab(pcoa.eig[1]) +
    ylab(pcoa.eig[2]) +
    guides(fill = guide_legend(
      override.aes = list(colour = c("lightblue", "blue", "darkblue"))
    )) +
    ggtitle(paste(names(dists)[i], "(water samples)"))

  print(plot)
}

  • The complex data structure makes it impossible to run a statistically valid PERMANOVA across all groups, and some sample sizes are very small
  • Nevertheless, using any dissimilarity metric, there clearly appear to be strong differences among all categories. Interestingly, the tap water was often closer to the marshes than to the tubs

4.5 Cosley

Look more closely within Cosley

  • Subset to Cosley data
# Water-sample metadata restricted to the Cosley location
cos.w <- water[water$location == "Cosley", ]

4.5.1 Tap and tubs

Compare tubs and tap water within Cosley

  • Compare tap and tubs statistically for days when both were present
  • For each of the 5 days, there is a water sample for tub7, a water sample for tub11, and a tap water sample
    • All of these days were water change days
      • All tub samples were taken after water changes
      • Water changes involved completely changing the water and scrubbing the tanks (no chemicals other than water were used)
      • Tap samples were collected straight from the hose used to fill the tanks
  • Note: This is not the full collection of water samples available and should not be relied on for tests of changes over time. Date is simply included here as a covariate. Subsequent tests using all water samples will evaluate changes over time.
  • The question of interest here is whether tubs and tap water are different
    • Therefore, we will simply compare tubs and tap, with tub number nested under water type
    • The only model output terms we care about here are date and water.type
# Keep the Cosley water rows whose date matches a date on which a tap sample
# was also taken
water.cosley.tapXtub <- cos.w[
  cos.w$date %in% cos.w[cos.w$water.type == "tap", "date"],
]

4.5.1.1 Heatmaps

  • Ordered by date then by tap or tub
  • Thus, the data are in triplets, with each triplet containing a tap, tub11, and tub 7 sample (in that order)
  • The triplets are ordered by date
  • Note: the ASV and Family graphs are labeled “OTU” on the Y axis. This is due to a bug in phyloseq that I have been unable to isolate.
# Heatmaps for the tap-vs-tub subset, with date and then specific passed as
# the ordering columns.
phylo.heat.loop(
  phylo.list,
  data = water.cosley.tapXtub,
  ID.col = "sample.ID",
  order1 = "date",
  order2 = "specific",
  title = "Tub vs tap ordered by tap or tub (water change days)",
  method = "NMDS",
  taxa.label = label.list
)

4.5.1.2 Richness

Compare tubs and tap water within Cosley (richness)

  • Plot regressions of richness over time
# Richness over time with one linear fit (and its confidence band) per level
# of `specific`. TRUE is spelled out because T is a reassignable variable.
ggplot(
  water.cosley.tapXtub,
  aes(
    x = as.Date(date),
    y = rich,
    group = specific,
    color = specific,
    fill = specific
  )
) +
  geom_point(size = 4) +
  geom_smooth(method = "lm", se = TRUE) +
  theme_bw() +
  ggtitle("Cosley water richness among tubs and tap over time")

  • Run and check linear model for richness
# Linear model for richness: specific nested within water.type, plus date.sc
# and log10 read depth as covariates.
wr3 <- lm(
  rich ~ water.type / specific + date.sc + log10(read.depth),
  data = water.cosley.tapXtub
)
# Diagnostic plots requested by name: "qq" and "ncv"
check_model(wr3, check = c("qq", "ncv"))

  • Acceptable fit, run ANOVA
# ANOVA table for the tap-vs-tub richness model
kable.wrap(
  Anova(wr3),
  caption = "Result: ANOVA examining richness and date among tubs and tap"
)
Result: ANOVA examining richness and date among tubs and tap
Sum Sq Df F value Pr(>F)
water.type 332359.0387 1 1071.637845 0.0000000
date.sc 935.8983 1 3.017652 0.1130055
log10(read.depth) 12585.8881 1 40.581156 0.0000814
water.type:specific 2274.0964 1 7.332455 0.0220147
Residuals 3101.4119 10 NA NA
  • Significant main effect of water type on richness (tubs have more richness than tap water)
  • Non-significant effect of date (sloped line, but small sample sizes and wide confidence intervals)

4.5.1.3 Evenness

Compare tubs and tap water within Cosley (evenness)

  • Plot evenness
# Evenness against sampling date, with one linear fit (and its confidence
# band) per level of "specific"
ggplot(
  water.cosley.tapXtub,
  aes(
    x = as.Date(date),
    y = even,
    group = specific,
    color = specific,
    fill = specific
  )
) +
  geom_point(size = 4) +
  geom_smooth(method = "lm", se = TRUE) +
  theme_bw() +
  labs(title = "Cosley water evenness among tubs and tap over time")

  • Run and check linear model for evenness
# Linear model of evenness: water.type (with "specific" nested inside it),
# date.sc, and log10 read depth as a covariate
we3 <- lm(
  even ~ water.type / specific + date.sc + log10(read.depth),
  data = water.cosley.tapXtub
)
# Model diagnostics restricted to the "qq" and "ncv" checks
check_model(we3, check = c("qq", "ncv"))

  • Acceptable fit, run ANOVA
# ANOVA table for we3 (car::Anova with its default settings), rendered with
# the shared table wrapper
kable.wrap(
  x = Anova(we3),
  caption = "Result: ANOVA examining evenness and date among tubs and tap"
)
Result: ANOVA examining evenness and date among tubs and tap
Sum Sq Df F value Pr(>F)
water.type 0.2450827 1 49.9672076 0.0000342
date.sc 0.0057748 1 1.1773646 0.3033584
log10(read.depth) 0.0063828 1 1.3013247 0.2805547
water.type:specific 0.0013124 1 0.2675764 0.6162100
Residuals 0.0490487 10 NA NA
  • Significant main effect of water type on evenness (tubs have more evenness than tap water)
  • Non-significant effect of date (probably due to small sample size for tubs and flat slope for tap)

4.5.1.4 Beta diversity

Compare tubs and tap water within Cosley

4.5.1.4.1 PCoA plots
# Draw one PCoA plot per distance matrix in `dists` for the tap-vs-tub subset.
# seq_along() is used so that an empty `dists` produces no iterations
# (1:length(dists) would iterate over c(1, 0)).
for (i in seq_along(dists)) {

  meta.i <- water.cosley.tapXtub

  # PCoA on the distances among these samples only; add = TRUE applies a
  # correction for negative eigenvalues (see the `add` argument of cmdscale)
  pcoa <- cmdscale(
    dist_subset(dists[[i]], meta.i$sample.ID),
    k = 2,
    add = TRUE,
    eig = TRUE
  )

  # percent of the summed eigenvalues accounted for by each of the first two axes
  pcoa.eig <- (pcoa$eig[1:2] / sum(pcoa$eig)) * 100
  meta.i$x <- pcoa$points[, 1] # x coordinates
  meta.i$y <- pcoa$points[, 2] # y coordinates

  # shape distinguishes "specific", fill distinguishes water.type; the axis
  # labels are the percentages computed above
  plot <- ggplot(meta.i, aes(x = x, y = y)) +
    geom_point(aes(shape = specific, fill = water.type, stroke = 1), color = "black", size = 4) +
    scale_shape_manual(values = c(21, 22, 24, 25)) +
    scale_fill_manual(values = c("red", "blue")) +
    theme_bw() +
    xlab(pcoa.eig[1]) +
    ylab(pcoa.eig[2]) +
    # colour the fill-legend keys to match the manual fill values
    guides(fill = guide_legend(override.aes = list(colour = c("red", "blue")))) +
    ggtitle(paste(names(dists)[i], "(Cosley water samples on a water change day)"))

  print(plot)
}

4.5.1.4.2 PERMANOVAs
  • Compare beta diversity for tap water and tubs at Cosley
  • Again, only the water.type result and, to a lesser degree, the date result are of interest
  • Run PERMANOVAs (specific [i.e., tub number] nested in water type)
# PERMANOVAs over every distance matrix in `dists` for the tap-vs-tub subset;
# the value returned by perm.loop is kept in `temp`
temp <- perm.loop(
  "dist_subset(distance.list[[i]],water.cosley.tapXtub$sample.ID)~log10(read.depth)+water.type/specific+date.sc",
  water.cosley.tapXtub,
  c("read.depth", "water.type", "date"),
  "PERMANOVA results: Cosley tubs vs tap",
  distance.list = dists
)
Bray-Curtis PERMANOVA results: Cosley tubs vs tap
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2851652 0.0663062 2.166450 0.0841832
water.type 1 2.0659689 0.4803763 15.695524 0.0002000
date.sc 1 0.3859589 0.0897426 2.932196 0.0331934
water.type:specific 1 0.2473586 0.0575155 1.879226 0.1293741
Residual 10 1.3162791 0.3060594 NA NA
Total 14 4.3007307 1.0000000 NA NA
Jaccard PERMANOVA results: Cosley tubs vs tap
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.3679718 0.0717031 1.718589 0.0917816
water.type 1 1.8426318 0.3590560 8.605898 0.0002000
date.sc 1 0.4490512 0.0875023 2.097266 0.0473905
water.type:specific 1 0.3310972 0.0645177 1.546369 0.1289742
Residual 10 2.1411268 0.4172208 NA NA
Total 14 5.1318788 1.0000000 NA NA
Weighted Unifrac PERMANOVA results: Cosley tubs vs tap
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0174296 0.1364875 5.039445 0.0055989
water.type 1 0.0520158 0.4073250 15.039411 0.0003999
date.sc 1 0.0185427 0.1452043 5.361290 0.0065987
water.type:specific 1 0.0051265 0.0401447 1.482237 0.2039592
Residual 10 0.0345863 0.2708384 NA NA
Total 14 0.1277010 1.0000000 NA NA
Uneighted Unifrac PERMANOVA results: Cosley tubs vs tap
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2647658 0.0723704 3.401719 0.0473905
water.type 1 2.3182418 0.6336620 29.784838 0.0002000
date.sc 1 0.1790666 0.0489456 2.300653 0.1021796
water.type:specific 1 0.1180795 0.0322755 1.517089 0.1977604
Residual 10 0.7783295 0.2127465 NA NA
Total 14 3.6584832 1.0000000 NA NA
Bray-Curtis: Family PERMANOVA results: Cosley tubs vs tap
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2914649 0.0941084 4.367951 0.0179964
water.type 1 1.7835822 0.5758841 26.729119 0.0002000
date.sc 1 0.2514214 0.0811791 3.767851 0.0309938
water.type:specific 1 0.1033711 0.0333765 1.549139 0.1987602
Residual 10 0.6672806 0.2154520 NA NA
Total 14 3.0971201 1.0000000 NA NA
Jaccard: Family PERMANOVA results: Cosley tubs vs tap
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1722990 0.0533140 3.024421 0.0667866
water.type 1 2.3091001 0.7144979 40.532379 0.0002000
date.sc 1 0.1018249 0.0315074 1.787365 0.1693661
water.type:specific 1 0.0788634 0.0244025 1.384316 0.2383523
Residual 10 0.5696927 0.1762783 NA NA
Total 14 3.2317802 1.0000000 NA NA
Weighted Unifrac: Family PERMANOVA results: Cosley tubs vs tap
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1548390 0.0945062 5.386546 0.0131974
water.type 1 1.0417821 0.6358530 36.241558 0.0002000
date.sc 1 0.1127102 0.0687928 3.920968 0.0345931
water.type:specific 1 0.0416144 0.0253994 1.447684 0.2125575
Residual 10 0.2874551 0.1754486 NA NA
Total 14 1.6384009 1.0000000 NA NA
Unweighted Unifrac: Family PERMANOVA results: Cosley tubs vs tap
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1411360 0.0483185 3.202831 0.0697860
water.type 1 2.2250200 0.7617450 50.492869 0.0002000
date.sc 1 0.0690141 0.0236273 1.566151 0.2253549
water.type:specific 1 0.0451209 0.0154473 1.023938 0.3557289
Residual 10 0.4406602 0.1508619 NA NA
Total 14 2.9209512 1.0000000 NA NA
Summary (p values): PERMANOVA results: Cosley tubs vs tap
read.depth water.type date read.depth.adjusted water.type.adjusted date.adjusted
Bray-Curtis 0.0841832 0.0002000 0.0331934 0.1683663 0.0007998 0.0995801
Jaccard 0.0917816 0.0002000 0.0473905 0.1683663 0.0007998 0.0995801
Weighted Unifrac 0.0055989 0.0003999 0.0065987 0.0223955 0.0007998 0.0263947
Uneighted Unifrac 0.0473905 0.0002000 0.1021796 0.1421716 0.0007998 0.1021796
Bray-Curtis: Family 0.0179964 0.0002000 0.0309938 0.0539892 0.0007998 0.1239752
Jaccard: Family 0.0667866 0.0002000 0.1693661 0.1335733 0.0007998 0.3387323
Weighted Unifrac: Family 0.0131974 0.0002000 0.0345931 0.0527894 0.0007998 0.1239752
Unweighted Unifrac: Family 0.0697860 0.0002000 0.2253549 0.1335733 0.0007998 0.3387323
  • The difference between tubs and tap water is strongly significant
  • The effect of date was generally not significant, but as before, that could be from low sample sizes and a lack of effect for tap water

4.5.2 Tubs and dates

Compare tubs over time within Cosley

  • This will use all available data on water samples of Cosley tubs to compare tubs as well as looking at changes over time
    • Water samples were only collected from 2 tubs
  • Water changes took place weekly, and half the water samples were taken the day before a water change, while the other half were taken the day after a water change
  • Water changes are, therefore, included in the model
# Subset data: Cosley water samples with the tap-water samples removed
water.cosley.no.tap <- cos.w[cos.w[["water.type"]] != "tap", ]

# Recode the day codes: "C1" becomes "after.change", "C7" becomes "before.change"
water.cosley.no.tap$day.number[water.cosley.no.tap$day.number == "C1"] <- "after.change"
water.cosley.no.tap$day.number[water.cosley.no.tap$day.number == "C7"] <- "before.change"

# Rename the recoded column from "day.number" to "water.change"
names(water.cosley.no.tap)[names(water.cosley.no.tap) == "day.number"] <- "water.change"

4.5.2.1 Richness

Compare tubs over time within Cosley

  • Plot richness
# Richness over time: colour by "specific", point shape and line type by
# water.change, linear fits drawn without confidence bands
ggplot(
  water.cosley.no.tap,
  aes(x = as.Date(date), y = rich, shape = water.change, color = specific)
) +
  geom_point(size = 4) +
  geom_smooth(aes(linetype = water.change), method = "lm", se = FALSE) +
  theme_bw() +
  labs(title = "Richness between tubs over time")

# Richness boxplots: one group per level of "specific", filled by water.change
ggplot(
  water.cosley.no.tap,
  aes(x = specific, y = rich, fill = water.change)
) +
  geom_boxplot() +
  theme_bw() +
  labs(title = "Richness between tubs and water changes")

  • Run and check linear model for richness
# Linear model of richness: water.change, "specific", date.sc, and log10 read
# depth as a covariate
wr4 <- lm(
  rich ~ water.change + specific + date.sc + log10(read.depth),
  data = water.cosley.no.tap
)
# Model diagnostics restricted to the "qq" and "ncv" checks
check_model(wr4, check = c("qq", "ncv"))

  • Acceptable fit, run ANOVA
# ANOVA table for wr4 (car::Anova with its default settings), rendered with
# the shared table wrapper
kable.wrap(
  x = Anova(wr4),
  caption = "Result: ANOVA examining richness between tubs and water changes over time"
)
Result: ANOVA examining richness between tubs and water changes over time
Sum Sq Df F value Pr(>F)
water.change 6600.747 1 17.520251 0.0007955
specific 3156.493 1 8.378226 0.0111176
date.sc 7681.375 1 20.388546 0.0004105
log10(read.depth) 12582.564 1 33.397690 0.0000364
Residuals 5651.243 15 NA NA
  • Significant difference for richness between tubs
  • Samples right after water changes have higher richness than days a week later (although this is largely driven by tub11)
  • Richness significantly declined over time

4.5.2.2 Evenness

Compare tubs over time within Cosley

  • Plot evenness
# Evenness over time: colour by "specific", point shape and line type by
# water.change, linear fits drawn without confidence bands
ggplot(
  water.cosley.no.tap,
  aes(x = as.Date(date), y = even, shape = water.change, color = specific)
) +
  geom_point(size = 4) +
  geom_smooth(aes(linetype = water.change), method = "lm", se = FALSE) +
  theme_bw() +
  labs(title = "Evenness between tubs over time")

# Evenness boxplots: one group per level of "specific", filled by water.change
ggplot(
  water.cosley.no.tap,
  aes(x = specific, y = even, fill = water.change)
) +
  geom_boxplot() +
  theme_bw() +
  labs(title = "Evenness between tubs and water changes")

  • Run and check linear model for evenness
# Linear model of evenness: water.change, "specific", date.sc, and log10 read
# depth as a covariate
we4 <- lm(
  even ~ water.change + specific + date.sc + log10(read.depth),
  data = water.cosley.no.tap
)
# Model diagnostics restricted to the "qq" and "ncv" checks
check_model(we4, check = c("qq", "ncv"))

  • Acceptable fit, run ANOVA
# ANOVA table for we4 (car::Anova with its default settings), rendered with
# the shared table wrapper
kable.wrap(
  x = Anova(we4),
  caption = "Result: ANOVA examining evenness between tubs and water changes over time"
)
Result: ANOVA examining evenness between tubs and water changes over time
Sum Sq Df F value Pr(>F)
water.change 0.0145462 1 4.164238 0.0592935
specific 0.0074514 1 2.133156 0.1647721
date.sc 0.0263423 1 7.541190 0.0150028
log10(read.depth) 0.0126241 1 3.613993 0.0766843
Residuals 0.0523968 15 NA NA
  • Evenness significantly declined over time
  • Nearly significant effect of water change days (higher immediately after water change)
  • No difference between tubs

4.5.2.3 Beta diversity

Compare tubs over time within Cosley

4.5.2.3.1 PCoAs (by tub)
  • Color plot by tub, and shape by water changes
# Draw one PCoA plot per distance matrix in `dists` for the Cosley tub samples,
# coloured by "specific" and shaped by water.change.
# seq_along() is used so that an empty `dists` produces no iterations
# (1:length(dists) would iterate over c(1, 0)).
for (i in seq_along(dists)) {

  meta.i <- water.cosley.no.tap

  # PCoA on the distances among these samples only; add = TRUE applies a
  # correction for negative eigenvalues (see the `add` argument of cmdscale)
  pcoa <- cmdscale(
    dist_subset(dists[[i]], meta.i$sample.ID),
    k = 2,
    add = TRUE,
    eig = TRUE
  )

  # percent of the summed eigenvalues accounted for by each of the first two axes
  pcoa.eig <- (pcoa$eig[1:2] / sum(pcoa$eig)) * 100
  meta.i$x <- pcoa$points[, 1] # x coordinates
  meta.i$y <- pcoa$points[, 2] # y coordinates

  # axis labels are the percentages computed above
  plot <- ggplot(meta.i, aes(x = x, y = y)) +
    geom_point(aes(shape = water.change, color = specific, stroke = 1), size = 4) +
    theme_bw() +
    xlab(pcoa.eig[1]) +
    ylab(pcoa.eig[2]) +
    ggtitle(paste(names(dists)[i], "(Cosley water samples between tubs and water changes)"))

  print(plot)
}

  • There appears to be an effect of both tub and water changes
4.5.2.3.2 PCoAs (by date)
  • Plotted continuously
# Draw one PCoA plot per distance matrix in `dists` for the Cosley tub samples,
# with points coloured on a continuous date scale and shaped by "specific".
# seq_along() is used so that an empty `dists` produces no iterations
# (1:length(dists) would iterate over c(1, 0)).
for (i in seq_along(dists)) {

  meta.i <- water.cosley.no.tap

  # PCoA on the distances among these samples only; add = TRUE applies a
  # correction for negative eigenvalues (see the `add` argument of cmdscale)
  pcoa <- cmdscale(
    dist_subset(dists[[i]], meta.i$sample.ID),
    k = 2,
    add = TRUE,
    eig = TRUE
  )

  # percent of the summed eigenvalues accounted for by each of the first two axes
  pcoa.eig <- (pcoa$eig[1:2] / sum(pcoa$eig)) * 100
  meta.i$x <- pcoa$points[, 1] # x coordinates
  meta.i$y <- pcoa$points[, 2] # y coordinates

  # size is a fixed setting, so it belongs outside aes(); inside aes() it was
  # treated as a mapped variable, which adds a meaningless "size" legend and
  # lets the size scale (not the value 4) decide the point size
  plot <- ggplot(meta.i, aes(x = x, y = y)) +
    geom_point(aes(shape = specific, fill = as.Date(date), color = as.Date(date)), size = 4) +
    theme_bw() +
    xlab(pcoa.eig[1]) +
    ylab(pcoa.eig[2]) +
    ggtitle(paste(names(dists)[i], "(Cosley water tubs over time)"))

  print(plot)
}

  • There is clearly an effect of date
4.5.2.3.3 PERMANOVA

Compare tubs over time within Cosley

# PERMANOVAs over every distance matrix in `dists` for the Cosley tub samples;
# the value returned by perm.loop is kept in `temp`
temp <- perm.loop(
  "dist_subset(distance.list[[i]],water.cosley.no.tap$sample.ID)~log10(read.depth)+date.sc+water.change+specific",
  water.cosley.no.tap,
  c("read.depth", "date", "water.change", "specific"),
  "PERMANOVA results: Cosley tubs and water changes over time",
  distance.list = dists
)
Bray-Curtis PERMANOVA results: Cosley tubs and water changes over time
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2645120 0.0548287 1.474485 0.1033793
date.sc 1 0.9487781 0.1966650 5.288831 0.0002000
water.change 1 0.4632039 0.0960140 2.582065 0.0023995
specific 1 0.4569510 0.0947179 2.547209 0.0033993
Residual 15 2.6908918 0.5577745 NA NA
Total 19 4.8243368 1.0000000 NA NA
Jaccard PERMANOVA results: Cosley tubs and water changes over time
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.3573124 0.0551351 1.282460 0.1087782
date.sc 1 0.9166107 0.1414375 3.289886 0.0002000
water.change 1 0.5244395 0.0809236 1.882311 0.0043991
specific 1 0.5030909 0.0776294 1.805687 0.0055989
Residual 15 4.1792214 0.6448744 NA NA
Total 19 6.4806748 1.0000000 NA NA
Weighted Unifrac PERMANOVA results: Cosley tubs and water changes over time
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0115369 0.0780177 2.332605 0.0723855
date.sc 1 0.0473687 0.3203293 9.577338 0.0002000
water.change 1 0.0098006 0.0662765 1.981563 0.1103779
specific 1 0.0049801 0.0336777 1.006911 0.3661268
Residual 15 0.0741887 0.5016988 NA NA
Total 19 0.1478749 1.0000000 NA NA
Uneighted Unifrac PERMANOVA results: Cosley tubs and water changes over time
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1779077 0.0748160 2.049445 0.0099980
date.sc 1 0.3833540 0.1612129 4.416127 0.0002000
water.change 1 0.2808745 0.1181169 3.235593 0.0002000
specific 1 0.2336836 0.0982716 2.691967 0.0007998
Residual 15 1.3021162 0.5475825 NA NA
Total 19 2.3779360 1.0000000 NA NA
Bray-Curtis: Family PERMANOVA results: Cosley tubs and water changes over time
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2089550 0.0707354 1.951977 0.0573885
date.sc 1 0.6008503 0.2033998 5.612911 0.0002000
water.change 1 0.2980901 0.1009094 2.784642 0.0115977
specific 1 0.2404228 0.0813879 2.245937 0.0299940
Residual 15 1.6057184 0.5435675 NA NA
Total 19 2.9540366 1.0000000 NA NA
Jaccard: Family PERMANOVA results: Cosley tubs and water changes over time
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1051010 0.0791503 2.216688 0.0075985
date.sc 1 0.2323099 0.1749497 4.899652 0.0002000
water.change 1 0.1258111 0.0947468 2.653485 0.0009998
specific 1 0.1534411 0.1155546 3.236229 0.0003999
Residual 15 0.7112033 0.5355985 NA NA
Total 19 1.3278665 1.0000000 NA NA
Weighted Unifrac: Family PERMANOVA results: Cosley tubs and water changes over time
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0886338 0.0726102 2.038622 0.0719856
date.sc 1 0.2992744 0.2451703 6.883462 0.0002000
water.change 1 0.1048851 0.0859235 2.412410 0.0415917
specific 1 0.0757265 0.0620364 1.741748 0.1079784
Residual 15 0.6521596 0.5342595 NA NA
Total 19 1.2206793 1.0000000 NA NA
Unweighted Unifrac: Family PERMANOVA results: Cosley tubs and water changes over time
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0743859 0.0900828 2.492967 0.0063987
date.sc 1 0.1297426 0.1571209 4.348188 0.0002000
water.change 1 0.0904461 0.1095321 3.031208 0.0003999
specific 1 0.0836010 0.1012425 2.801801 0.0015997
Residual 15 0.4475747 0.5420218 NA NA
Total 19 0.8257504 1.0000000 NA NA
Summary (p values): PERMANOVA results: Cosley tubs and water changes over time
read.depth date water.change specific read.depth.adjusted date.adjusted water.change.adjusted specific.adjusted
Bray-Curtis 0.1033793 2e-04 0.0023995 0.0033993 0.2171566 0.0007998 0.0071986 0.0101980
Jaccard 0.1087782 2e-04 0.0043991 0.0055989 0.2171566 0.0007998 0.0087982 0.0111978
Weighted Unifrac 0.0723855 2e-04 0.1103779 0.3661268 0.2171566 0.0007998 0.1103779 0.3661268
Uneighted Unifrac 0.0099980 2e-04 0.0002000 0.0007998 0.0399920 0.0007998 0.0007998 0.0031994
Bray-Curtis: Family 0.0573885 2e-04 0.0115977 0.0299940 0.1147770 0.0007998 0.0231954 0.0599880
Jaccard: Family 0.0075985 2e-04 0.0009998 0.0003999 0.0255949 0.0007998 0.0029994 0.0015997
Weighted Unifrac: Family 0.0719856 2e-04 0.0415917 0.1079784 0.1147770 0.0007998 0.0415917 0.1079784
Unweighted Unifrac: Family 0.0063987 2e-04 0.0003999 0.0015997 0.0255949 0.0007998 0.0015997 0.0047990
  • Strong effect of date
  • Generally an effect of water change (except for weighted UniFrac)
  • Generally a significant difference between tubs

4.6 Shedd

  • Subset to Shedd samples
  • Note: unlike at Cosley, there are no samples for before/after water changes, and on most days, there are two samples per tub
# Water samples whose location is "Shedd"
water.shedd <- water[water[["location"]] == "Shedd", ]

4.6.1 Richness

Shedd tub water richness between tubs and over time

# Richness against sampling date, grouped by "specific"; one linear fit per
# group, drawn without confidence bands
ggplot(
  water.shedd,
  aes(x = as.Date(date), y = rich, group = specific)
) +
  geom_point(
    aes(shape = specific, fill = specific, color = specific),
    size = 4
  ) +
  geom_smooth(
    aes(fill = NULL, color = specific),
    method = "lm",
    se = FALSE
  ) +
  theme_bw() +
  labs(title = "Shedd tub water richness over time")

  • Looks like increasing richness over time
  • Will run a linear model comparing richness between tubs (“specific”) and date (as a linear variable), with read depth included as a covariate
# "specific" = tub number
# Linear model of richness: "specific", date.sc, and log10 read depth as a
# covariate
wr5 <- lm(
  rich ~ specific + date.sc + log10(read.depth),
  data = water.shedd
)
# Model diagnostics restricted to the "qq" and "ncv" checks
check_model(wr5, check = c("qq", "ncv"))

  • Distribution is tailed due to outliers, but still not terrible. Attempts at other distributions are worse
  • Run ANOVA
# ANOVA table for wr5 (car::Anova with its default settings), rendered with
# the shared table wrapper
kable.wrap(
  x = Anova(wr5),
  caption = "Results: ANOVA comparing Shedd tub water richness between tubs and over time"
)
Results: ANOVA comparing Shedd tub water richness between tubs and over time
Sum Sq Df F value Pr(>F)
specific 365.1422 1 1.210767 0.2969621
date.sc 5954.2046 1 19.743414 0.0012480
log10(read.depth) 6992.2794 1 23.185543 0.0007074
Residuals 3015.7928 10 NA NA
  • There is a significant increase in Shedd tub richness over time
  • This is very interesting because at the Shedd, richness is increasing over time, and at Cosley, it is decreasing over time

4.6.2 Evenness

Shedd tub water evenness between tubs and over time

# Evenness against sampling date, grouped by "specific"; one linear fit per
# group, drawn without confidence bands.
# size is a fixed setting, so it sits outside aes() (as in the matching
# richness plot); inside aes() it was treated as a mapped variable, which adds
# a meaningless "size" legend and lets the size scale decide the point size.
ggplot(water.shedd, aes(x = as.Date(date), y = even, group = specific)) +
  geom_point(aes(shape = specific, fill = specific, color = specific), size = 4) +
  geom_smooth(method = "lm", aes(fill = NULL, color = specific), se = FALSE) +
  theme_bw() +
  ggtitle("Shedd tub water evenness over time")

  • No obvious trends
  • Will run a linear model comparing evenness between tubs (“specific”) and date (as a linear variable), with read depth included as a covariate
# "specific" = tub number
# Linear model of evenness: "specific", date.sc, and log10 read depth as a
# covariate
we5 <- lm(
  even ~ specific + date.sc + log10(read.depth),
  data = water.shedd
)
# Model diagnostics restricted to the "qq" and "ncv" checks
check_model(we5, check = c("qq", "ncv"))

  • Reasonable fit, run ANOVA
# ANOVA table for we5 (car::Anova with its default settings), rendered with
# the shared table wrapper
kable.wrap(
  x = Anova(we5),
  caption = "Results: ANOVA comparing Shedd tub water evenness between tubs and over time"
)
Results: ANOVA comparing Shedd tub water evenness between tubs and over time
Sum Sq Df F value Pr(>F)
specific 0.0003846 1 0.1923842 0.6702640
date.sc 0.0006152 1 0.3077535 0.5912530
log10(read.depth) 0.0003912 1 0.1956914 0.6676356
Residuals 0.0199901 10 NA NA
  • No significant effects

4.6.3 Beta diversity

4.6.3.1 PCoA plots

  • Plot by date
# Plot by date
# Draw one PCoA plot per distance matrix in `dists` for the Shedd water
# samples, with points coloured by date and shaped by "specific".
# seq_along() is used so that an empty `dists` produces no iterations
# (1:length(dists) would iterate over c(1, 0)).
for (i in seq_along(dists)) {

  meta.i <- water.shedd

  # PCoA on the distances among these samples only; add = TRUE applies a
  # correction for negative eigenvalues (see the `add` argument of cmdscale)
  pcoa <- cmdscale(
    dist_subset(dists[[i]], meta.i$sample.ID),
    k = 2,
    add = TRUE,
    eig = TRUE
  )

  # percent of the summed eigenvalues accounted for by each of the first two axes
  pcoa.eig <- (pcoa$eig[1:2] / sum(pcoa$eig)) * 100
  meta.i$x <- pcoa$points[, 1] # x coordinates
  meta.i$y <- pcoa$points[, 2] # y coordinates

  # pcoa.eig was previously computed but never shown; label the axes with it,
  # as the other PCoA loops in this document do
  plot <- ggplot(meta.i, aes(x = x, y = y, group = date)) +
    geom_point(aes(shape = specific, fill = date, color = date), size = 4) +
    theme_bw() +
    xlab(pcoa.eig[1]) +
    ylab(pcoa.eig[2]) +
    ggtitle(paste(names(dists)[i], "(Shedd tub water over time)"))

  print(plot)
}

  • Clearly a very strong effect of date

4.6.3.2 PERMANOVAs

# PERMANOVAs over every distance matrix in `dists` for the Shedd water
# samples; the value returned by perm.loop is kept in `temp`
temp <- perm.loop(
  "dist_subset(distance.list[[i]],water.shedd$sample.ID)~log10(read.depth)+date.sc+specific",
  water.shedd,
  c("read.depth", "date", "specific"),
  "PERMANOVA results: Shedd tubs over time",
  distance.list = dists
)
Bray-Curtis PERMANOVA results: Shedd tubs over time
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0937305 0.0411557 0.6997983 0.6928614
date.sc 1 0.6940624 0.3047530 5.1819195 0.0005999
specific 1 0.1502735 0.0659830 1.1219529 0.3119376
Residual 10 1.3393925 0.5881083 NA NA
Total 13 2.2774589 1.0000000 NA NA
Jaccard PERMANOVA results: Shedd tubs over time
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1677983 0.0480349 0.7534199 0.7162567
date.sc 1 0.8344927 0.2388864 3.7468996 0.0002000
specific 1 0.2638147 0.0755211 1.1845369 0.2481504
Residual 10 2.2271551 0.6375576 NA NA
Total 13 3.4932609 1.0000000 NA NA
Weighted Unifrac PERMANOVA results: Shedd tubs over time
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0020747 0.0293355 0.4330641 0.9040192
date.sc 1 0.0161929 0.2289600 3.3800095 0.0037992
specific 1 0.0045483 0.0643101 0.9493741 0.4547091
Residual 10 0.0479080 0.6773944 NA NA
Total 13 0.0707239 1.0000000 NA NA
Uneighted Unifrac PERMANOVA results: Shedd tubs over time
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0872984 0.0624359 1.052274 0.3625275
date.sc 1 0.3856891 0.2758452 4.649005 0.0002000
specific 1 0.0956045 0.0683764 1.152394 0.2859428
Residual 10 0.8296165 0.5933425 NA NA
Total 13 1.3982085 1.0000000 NA NA
Bray-Curtis: Family PERMANOVA results: Shedd tubs over time
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0393045 0.0318276 0.5192556 0.8232354
date.sc 1 0.3659376 0.2963257 4.8344414 0.0005999
specific 1 0.0727359 0.0588994 0.9609213 0.4341132
Residual 10 0.7569387 0.6129472 NA NA
Total 13 1.2349165 1.0000000 NA NA
Jaccard: Family PERMANOVA results: Shedd tubs over time
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0272159 0.0358914 0.5917664 0.8648270
date.sc 1 0.2087029 0.2752298 4.5379038 0.0002000
specific 1 0.0624568 0.0823657 1.3580204 0.1723655
Residual 10 0.4599103 0.6065131 NA NA
Total 13 0.7582859 1.0000000 NA NA
Weighted Unifrac: Family PERMANOVA results: Shedd tubs over time
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0145628 0.0341304 0.582272 0.7304539
date.sc 1 0.1369835 0.3210441 5.477075 0.0011998
specific 1 0.0250316 0.0586658 1.000850 0.3945211
Residual 10 0.2501034 0.5861597 NA NA
Total 13 0.4266813 1.0000000 NA NA
Unweighted Unifrac: Family PERMANOVA results: Shedd tubs over time
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0216499 0.0386862 0.6649559 0.7638472
date.sc 1 0.1697656 0.3033542 5.2141883 0.0005999
specific 1 0.0426289 0.0761736 1.3093061 0.2103579
Residual 10 0.3255840 0.5817860 NA NA
Total 13 0.5596285 1.0000000 NA NA
Summary (p values): PERMANOVA results: Shedd tubs over time
read.depth date specific read.depth.adjusted date.adjusted specific.adjusted
Bray-Curtis 0.6928614 0.0005999 0.3119376 1 0.0011998 0.9926015
Jaccard 0.7162567 0.0002000 0.2481504 1 0.0007998 0.9926015
Weighted Unifrac 0.9040192 0.0037992 0.4547091 1 0.0037992 0.9926015
Uneighted Unifrac 0.3625275 0.0002000 0.2859428 1 0.0007998 0.9926015
Bray-Curtis: Family 0.8232354 0.0005999 0.4341132 1 0.0017996 0.7890422
Jaccard: Family 0.8648270 0.0002000 0.1723655 1 0.0007998 0.6894621
Weighted Unifrac: Family 0.7304539 0.0011998 0.3945211 1 0.0017996 0.7890422
Unweighted Unifrac: Family 0.7638472 0.0005999 0.2103579 1 0.0017996 0.6894621
  • Strongly significant effect of date in each case

5 Cloaca

  • Subset samples to all cloacal samples
# Metadata rows whose sample.type is "cloaca"
cloaca <- meta[meta[["sample.type"]] == "cloaca", ]

5.1 Compare wild, captive, age

  • Analyses of cloaca data are challenging because data are available from disparate groups:
    • Wild adults
      • 2018
      • 2019
    • Captive (Cosley)
      • Adults
      • Juveniles
  • These groups do not make for clean comparisons in a single model, because date (year) is a factor in the wild, whereas age is a factor in captivity
  • Therefore, we will run 6 models:
    • Cosley juvenile vs Cosley adult
    • Wild adult 2018 vs wild adult 2019
    • Cosley juvenile vs wild adults 2018
    • Cosley juvenile vs wild adults 2019
    • Cosley adult vs wild adults 2018
    • Cosley adult vs wild adults 2019
  • We will use a sequential Bonferroni correction to adjust the p values
  • This will only be done for PERMANOVAs. The sample sizes within each of those groups are often too small to make meaningful comparisons of alpha diversity

make subsets

# Wild cloaca samples
cloaca.wild <- cloaca[cloaca$captive.wild == "wild", ]

# Captive cloaca samples
cloaca.cosley <- cloaca[cloaca$captive.wild == "captive", ]

# Each pairwise data set below stacks the wild rows for one year on top of the
# captive rows for one age class.

# Wild 2018 cloaca samples and captive adults
cloaca.cosley.adultXwild18 <- rbind(
  cloaca.wild[cloaca.wild$year == 2018, ],
  cloaca.cosley[cloaca.cosley$age == "adult", ]
)

# Wild 2019 cloaca samples and captive adults
cloaca.cosley.adultXwild19 <- rbind(
  cloaca.wild[cloaca.wild$year == 2019, ],
  cloaca.cosley[cloaca.cosley$age == "adult", ]
)

# Wild 2018 cloaca samples and captive juveniles
cloaca.cosley.juvenileXwild18 <- rbind(
  cloaca.wild[cloaca.wild$year == 2018, ],
  cloaca.cosley[cloaca.cosley$age == "juvenile", ]
)

# Wild 2019 cloaca samples and captive juveniles
cloaca.cosley.juvenileXwild19 <- rbind(
  cloaca.wild[cloaca.wild$year == 2019, ],
  cloaca.cosley[cloaca.cosley$age == "juvenile", ]
)

5.1.1 Heatmaps

  • Note: the ASV and Family graphs are labeled “OTU” on the Y axis. This is due to a bug in phyloseq that I have been unable to isolate.
# Heatmaps for all cloacal samples; samples are ordered by
# "location.general", then "age", then "year"
phylo.heat.loop(
  phylo.list,
  data = cloaca,
  ID.col = "sample.ID",
  order1 = "location.general",
  order2 = "age",
  order3 = "year",
  title = "Cloaca by location age and year",
  method = "NMDS",
  taxa.label = label.list
)

  • Very clear differences among groups
  • Interestingly, there are a few prominent ASVs shared by wild turtles and captive juveniles, but not wild turtles and captive adults

5.1.2 Richness

  • This plot was included in the manuscript
# Cloacal richness boxplots by location.general, filled by the pasted
# age/year combination
ggplot(
  cloaca,
  aes(x = location.general, y = rich, fill = paste(age, year))
) +
  geom_boxplot() +
  labs(title = "cloacal richness")

5.1.3 Evenness

  • This plot was included in the manuscript
# Cloacal evenness boxplots by location.general, filled by the pasted
# age/year combination
ggplot(
  cloaca,
  aes(x = location.general, y = even, fill = paste(age, year))
) +
  geom_boxplot() +
  labs(title = "cloacal evenness")

5.1.4 Beta diversity

Comparing cloacal samples for wild and captive turtles, years, and age

5.1.4.1 PCoAs and boxplots

  • Split by location, year, and age
# Draw one PCoA plot per distance matrix in `dists` for all cloacal samples,
# shaped by age and coloured by the pasted location.general/year combination.
# seq_along() is used so that an empty `dists` produces no iterations
# (1:length(dists) would iterate over c(1, 0)).
for (i in seq_along(dists)) {

  meta.i <- cloaca

  # PCoA on the distances among these samples only; add = TRUE applies a
  # correction for negative eigenvalues (see the `add` argument of cmdscale)
  pcoa <- cmdscale(
    dist_subset(dists[[i]], meta.i$sample.ID),
    k = 2,
    add = TRUE,
    eig = TRUE
  )

  # percent of the summed eigenvalues accounted for by each of the first two axes
  pcoa.eig <- (pcoa$eig[1:2] / sum(pcoa$eig)) * 100
  meta.i$x <- pcoa$points[, 1] # x coordinates
  meta.i$y <- pcoa$points[, 2] # y coordinates

  # The title is now built by a single paste() call. Previously the closing
  # parenthesis of paste() came before "Cloacal samples", so that text was
  # passed to ggtitle() as the subtitle instead of being part of the title.
  plot <- ggplot(meta.i, aes(x = x, y = y)) +
    geom_point(aes(shape = age, color = paste(location.general, year)), size = 4) +
  #  scale_shape_manual(values=c(21, 22,24,25))+
  #   scale_fill_manual(values = c("#cc3a47",rgb(1, 0.7921569, 0.1568627),rgb(0.0078, 0.4392, 0.7490)))+
    theme_bw() +
    xlab(pcoa.eig[1]) +
    ylab(pcoa.eig[2]) +
  #   guides(fill=guide_legend(override.aes=list(colour=c("#cc3a47",rgb(1, 0.7921569, 0.1568627),rgb(0.0078, 0.4392, 0.7490)))))+
    ggtitle(paste(names(dists)[i], "Cloacal samples"))

  print(plot)
}

  • Generally appear to be strong effects among all groups

5.1.4.2 PERMANOVAs

  • Note Final P value adjustment will be done in the “results summary” tab (to account for the four pairwise tests and the 8 dissimilarities)
5.1.4.2.1 Wild: 2018 vs 2019
# PERMANOVAs over every distance matrix in `dists` for the wild cloacal
# samples (read depth and year as terms); results kept in cloaca.wild.perm
cloaca.wild.perm <- perm.loop(
  "dist_subset(distance.list[[i]],cloaca.wild$sample.ID)~log10(read.depth)+year",
  cloaca.wild,
  c("log10(read.depth)", "year"),
  "PERMANOVA results: cloaca (wild adult 2018 vs wild adult 2019)",
  distance.list = dists
)
Bray-Curtis PERMANOVA results: cloaca (wild adult 2018 vs wild adult 2019)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.4278623 0.1177648 2.583890 0.0283943
year 1 0.8870925 0.2441633 5.357213 0.0002000
Residual 14 2.3182382 0.6380719 NA NA
Total 16 3.6331931 1.0000000 NA NA
Jaccard PERMANOVA results: cloaca (wild adult 2018 vs wild adult 2019)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.4747661 0.0985958 1.968165 0.040192
year 1 0.9633959 0.2000706 3.993803 0.000200
Residual 14 3.3771174 0.7013336 NA NA
Total 16 4.8152794 1.0000000 NA NA
Weighted Unifrac PERMANOVA results: cloaca (wild adult 2018 vs wild adult 2019)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0186261 0.0911756 1.972060 0.1103779
year 1 0.0534322 0.2615533 5.657205 0.0015997
Residual 14 0.1322298 0.6472712 NA NA
Total 16 0.2042881 1.0000000 NA NA
Uneighted Unifrac PERMANOVA results: cloaca (wild adult 2018 vs wild adult 2019)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2850398 0.1190033 2.386388 0.0159968
year 1 0.4379699 0.1828512 3.666738 0.0002000
Residual 14 1.6722164 0.6981455 NA NA
Total 16 2.3952262 1.0000000 NA NA
Bray-Curtis: Family PERMANOVA results: cloaca (wild adult 2018 vs wild adult 2019)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.3743847 0.1468614 3.149911 0.0251950
year 1 0.5108747 0.2004029 4.298279 0.0071986
Residual 14 1.6639789 0.6527357 NA NA
Total 16 2.5492383 1.0000000 NA NA
Jaccard: Family PERMANOVA results: cloaca (wild adult 2018 vs wild adult 2019)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2165538 0.0997655 1.974412 0.0385923
year 1 0.4185512 0.1928250 3.816108 0.0002000
Residual 14 1.5355219 0.7074094 NA NA
Total 16 2.1706268 1.0000000 NA NA
Weighted Unifrac: Family PERMANOVA results: cloaca (wild adult 2018 vs wild adult 2019)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1396137 0.1442332 3.173478 0.0355929
year 1 0.2124435 0.2194728 4.828931 0.0099980
Residual 14 0.6159145 0.6362939 NA NA
Total 16 0.9679717 1.0000000 NA NA
Unweighted Unifrac: Family PERMANOVA results: cloaca (wild adult 2018 vs wild adult 2019)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1604971 0.1098488 2.175524 0.0261948
year 1 0.2677390 0.1832484 3.629179 0.0002000
Residual 14 1.0328358 0.7069028 NA NA
Total 16 1.4610718 1.0000000 NA NA
Summary (p values): PERMANOVA results: cloaca (wild adult 2018 vs wild adult 2019)
log10(read.depth) year log10(read.depth).adjusted year.adjusted
Bray-Curtis 0.0283943 0.0002000 0.0851830 0.0007998
Jaccard 0.0401920 0.0002000 0.0851830 0.0007998
Weighted Unifrac 0.1103779 0.0015997 0.1103779 0.0015997
Uneighted Unifrac 0.0159968 0.0002000 0.0639872 0.0007998
Bray-Curtis: Family 0.0251950 0.0071986 0.1007798 0.0143971
Jaccard: Family 0.0385923 0.0002000 0.1007798 0.0007998
Weighted Unifrac: Family 0.0355929 0.0099980 0.1007798 0.0143971
Unweighted Unifrac: Family 0.0261948 0.0002000 0.1007798 0.0007998
5.1.4.2.2 Cosley: Juvenile vs adult
# PERMANOVAs over every distance matrix in `dists` for the captive cloacal
# samples (read depth and age as terms); results kept in cloaca.cosley.perm
cloaca.cosley.perm <- perm.loop(
  "dist_subset(distance.list[[i]],cloaca.cosley$sample.ID)~log10(read.depth)+age",
  cloaca.cosley,
  c("log10(read.depth)", "age"),
  "PERMANOVA results: cloaca (Cosley juvenile vs adult)",
  distance.list = dists
)
Bray-Curtis PERMANOVA results: cloaca (Cosley juvenile vs adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1064645 0.0481266 0.8856602 0.4797041
age 1 1.3844555 0.6258345 11.5170528 0.0039992
Residual 6 0.7212551 0.3260389 NA NA
Total 8 2.2121750 1.0000000 NA NA
Jaccard PERMANOVA results: cloaca (Cosley juvenile vs adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1900066 0.0702879 0.9225685 0.4833033
age 1 1.2775325 0.4725891 6.2030021 0.0049990
Residual 6 1.2357234 0.4571230 NA NA
Total 8 2.7032625 1.0000000 NA NA
Weighted Unifrac PERMANOVA results: cloaca (Cosley juvenile vs adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0010333 0.0094201 0.4412257 0.6324735
age 1 0.0946072 0.8624802 40.3973116 0.0025995
Residual 6 0.0140515 0.1280996 NA NA
Total 8 0.1096920 1.0000000 NA NA
Uneighted Unifrac PERMANOVA results: cloaca (Cosley juvenile vs adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0807284 0.0670471 1.023680 0.4033193
age 1 0.6501613 0.5399763 8.244402 0.0063987
Residual 6 0.4731657 0.3929767 NA NA
Total 8 1.2040553 1.0000000 NA NA
Bray-Curtis: Family PERMANOVA results: cloaca (Cosley juvenile vs adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0149021 0.0117373 0.4865306 0.6352729
age 1 1.0709603 0.8435157 34.9651180 0.0057988
Residual 6 0.1837764 0.1447470 NA NA
Total 8 1.2696388 1.0000000 NA NA
Jaccard: Family PERMANOVA results: cloaca (Cosley juvenile vs adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0784606 0.0782130 1.172467 0.3247351
age 1 0.5231892 0.5215385 7.818221 0.0057988
Residual 6 0.4015153 0.4002485 NA NA
Total 8 1.0031650 1.0000000 NA NA
Weighted Unifrac: Family PERMANOVA results: cloaca (Cosley juvenile vs adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0055344 0.0089738 0.5025049 0.6006799
age 1 0.5451076 0.8838774 49.4943664 0.0047990
Residual 6 0.0660812 0.1071488 NA NA
Total 8 0.6167231 1.0000000 NA NA
Unweighted Unifrac: Family PERMANOVA results: cloaca (Cosley juvenile vs adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0504899 0.0717672 1.173300 0.3281344
age 1 0.3948396 0.5612311 9.175399 0.0091982
Residual 6 0.2581945 0.3670017 NA NA
Total 8 0.7035240 1.0000000 NA NA
Summary (p values): PERMANOVA results: cloaca (Cosley juvenile vs adult)
log10(read.depth) age log10(read.depth).adjusted age.adjusted
Bray-Curtis 0.4797041 0.0039992 1 0.0119976
Jaccard 0.4833033 0.0049990 1 0.0119976
Weighted Unifrac 0.6324735 0.0025995 1 0.0103979
Uneighted Unifrac 0.4033193 0.0063987 1 0.0119976
Bray-Curtis: Family 0.6352729 0.0057988 1 0.0191962
Jaccard: Family 0.3247351 0.0057988 1 0.0191962
Weighted Unifrac: Family 0.6006799 0.0047990 1 0.0191962
Unweighted Unifrac: Family 0.3281344 0.0091982 1 0.0191962
5.1.4.2.3 Cosley adults vs wild adults 2018
# PERMANOVAs comparing Cosley adults with wild adults sampled in 2018.
# The formula string places log10(read.depth) before captive.wild.
cloaca.cosley.adultXwild18.perm <- perm.loop(
  "dist_subset(distance.list[[i]],cloaca.cosley.adultXwild18$sample.ID)~log10(read.depth)+captive.wild",
  cloaca.cosley.adultXwild18,
  c("log10(read.depth)", "captive.wild"),
  "PERMANOVA results: cloaca (Cosley adult vs wild 2018 adult)",
  distance.list = dists
)
Bray-Curtis PERMANOVA results: cloaca (Cosley adult vs wild 2018 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.5544145 0.1636303 2.901932 0.0371926
captive.wild 1 1.3053988 0.3852763 6.832754 0.0003999
Residual 8 1.5284013 0.4510934 NA NA
Total 10 3.3882147 1.0000000 NA NA
Jaccard PERMANOVA results: cloaca (Cosley adult vs wild 2018 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.5403462 0.1403006 2.065091 0.0547890
captive.wild 1 1.2177419 0.3161860 4.653958 0.0003999
Residual 8 2.0932581 0.5435134 NA NA
Total 10 3.8513462 1.0000000 NA NA
Weighted Unifrac PERMANOVA results: cloaca (Cosley adult vs wild 2018 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0199115 0.1149893 1.833840 0.1745651
captive.wild 1 0.0663856 0.3833781 6.114085 0.0099980
Residual 8 0.0868626 0.5016327 NA NA
Total 10 0.1731597 1.0000000 NA NA
Uneighted Unifrac PERMANOVA results: cloaca (Cosley adult vs wild 2018 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.3161539 0.1256498 2.123163 0.0903819
captive.wild 1 1.0087402 0.4009062 6.774295 0.0005999
Residual 8 1.1912563 0.4734440 NA NA
Total 10 2.5161504 1.0000000 NA NA
Bray-Curtis: Family PERMANOVA results: cloaca (Cosley adult vs wild 2018 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.4842592 0.1828761 3.665359 0.0399920
captive.wild 1 1.1068162 0.4179791 8.377494 0.0029994
Residual 8 1.0569425 0.3991448 NA NA
Total 10 2.6480179 1.0000000 NA NA
Jaccard: Family PERMANOVA results: cloaca (Cosley adult vs wild 2018 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2722368 0.1227062 2.107111 0.0769846
captive.wild 1 0.9127765 0.4114189 7.064883 0.0003999
Residual 8 1.0335928 0.4658749 NA NA
Total 10 2.2186060 1.0000000 NA NA
Weighted Unifrac: Family PERMANOVA results: cloaca (Cosley adult vs wild 2018 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1677794 0.1635340 3.193585 0.0679864
captive.wild 1 0.4378903 0.4268099 8.334990 0.0041992
Residual 8 0.4202911 0.4096561 NA NA
Total 10 1.0259608 1.0000000 NA NA
Unweighted Unifrac: Family PERMANOVA results: cloaca (Cosley adult vs wild 2018 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2037742 0.1309541 2.403306 0.0693861
captive.wild 1 0.6739864 0.4331328 7.948975 0.0003999
Residual 8 0.6783128 0.4359131 NA NA
Total 10 1.5560734 1.0000000 NA NA
Summary (p values): PERMANOVA results: cloaca (Cosley adult vs wild 2018 adult)
log10(read.depth) captive.wild log10(read.depth).adjusted captive.wild.adjusted
Bray-Curtis 0.0371926 0.0003999 0.1487702 0.0015997
Jaccard 0.0547890 0.0003999 0.1643671 0.0015997
Weighted Unifrac 0.1745651 0.0099980 0.1807638 0.0099980
Uneighted Unifrac 0.0903819 0.0005999 0.1807638 0.0015997
Bray-Curtis: Family 0.0399920 0.0029994 0.1599680 0.0059988
Jaccard: Family 0.0769846 0.0003999 0.2039592 0.0015997
Weighted Unifrac: Family 0.0679864 0.0041992 0.2039592 0.0059988
Unweighted Unifrac: Family 0.0693861 0.0003999 0.2039592 0.0015997
5.1.4.2.4 Cosley adults vs wild adults 2019
# PERMANOVAs comparing Cosley adults with wild adults sampled in 2019.
# The formula string places log10(read.depth) before captive.wild.
cloaca.cosley.adultXwild19.perm <- perm.loop(
  "dist_subset(distance.list[[i]],cloaca.cosley.adultXwild19$sample.ID)~log10(read.depth)+captive.wild",
  cloaca.cosley.adultXwild19,
  c("log10(read.depth)", "captive.wild"),
  "PERMANOVA results: cloaca (Cosley adult vs wild 2019 adult)",
  distance.list = dists
)
Bray-Curtis PERMANOVA results: cloaca (Cosley adult vs wild 2019 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1262621 0.0439245 1.16183 0.2835433
captive.wild 1 1.5528363 0.5402061 14.28878 0.0007998
Residual 11 1.1954272 0.4158694 NA NA
Total 13 2.8745256 1.0000000 NA NA
Jaccard PERMANOVA results: cloaca (Cosley adult vs wild 2019 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2261601 0.0585835 1.159421 0.2639472
captive.wild 1 1.4886191 0.3856055 7.631481 0.0003999
Residual 11 2.1456922 0.5558109 NA NA
Total 13 3.8604714 1.0000000 NA NA
Weighted Unifrac PERMANOVA results: cloaca (Cosley adult vs wild 2019 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0050725 0.0308591 0.9950402 0.3539292
captive.wild 1 0.1032275 0.6279991 20.2496106 0.0007998
Residual 11 0.0560753 0.3411419 NA NA
Total 13 0.1643752 1.0000000 NA NA
Uneighted Unifrac PERMANOVA results: cloaca (Cosley adult vs wild 2019 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1074430 0.0475151 1.280733 0.2277544
captive.wild 1 1.2309881 0.5443862 14.673527 0.0002000
Residual 11 0.9228094 0.4080987 NA NA
Total 13 2.2612404 1.0000000 NA NA
Bray-Curtis: Family PERMANOVA results: cloaca (Cosley adult vs wild 2019 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0726882 0.0415525 1.273648 0.2657469
captive.wild 1 1.0488440 0.5995751 18.377915 0.0002000
Residual 11 0.6277798 0.3588724 NA NA
Total 13 1.7493121 1.0000000 NA NA
Jaccard: Family PERMANOVA results: cloaca (Cosley adult vs wild 2019 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0996076 0.0530847 1.27921 0.2381524
captive.wild 1 0.9202512 0.4904369 11.81832 0.0007998
Residual 11 0.8565318 0.4564784 NA NA
Total 13 1.8763906 1.0000000 NA NA
Weighted Unifrac: Family PERMANOVA results: cloaca (Cosley adult vs wild 2019 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0258491 0.0379620 1.34886 0.24995
captive.wild 1 0.4442711 0.6524563 23.18296 0.00020
Residual 11 0.2108007 0.3095817 NA NA
Total 13 0.6809209 1.0000000 NA NA
Unweighted Unifrac: Family PERMANOVA results: cloaca (Cosley adult vs wild 2019 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0579197 0.0435361 1.119236 0.2903419
captive.wild 1 0.7032224 0.5285859 13.589025 0.0007998
Residual 11 0.5692422 0.4278780 NA NA
Total 13 1.3303843 1.0000000 NA NA
Summary (p values): PERMANOVA results: cloaca (Cosley adult vs wild 2019 adult)
log10(read.depth) captive.wild log10(read.depth).adjusted captive.wild.adjusted
Bray-Curtis 0.2835433 0.0007998 0.9110178 0.0015997
Jaccard 0.2639472 0.0003999 0.9110178 0.0011998
Weighted Unifrac 0.3539292 0.0007998 0.9110178 0.0015997
Uneighted Unifrac 0.2277544 0.0002000 0.9110178 0.0007998
Bray-Curtis: Family 0.2657469 0.0002000 0.9526095 0.0007998
Jaccard: Family 0.2381524 0.0007998 0.9526095 0.0015997
Weighted Unifrac: Family 0.2499500 0.0002000 0.9526095 0.0007998
Unweighted Unifrac: Family 0.2903419 0.0007998 0.9526095 0.0015997
5.1.4.2.5 Cosley juveniles vs wild adults 2018
# PERMANOVAs comparing Cosley juveniles with wild adults sampled in 2018.
# The formula string places log10(read.depth) before captive.wild.
# The caption names the juvenile comparison; it previously repeated the
# "Cosley adult" caption from the adult-vs-wild section, mislabeling the tables.
cloaca.cosley.juvenileXwild18.perm <- perm.loop(
  "dist_subset(distance.list[[i]],cloaca.cosley.juvenileXwild18$sample.ID)~log10(read.depth)+captive.wild",
  cloaca.cosley.juvenileXwild18,
  c("log10(read.depth)", "captive.wild"),
  "PERMANOVA results: cloaca (Cosley juvenile vs wild 2018 adult)",
  distance.list = dists
)
Bray-Curtis PERMANOVA results: cloaca (Cosley adult vs wild 2018 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.5200786 0.1377421 2.938154 0.0379924
captive.wild 1 1.6625870 0.4403337 9.392689 0.0015997
Residual 9 1.5930776 0.4219242 NA NA
Total 11 3.7757433 1.0000000 NA NA
Jaccard PERMANOVA results: cloaca (Cosley adult vs wild 2018 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.5342978 0.1260081 2.123733 0.0519896
captive.wild 1 1.4416320 0.3399925 5.730216 0.0013997
Residual 9 2.2642581 0.5339995 NA NA
Total 11 4.2401879 1.0000000 NA NA
Weighted Unifrac PERMANOVA results: cloaca (Cosley adult vs wild 2018 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0158595 0.0757849 1.788438 0.1831634
captive.wild 1 0.1136001 0.5428412 12.810448 0.0017996
Residual 9 0.0798099 0.3813739 NA NA
Total 11 0.2092695 1.0000000 NA NA
Uneighted Unifrac PERMANOVA results: cloaca (Cosley adult vs wild 2018 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.3398836 0.1180108 2.844231 0.0413917
captive.wild 1 1.4647283 0.5085676 12.257215 0.0005999
Residual 9 1.0754935 0.3734216 NA NA
Total 11 2.8801053 1.0000000 NA NA
Bray-Curtis: Family PERMANOVA results: cloaca (Cosley adult vs wild 2018 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.344235 0.1208204 3.026439 0.0735853
captive.wild 1 1.481228 0.5198848 13.022630 0.0007998
Residual 9 1.023683 0.3592948 NA NA
Total 11 2.849147 1.0000000 NA NA
Jaccard: Family PERMANOVA results: cloaca (Cosley adult vs wild 2018 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2896774 0.1133679 2.758741 0.045191
captive.wild 1 1.3204897 0.5167856 12.575676 0.000200
Residual 9 0.9450313 0.3698465 NA NA
Total 11 2.5551984 1.0000000 NA NA
Weighted Unifrac: Family PERMANOVA results: cloaca (Cosley adult vs wild 2018 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1218166 0.1040778 2.691588 0.0949810
captive.wild 1 0.6412976 0.5479121 14.169730 0.0013997
Residual 9 0.4073245 0.3480101 NA NA
Total 11 1.1704387 1.0000000 NA NA
Unweighted Unifrac: Family PERMANOVA results: cloaca (Cosley adult vs wild 2018 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2218915 0.1208843 3.269756 0.0355929
captive.wild 1 1.0029217 0.5463818 14.778887 0.0002000
Residual 9 0.6107561 0.3327339 NA NA
Total 11 1.8355693 1.0000000 NA NA
Summary (p values): PERMANOVA results: cloaca (Cosley adult vs wild 2018 adult)
log10(read.depth) captive.wild log10(read.depth).adjusted captive.wild.adjusted
Bray-Curtis 0.0379924 0.0015997 0.1519696 0.0041992
Jaccard 0.0519896 0.0013997 0.1519696 0.0041992
Weighted Unifrac 0.1831634 0.0017996 0.1831634 0.0041992
Uneighted Unifrac 0.0413917 0.0005999 0.1519696 0.0023995
Bray-Curtis: Family 0.0735853 0.0007998 0.1471706 0.0015997
Jaccard: Family 0.0451910 0.0002000 0.1423715 0.0007998
Weighted Unifrac: Family 0.0949810 0.0013997 0.1471706 0.0015997
Unweighted Unifrac: Family 0.0355929 0.0002000 0.1423715 0.0007998
5.1.4.2.6 Cosley juveniles vs wild adults 2019
# PERMANOVAs comparing Cosley juveniles with wild adults sampled in 2019.
# The formula string places log10(read.depth) before captive.wild.
# The caption names the juvenile comparison; it previously repeated the
# "Cosley adult" caption from the adult-vs-wild section, mislabeling the tables.
cloaca.cosley.juvenileXwild19.perm <- perm.loop(
  "dist_subset(distance.list[[i]],cloaca.cosley.juvenileXwild19$sample.ID)~log10(read.depth)+captive.wild",
  cloaca.cosley.juvenileXwild19,
  c("log10(read.depth)", "captive.wild"),
  "PERMANOVA results: cloaca (Cosley juvenile vs wild 2019 adult)",
  distance.list = dists
)
Bray-Curtis PERMANOVA results: cloaca (Cosley adult vs wild 2019 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1733209 0.0459655 1.581692 0.1741652
captive.wild 1 2.2824000 0.6053029 20.828725 0.0002000
Residual 12 1.3149533 0.3487316 NA NA
Total 14 3.7706743 1.0000000 NA NA
Jaccard PERMANOVA results: cloaca (Cosley adult vs wild 2019 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2655895 0.0577122 1.350203 0.1921616
captive.wild 1 1.9759370 0.4293679 10.045263 0.0002000
Residual 12 2.3604403 0.5129199 NA NA
Total 14 4.6019668 1.0000000 NA NA
Weighted Unifrac PERMANOVA results: cloaca (Cosley adult vs wild 2019 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0024234 0.0210445 0.546212 0.6356729
captive.wild 1 0.0594925 0.5166180 13.408852 0.0002000
Residual 12 0.0532417 0.4623375 NA NA
Total 14 0.1151576 1.0000000 NA NA
Uneighted Unifrac PERMANOVA results: cloaca (Cosley adult vs wild 2019 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1439633 0.0538705 1.968795 0.1321736
captive.wild 1 1.6509598 0.6177832 22.577993 0.0002000
Residual 12 0.8774703 0.3283462 NA NA
Total 14 2.6723934 1.0000000 NA NA
Bray-Curtis: Family PERMANOVA results: cloaca (Cosley adult vs wild 2019 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0614453 0.0323280 1.210169 0.2767447
captive.wild 1 1.2299484 0.6471084 24.223899 0.0002000
Residual 12 0.6092901 0.3205636 NA NA
Total 14 1.9006838 1.0000000 NA NA
Jaccard: Family PERMANOVA results: cloaca (Cosley adult vs wild 2019 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1340428 0.0544502 1.95496 0.1329734
captive.wild 1 1.5049237 0.6113221 21.94870 0.0002000
Residual 12 0.8227860 0.3342278 NA NA
Total 14 2.4617525 1.0000000 NA NA
Weighted Unifrac: Family PERMANOVA results: cloaca (Cosley adult vs wild 2019 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0160633 0.0270506 0.9312626 0.3767247
captive.wild 1 0.3707728 0.6243824 21.4954092 0.0002000
Residual 12 0.2069871 0.3485670 NA NA
Total 14 0.5938232 1.0000000 NA NA
Unweighted Unifrac: Family PERMANOVA results: cloaca (Cosley adult vs wild 2019 adult)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1003136 0.0548598 2.230694 0.1069786
captive.wild 1 1.1885966 0.6500226 26.431060 0.0003999
Residual 12 0.5396363 0.2951176 NA NA
Total 14 1.8285465 1.0000000 NA NA
Summary (p values): PERMANOVA results: cloaca (Cosley adult vs wild 2019 adult)
log10(read.depth) captive.wild log10(read.depth).adjusted captive.wild.adjusted
Bray-Curtis 0.1741652 0.0002000 0.5286943 0.0007998
Jaccard 0.1921616 0.0002000 0.5286943 0.0007998
Weighted Unifrac 0.6356729 0.0002000 0.6356729 0.0007998
Uneighted Unifrac 0.1321736 0.0002000 0.5286943 0.0007998
Bray-Curtis: Family 0.2767447 0.0002000 0.5534893 0.0007998
Jaccard: Family 0.1329734 0.0002000 0.4279144 0.0007998
Weighted Unifrac: Family 0.3767247 0.0002000 0.5534893 0.0007998
Unweighted Unifrac: Family 0.1069786 0.0003999 0.4279144 0.0007998
5.1.4.2.7 Results summary
  • Calculate adjusted p values (sequential Bonferroni) based on all comparisons within ASVs (all 4 dissimilarities X all 6 tests) and within families
# The six cloaca PERMANOVA summaries, in the order used for the table columns
cloaca.perms <- list(
  cloaca.wild.perm,
  cloaca.cosley.perm,
  cloaca.cosley.adultXwild18.perm,
  cloaca.cosley.adultXwild19.perm,
  cloaca.cosley.juvenileXwild18.perm,
  cloaca.cosley.juvenileXwild19.perm
)

# Holm-adjusts column 2 of the given rows pooled across all six summaries,
# then lays the result out with one column per comparison.
# NOTE(review): column 2 appears to be the unadjusted p value of the second
# model term (see the printed summary tables) - confirm against perm.loop.
holm.by.comparison <- function(rows) {
  pooled.p <- unlist(lapply(cloaca.perms, function(res) res[rows, 2]))
  matrix(p.adjust(pooled.p, method = "holm"), nrow = length(rows))
}

asv.adjusted <- holm.by.comparison(1:4)    # rows 1-4: ASV-level dissimilarities
family.adjusted <- holm.by.comparison(5:8) # rows 5-8: family-level dissimilarities

adjusted <- rbind.data.frame(asv.adjusted, family.adjusted)
rownames(adjusted) <- rownames(cloaca.wild.perm)
colnames(adjusted) <- c(
  "Wild adults (2018 vs 2019)",
  "Cosley (juvenile vs adult)",
  "Cosley adult vs wild adult 2018",
  "Cosley adult vs wild adult 2019",
  "Cosley juvenile vs wild adult 2018",
  "Cosley juvenile vs wild adult 2019"
)

kable.wrap(adjusted, caption = "Summary Results: Adjusted P values from cloaca PERMANOVAs")
Summary Results: Adjusted P values from cloaca PERMANOVAs
Wild adults (2018 vs 2019) Cosley (juvenile vs adult) Cosley adult vs wild adult 2018 Cosley adult vs wild adult 2019 Cosley juvenile vs wild adult 2018 Cosley juvenile vs wild adult 2019
Bray-Curtis 0.0047990 0.0159968 0.0063987 0.0087982 0.0127974 0.0047990
Jaccard 0.0047990 0.0159968 0.0063987 0.0063987 0.0125975 0.0047990
Weighted Unifrac 0.0127974 0.0129974 0.0159968 0.0087982 0.0127974 0.0047990
Uneighted Unifrac 0.0047990 0.0159968 0.0077984 0.0047990 0.0077984 0.0047990
Bray-Curtis: Family 0.0293941 0.0293941 0.0239952 0.0047990 0.0095981 0.0047990
Jaccard: Family 0.0047990 0.0293941 0.0059988 0.0095981 0.0047990 0.0047990
Weighted Unifrac: Family 0.0293941 0.0293941 0.0293941 0.0047990 0.0125975 0.0047990
Unweighted Unifrac: Family 0.0047990 0.0293941 0.0059988 0.0095981 0.0047990 0.0059988
  • All comparisons are significant after correction for multiple comparisons

6 Plastron

  • Make plastron subset
# keep only the metadata rows whose sample type is "plastron"
is.plastron <- meta$sample.type == "plastron"
plastron <- meta[is.plastron, ]

6.1 Cosley headstarts

6.1.1 Subset

  • Subset plastron samples to Cosley turtles (all of which were juveniles for plastron samples)
  • Subset to only turtles with at least 3 data points
# plastron samples collected at Cosley
from.cosley <- plastron$location == "Cosley"
plastron.cosley <- plastron[from.cosley, ]

# number of data points per individual (plyr::count returns the value and its freq)
plastron.cosley.count <- plyr::count(plastron.cosley$source.ID)

# IDs with at least 3 data points, then the samples belonging to those IDs
ids.3.plus <- plastron.cosley.count[plastron.cosley.count$freq > 2, 1]
plastron.cosley.3 <- plastron.cosley[plastron.cosley$source.ID %in% ids.3.plus, ]

6.1.2 Heatmaps

  • Ordered by date
  • Note: the ASV graph is labeled “OTU” on the Y axis. This is due to a bug in phyloseq that I have been unable to isolate.
# ASV-level heatmap of the Cosley plastron samples, ordered by sampling date
phylo.heat(
  phylo.list[[1]],
  plastron.cosley.3,
  ID.col = "sample.ID",
  order1 = "date",
  title = "Cosley plastron ordered by date (ASV)",
  method = "NMDS",
  taxa.label = "ASV"
)

  • A slight shift over time is visible at the ASV level. The other levels were not informative and are not shown

6.1.3 Richness (individuals and time)

Comparing Cosley plastron samples over time and among individuals

6.1.3.1 Plot

  • Plot individuals over time
  • Each line is an individual
# Richness over time for the Cosley juveniles: one colour and one linear
# trend line per individual (source.ID). The title spelling is corrected and
# TRUE/FALSE is spelled out because T/F can be reassigned.
ggplot(plastron.cosley.3, aes(x = as.Date(date), y = rich, group = source.ID)) +
  geom_point(aes(color = source.ID)) +
  geom_smooth(method = "lm", se = FALSE, aes(color = source.ID)) +
  ggtitle("date X richness separated by individuals (Cosley juveniles)") +
  labs(x = "Date", y = "Richness", color = "Turtle ID")

6.1.3.2 Statistics

Comparing Cosley plastron samples over time and among individuals

  • Runs a model looking at richness over time and by individuals
  • For the random effect, a combination of date and tub was used (thus tub is nested within date). This was done because preliminary analyses suggested that there was no overriding effect of a particular tub across dates, and including tub alone resulted in a singular model fit
  • Individuals rotated among tubs during the trial

Run and check model

# Richness by date, individual and their interaction, with read depth as a
# covariate; the random intercept is the date-by-tub combination.
pl.m <- lmer(
  rich ~ date.sc * source.ID + log10(read.depth) +
    (1 | paste(plastron.cosley.3$date, plastron.cosley.3$specific)),
  data = plastron.cosley.3
)

# diagnostic plots: QQ plot and non-constant variance
check_model(pl.m, check = c("qq", "ncv"))

  • Acceptable fit
  • Run stats

# ANOVA table for pl.m, rendered with the shared table wrapper
kable.wrap(
  Anova(pl.m),
  caption = "Result: Mixed effects ANOVA comparing richness of individuals over time"
)
Result: Mixed effects ANOVA comparing richness of individuals over time
Chisq Df Pr(>Chisq)
date.sc 0.0461281 1 0.8299431
source.ID 32.6229629 17 0.0125774
log10(read.depth) 6.2470390 1 0.0124401
date.sc:source.ID 14.8905476 17 0.6033652
  • There was a significant effect of read depth (blocking variable).
  • There was a significant difference among individuals
  • There was no overarching effect of date.
  • Based on the plot, some individuals clearly had different trends, but there was no significant interaction, likely due to the small sample sizes.

6.1.4 Richness (growth)

Associations between richness and growth for Cosley plastron samples

  • Growth was calculated proximate to the collection of each swab (change in plastron length per day)
  • Linear regression
# Scatter of plastron richness against growth (PL.diff.per.day) with a linear
# fit and its confidence band. TRUE is spelled out because T can be reassigned.
ggplot(plastron.cosley.3, aes(x = rich, y = PL.diff.per.day)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  ggtitle("Plastron richness X plastron growth (Cosley juveniles)") +
  labs(x = "Richness", y = "Growth")

  • Construct and check model assumptions
  • Individual ID will be included as a random effect
# using PL (growth = change in plastron length per day)
# Richness modelled on date, growth and read depth, with random intercepts for
# the date-by-tub combination and for individual (source.ID).
gr.pl2 <- lmer(
  rich ~ date.sc + PL.diff.per.day + log10(read.depth) +
    (1 | paste(plastron.cosley.3$date, plastron.cosley.3$specific)) +
    (1 | source.ID),
  data = plastron.cosley.3
)

# diagnostic plots: QQ plot and non-constant variance
check_model(gr.pl2, check = c("qq", "ncv"))

  • Acceptable model
  • Run stats
# ANOVA table for gr.pl2, rendered with the shared table wrapper
kable.wrap(
  Anova(gr.pl2),
  caption = "Result: Correlation between growth rate (PL measured at each point) and richness"
)
Result: Correlation between growth rate (PL measured at each point) and richness
Chisq Df Pr(>Chisq)
date.sc 0.4405829 1 0.5068412
PL.diff.per.day 1.6924252 1 0.1932816
log10(read.depth) 19.1135466 1 0.0000123
  • Growth was not significant

6.1.5 Richness (size)

Construct and check models looking for associations between size (PL) and richness

  • Linear regression

# Scatter of plastron length (PL) against richness with a linear fit and its
# confidence band. TRUE is spelled out because T can be reassigned.
ggplot(plastron.cosley.3, aes(x = PL, y = rich)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  ggtitle("Plastron length X plastron richness (Cosley juveniles)") +
  labs(x = "Plastron length", y = "richness")

  • Construct and check model assumptions
# using PL (plastron length as the size measure)
# Richness modelled on date, size and read depth, with random intercepts for
# the date-by-tub combination and for individual (source.ID).
gr.pl3 <- lmer(
  rich ~ date.sc + PL + log10(read.depth) +
    (1 | paste(plastron.cosley.3$date, plastron.cosley.3$specific)) +
    (1 | source.ID),
  data = plastron.cosley.3
)

# diagnostic plots: QQ plot and non-constant variance
check_model(gr.pl3, check = c("qq", "ncv"))

# ANOVA table for gr.pl3, rendered with the shared table wrapper
kable.wrap(
  Anova(gr.pl3),
  caption = "Result: Correlation between size (PL) and richness"
)
Result: Correlation between size (PL) and richness
Chisq Df Pr(>Chisq)
date.sc 0.9562194 1 0.3281412
PL 1.9324152 1 0.1644944
log10(read.depth) 19.6689296 1 0.0000092
  • PL was not significantly associated with richness

6.1.6 Evenness (individuals and time)

Comparing Cosley plastron samples over time and among individuals

6.1.6.1 Plot

  • Plot individuals over time
# Evenness over time for the Cosley juveniles: one colour and one linear
# trend line per individual (source.ID). The title spelling is corrected
# ("separated", "Cosley") and FALSE is spelled out because F can be reassigned.
ggplot(plastron.cosley.3, aes(x = as.Date(date), y = even, group = source.ID)) +
  geom_point(aes(color = source.ID)) +
  geom_smooth(method = "lm", se = FALSE, aes(color = source.ID)) +
  ggtitle("date X evenness separated by individuals (Cosley juveniles)") +
  labs(x = "Date", y = "Evenness", color = "Turtle ID")

6.1.6.2 Statistics

Comparing evenness of Cosley plastron samples over time and among individuals

  • Runs a model looking at evenness over time and by individuals

  • For the random effect, a combination of date and tub was used (thus tub is nested within date). This was done because preliminary analyses suggested that there was no overriding effect of a particular tub across dates, and including tub alone resulted in a singular model fit

  • Run and check model

# Evenness by date, individual and their interaction, with read depth as a
# covariate; the random intercept is the date-by-tub combination.
pl.even.m <- lmer(
  even ~ date.sc * source.ID + log10(read.depth) +
    (1 | paste(plastron.cosley.3$date, plastron.cosley.3$specific)),
  data = plastron.cosley.3
)

# diagnostic plots: QQ plot and non-constant variance
check_model(pl.even.m, check = c("qq", "ncv"))

  • Acceptable fit
  • Run stats
# ANOVA table for pl.even.m, rendered with the shared table wrapper
kable.wrap(
  Anova(pl.even.m),
  caption = "Result: Mixed effects ANOVA comparing evenness of individuals over time"
)
Result: Mixed effects ANOVA comparing evenness of individuals over time
Chisq Df Pr(>Chisq)
date.sc 6.438678 1 0.0111662
source.ID 13.738224 17 0.6855377
log10(read.depth) 3.679309 1 0.0550917
date.sc:source.ID 8.145705 17 0.9633311
  • There was a nearly significant effect of read depth (blocking variable).
  • There was no significant difference among individuals
  • There was a positive effect of date
  • Based on the plot, some individuals clearly had different trends, but there was no significant interaction, likely due to the small sample sizes.

6.1.7 Evenness (growth)

Associations between evenness and growth for Cosley plastron samples

  • Growth was calculated proximate to the collection of each swab (change in plastron length per day)
  • Linear regression
# Scatter of plastron evenness against growth (PL.diff.per.day) with a linear
# fit and its confidence band. TRUE is spelled out because T can be reassigned.
ggplot(plastron.cosley.3, aes(x = even, y = PL.diff.per.day)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  ggtitle("Plastron evenness X plastron growth (Cosley juveniles)") +
  labs(x = "evenness", y = "Growth")

  • Construct and check model assumptions
  • Individual ID will be included as a random effect
# using PL (growth = change in plastron length per day)
# Evenness modelled on date, growth and read depth, with random intercepts for
# the date-by-tub combination and for individual (source.ID).
gr.pl4 <- lmer(
  even ~ date.sc + PL.diff.per.day + log10(read.depth) +
    (1 | paste(plastron.cosley.3$date, plastron.cosley.3$specific)) +
    (1 | source.ID),
  data = plastron.cosley.3
)

# diagnostic plots: QQ plot and non-constant variance
check_model(gr.pl4, check = c("qq", "ncv"))

  • Acceptable model
  • Run stats
# ANOVA table for gr.pl4, rendered with the shared table wrapper
kable.wrap(
  Anova(gr.pl4),
  caption = "Result: Correlation between growth rate (PL measured at each point) and evenness"
)
Result: Correlation between growth rate (PL measured at each point) and evenness
Chisq Df Pr(>Chisq)
date.sc 7.0457525 1 0.0079453
PL.diff.per.day 0.0001911 1 0.9889701
log10(read.depth) 5.0098370 1 0.0252037
  • Growth was not significant

6.1.8 Evenness (size)

Construct and check models looking for associations between size (PL) and evenness

  • Linear regression

# Scatter of plastron length (PL) against evenness with a linear fit and its
# confidence band. TRUE is spelled out because T can be reassigned.
ggplot(plastron.cosley.3, aes(x = PL, y = even)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  ggtitle("Plastron length X plastron evenness (Cosley juveniles)") +
  labs(x = "Plastron length", y = "evenness")

  • Construct and check model assumptions
# using PL (plastron length as the size measure)
# Evenness modelled on date, size and read depth, with random intercepts for
# the date-by-tub combination and for individual (source.ID).
gr.pl5 <- lmer(
  even ~ date.sc + PL + log10(read.depth) +
    (1 | paste(plastron.cosley.3$date, plastron.cosley.3$specific)) +
    (1 | source.ID),
  data = plastron.cosley.3
)

# Assumption check (QQ plot and non-constant variance). This step was missing
# here although it is run for the parallel models gr.pl2, gr.pl3 and gr.pl4.
check_model(gr.pl5, check = c("qq", "ncv"))

# ANOVA table for gr.pl5, rendered with the shared table wrapper
kable.wrap(Anova(gr.pl5), "Result: Correlation between size (PL) and evenness")
Result: Correlation between size (PL) and evenness
Chisq Df Pr(>Chisq)
date.sc 2.2142181 1 0.1367443
PL 0.0384766 1 0.8444893
log10(read.depth) 4.9957432 1 0.0254097
  • PL was not significantly associated with evenness

6.1.9 Beta-diversity (individuals, time, and growth)

6.1.9.1 PCoA plots

Look for changes in beta diversity over time and by individuals

  • Colored by date
# PCoA of the Cosley plastron samples for every distance matrix in `dists`,
# colored by date, with each point joined to the centroid of its date
for(i in seq_along(dists)){

  meta.i <- plastron.cosley.3
  # PCoA on the distances among these samples; add=TRUE applies a correction for negative eigenvalues (look up the add argument)
  pcoa <- cmdscale(dist_subset(dists[[i]],meta.i$sample.ID),k=2,add=TRUE,eig=TRUE)
  pcoa.eig <- (pcoa$eig[1:2]/sum(pcoa$eig))*100 #percent variance explained by each of the first two coordinates
  meta.i$x <- pcoa$points[,1] #adds x coordinates
  meta.i$y <- pcoa$points[,2] #adds y coordinates

  # mean x/y per date, merged back onto the samples so segments can be drawn from each centroid
  centroids <- aggregate(cbind(x,y)~date,meta.i,mean)
  colnames(centroids)[2:3] <- c("x.cent","y.cent")
  meta.i <- merge(meta.i,centroids,by="date")

  # size and stroke are fixed settings, so they are passed outside aes();
  # inside aes() they are treated as data mappings instead of literal values
  plot <- ggplot(meta.i,aes(x = x,y=y))+
    geom_point(aes(fill=date,color=date),size=4,stroke=1)+
    theme_bw()+
    geom_segment(aes(x=x.cent, y=y.cent, xend=x, yend=y, color = date))+
    xlab(pcoa.eig[1])+ #axis labels are the percent variance explained
    ylab(pcoa.eig[2])+
    ggtitle(paste(names(dists)[i],"(Cosley headstart plastron samples)"))
  print(plot)
}

  • There is an extremely clear change over time
  • This is particularly pronounced for presence/absence based metrics

6.1.9.2 PERMANOVAs

PERMANOVAs on plastron samples (Cosley juveniles) examining changes over time, differences among individuals, effects of tubs, and associations with growth

  • Run PERMANOVAs
    • Using the following order of terms: read depth, date (continuous), specific (tub), source.ID (individual), growth
# PERMANOVA for each distance matrix via perm.loop (defined earlier in the file).
# Term order: read depth, date, tub (specific), individual (source.ID), growth
temp <- perm.loop(
  "dist_subset(dists[[i]],plastron.cosley.3$sample.ID)~log10(read.depth)+date.sc+ specific+ source.ID + PL.diff.per.day",
  plastron.cosley.3,
  c("read.depth", "date", "specific", "source.ID", "PL.diff.per.day"),
  "PERMANOVA results: Cosley headstart plastrons (PL for growth rate)",
  distance.list = dists
)
Bray-Curtis PERMANOVA results: Cosley headstart plastrons (PL for growth rate)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.3079202 0.0236566 3.204754 0.0083983
date.sc 1 2.9610468 0.2274889 30.817816 0.0002000
specific 7 1.4868919 0.1142337 2.210741 0.0002000
source.ID 17 3.3276522 0.2556542 2.037256 0.0002000
PL.diff.per.day 1 0.3207619 0.0246432 3.338408 0.0067986
Residual 48 4.6119507 0.3543232 NA NA
Total 75 13.0162237 1.0000000 NA NA
Jaccard PERMANOVA results: Cosley headstart plastrons (PL for growth rate)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.3946026 0.0198597 2.163747 0.0143971
date.sc 1 3.1250202 0.1572770 17.135597 0.0002000
specific 7 2.2461519 0.1130450 1.759493 0.0002000
source.ID 17 4.9035088 0.2467853 1.581628 0.0002000
PL.diff.per.day 1 0.4464826 0.0224707 2.448223 0.0077984
Residual 48 8.7537636 0.4405622 NA NA
Total 75 19.8695297 1.0000000 NA NA
Weighted Unifrac PERMANOVA results: Cosley headstart plastrons (PL for growth rate)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0059616 0.0203424 1.857077 0.1127774
date.sc 1 0.0307610 0.1049644 9.582295 0.0002000
specific 7 0.0284863 0.0972025 1.267671 0.1725655
source.ID 17 0.0686748 0.2343362 1.258397 0.1087782
PL.diff.per.day 1 0.0050883 0.0173625 1.585041 0.1631674
Residual 48 0.1540892 0.5257920 NA NA
Total 75 0.2930612 1.0000000 NA NA
Uneighted Unifrac PERMANOVA results: Cosley headstart plastrons (PL for growth rate)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2200893 0.0230961 2.447964 0.0017996
date.sc 1 1.2665965 0.1329162 14.087842 0.0002000
specific 7 1.1147296 0.1169793 1.771241 0.0002000
source.ID 17 2.3037467 0.2417543 1.507273 0.0002000
PL.diff.per.day 1 0.3085879 0.0323831 3.432299 0.0002000
Residual 48 4.3155390 0.4528710 NA NA
Total 75 9.5292890 1.0000000 NA NA
Bray-Curtis: Family PERMANOVA results: Cosley headstart plastrons (PL for growth rate)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0792960 0.0191240 1.831787 0.0919816
date.sc 1 0.4628674 0.1116309 10.692527 0.0002000
specific 7 0.4409984 0.1063567 1.455334 0.0363927
source.ID 17 0.9687179 0.2336282 1.316352 0.0379924
PL.diff.per.day 1 0.1166634 0.0281360 2.694998 0.0181964
Residual 48 2.0778659 0.5011242 NA NA
Total 75 4.1464091 1.0000000 NA NA
Jaccard: Family PERMANOVA results: Cosley headstart plastrons (PL for growth rate)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1401162 0.0239614 2.453783 0.0015997
date.sc 1 0.7478237 0.1278862 13.096253 0.0002000
specific 7 0.6469557 0.1106367 1.618543 0.0002000
source.ID 17 1.4168398 0.2422955 1.459552 0.0002000
PL.diff.per.day 1 0.1549323 0.0264952 2.713251 0.0009998
Residual 48 2.7409014 0.4687249 NA NA
Total 75 5.8475692 1.0000000 NA NA
Weighted Unifrac: Family PERMANOVA results: Cosley headstart plastrons (PL for growth rate)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0423899 0.0212744 1.931848 0.0777844
date.sc 1 0.1962103 0.0984728 8.941950 0.0002000
specific 7 0.1911927 0.0959546 1.244754 0.1605679
source.ID 17 0.4703895 0.2360762 1.261012 0.0863827
PL.diff.per.day 1 0.0391020 0.0196243 1.782006 0.1051790
Residual 48 1.0532485 0.5285978 NA NA
Total 75 1.9925329 1.0000000 NA NA
Unweighted Unifrac: Family PERMANOVA results: Cosley headstart plastrons (PL for growth rate)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0679414 0.0167503 1.728536 0.0509898
date.sc 1 0.5120625 0.1262442 13.027670 0.0002000
specific 7 0.4574003 0.1127677 1.662425 0.0005999
source.ID 17 1.0099509 0.2489939 1.511455 0.0002000
PL.diff.per.day 1 0.1220961 0.0301016 3.106316 0.0003999
Residual 48 1.8866766 0.4651423 NA NA
Total 75 4.0561278 1.0000000 NA NA
Summary (p values): PERMANOVA results: Cosley headstart plastrons (PL for growth rate)
read.depth date specific source.ID PL.diff.per.day read.depth.adjusted date.adjusted specific.adjusted source.ID.adjusted PL.diff.per.day.adjusted
Bray-Curtis 0.0083983 2e-04 0.0002000 0.0002000 0.0067986 0.0251950 0.0007998 0.0007998 0.0007998 0.0203959
Jaccard 0.0143971 2e-04 0.0002000 0.0002000 0.0077984 0.0287942 0.0007998 0.0007998 0.0007998 0.0203959
Weighted Unifrac 0.1127774 2e-04 0.1725655 0.1087782 0.1631674 0.1127774 0.0007998 0.1725655 0.1087782 0.1631674
Uneighted Unifrac 0.0017996 2e-04 0.0002000 0.0002000 0.0002000 0.0071986 0.0007998 0.0007998 0.0007998 0.0007998
Bray-Curtis: Family 0.0919816 2e-04 0.0363927 0.0379924 0.0181964 0.1555689 0.0007998 0.0727854 0.0759848 0.0363927
Jaccard: Family 0.0015997 2e-04 0.0002000 0.0002000 0.0009998 0.0063987 0.0007998 0.0007998 0.0007998 0.0029994
Weighted Unifrac: Family 0.0777844 2e-04 0.1605679 0.0863827 0.1051790 0.1555689 0.0007998 0.1605679 0.0863827 0.1051790
Unweighted Unifrac: Family 0.0509898 2e-04 0.0005999 0.0002000 0.0003999 0.1529694 0.0007998 0.0017996 0.0007998 0.0015997
  • Strongly significant effect of date
  • Usually an effect of individual (i.e., each individual has a distinct microbiome)
  • Usually an effect of tub
  • Often a significant effect of growth

PERMANOVAs on size

  • Look for associations between beta diversity and size
  • Only the size variable is of interest here (the rest are redundant with the previous model and are included as blocking variables)
# PERMANOVA for each distance matrix via perm.loop (defined earlier in the file).
# Same blocking terms as the growth model, with size (PL) as the final term
temp <- perm.loop(
  "dist_subset(dists[[i]],plastron.cosley.3$sample.ID)~log10(read.depth)+date.sc+ specific+ source.ID + PL",
  plastron.cosley.3,
  c("read.depth", "date", "specific", "source.ID", "PL"),
  "PERMANOVA results: Cosley headstart plastron (size based on PL)",
  distance.list = dists
)
Bray-Curtis PERMANOVA results: Cosley headstart plastron (size based on PL)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.3079202 0.0236566 3.110374 0.0093981
date.sc 1 2.9610468 0.2274889 29.910228 0.0002000
specific 7 1.4868919 0.1142337 2.145635 0.0003999
source.ID 17 3.3276522 0.2556542 1.977259 0.0002000
PL 1 0.1808180 0.0138917 1.826485 0.0793841
Residual 48 4.7518946 0.3650747 NA NA
Total 75 13.0162237 1.0000000 NA NA
Jaccard PERMANOVA results: Cosley headstart plastron (size based on PL)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.3946026 0.0198597 2.121653 0.0169966
date.sc 1 3.1250202 0.1572770 16.802239 0.0002000
specific 7 2.2461519 0.1130450 1.725263 0.0002000
source.ID 17 4.9035088 0.2467853 1.550859 0.0002000
PL 1 0.2728070 0.0137299 1.466796 0.1025795
Residual 48 8.9274392 0.4493030 NA NA
Total 75 19.8695297 1.0000000 NA NA
Weighted Unifrac PERMANOVA results: Cosley headstart plastron (size based on PL)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0059616 0.0203424 1.843319 0.1151770
date.sc 1 0.0307610 0.1049644 9.511309 0.0002000
specific 7 0.0284863 0.0972025 1.258280 0.1797640
source.ID 17 0.0686748 0.2343362 1.249075 0.1165767
PL 1 0.0039383 0.0134384 1.217714 0.2769446
Residual 48 0.1552393 0.5297161 NA NA
Total 75 0.2930612 1.0000000 NA NA
Uneighted Unifrac PERMANOVA results: Cosley headstart plastron (size based on PL)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2200893 0.0230961 2.350375 0.0025995
date.sc 1 1.2665965 0.1329162 13.526226 0.0002000
specific 7 1.1147296 0.1169793 1.700630 0.0002000
source.ID 17 2.3037467 0.2417543 1.447185 0.0002000
PL 1 0.1294045 0.0135797 1.381936 0.1121776
Residual 48 4.4947224 0.4716745 NA NA
Total 75 9.5292890 1.0000000 NA NA
Bray-Curtis: Family PERMANOVA results: Cosley headstart plastron (size based on PL)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0792960 0.0191240 1.807536 0.0965807
date.sc 1 0.4628674 0.1116309 10.550971 0.0002000
specific 7 0.4409984 0.1063567 1.436067 0.0409918
source.ID 17 0.9687179 0.2336282 1.298925 0.0461908
PL 1 0.0887861 0.0214128 2.023861 0.0589882
Residual 48 2.1057432 0.5078474 NA NA
Total 75 4.1464091 1.0000000 NA NA
Jaccard: Family PERMANOVA results: Cosley headstart plastron (size based on PL)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1401162 0.0239614 2.383880 0.0029994
date.sc 1 0.7478237 0.1278862 12.723170 0.0002000
specific 7 0.6469557 0.1106367 1.572435 0.0002000
source.ID 17 1.4168398 0.2422955 1.417973 0.0003999
PL 1 0.0745608 0.0127507 1.268547 0.1923615
Residual 48 2.8212730 0.4824694 NA NA
Total 75 5.8475692 1.0000000 NA NA
Weighted Unifrac: Family PERMANOVA results: Cosley headstart plastron (size based on PL)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0423899 0.0212744 1.921518 0.0807838
date.sc 1 0.1962103 0.0984728 8.894140 0.0002000
specific 7 0.1911927 0.0959546 1.238099 0.1649670
source.ID 17 0.4703895 0.2360762 1.254269 0.0873825
PL 1 0.0334403 0.0167828 1.515835 0.1699660
Residual 48 1.0589102 0.5314392 NA NA
Total 75 1.9925329 1.0000000 NA NA
Unweighted Unifrac: Family PERMANOVA results: Cosley headstart plastron (size based on PL)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0679414 0.0167503 1.653935 0.0639872
date.sc 1 0.5120625 0.1262442 12.465416 0.0002000
specific 7 0.4574003 0.1127677 1.590678 0.0023995
source.ID 17 1.0099509 0.2489939 1.446222 0.0003999
PL 1 0.0369973 0.0091213 0.900645 0.5514897
Residual 48 1.9717754 0.4861226 NA NA
Total 75 4.0561278 1.0000000 NA NA
Summary (p values): PERMANOVA results: Cosley headstart plastron (size based on PL)
read.depth date specific source.ID PL read.depth.adjusted date.adjusted specific.adjusted source.ID.adjusted PL.adjusted
Bray-Curtis 0.0093981 2e-04 0.0003999 0.0002000 0.0793841 0.0281944 0.0007998 0.0007998 0.0007998 0.3175365
Jaccard 0.0169966 2e-04 0.0002000 0.0002000 0.1025795 0.0339932 0.0007998 0.0007998 0.0007998 0.3175365
Weighted Unifrac 0.1151770 2e-04 0.1797640 0.1165767 0.2769446 0.1151770 0.0007998 0.1797640 0.1165767 0.3175365
Uneighted Unifrac 0.0025995 2e-04 0.0002000 0.0002000 0.1121776 0.0103979 0.0007998 0.0007998 0.0007998 0.3175365
Bray-Curtis: Family 0.0965807 2e-04 0.0409918 0.0461908 0.0589882 0.1919616 0.0007998 0.0819836 0.0923815 0.2359528
Jaccard: Family 0.0029994 2e-04 0.0002000 0.0003999 0.1923615 0.0119976 0.0007998 0.0007998 0.0015997 0.5098980
Weighted Unifrac: Family 0.0807838 2e-04 0.1649670 0.0873825 0.1699660 0.1919616 0.0007998 0.1649670 0.0923815 0.5098980
Unweighted Unifrac: Family 0.0639872 2e-04 0.0023995 0.0003999 0.5514897 0.1919616 0.0007998 0.0071986 0.0015997 0.5514897

6.2 Shedd

Head start plastrons

  • Make Shedd plastron subset
# Keep only the plastron samples whose location is Shedd
plastron.shedd <- plastron[plastron$location == "Shedd", ]

6.2.1 Richness

compare individuals, dates and tubs (Shedd plastron)

  • Plot
# Boxplots of richness per tub (specific), split by date
ggplot(data = plastron.shedd, mapping = aes(x = specific, y = rich, fill = date)) +
  geom_boxplot() +
  labs(title = "Shedd plastron richness over time")

  • Make and check model
# Linear model of richness on log10 read depth, individual (source.ID),
# and the tub-by-date interaction
plsrm1 <- lm(
  rich ~ log10(read.depth) + source.ID + specific * date,
  data = plastron.shedd
)
# Diagnostic plots: normality of residuals (qq) and constant variance (ncv)
check_model(plsrm1, check = c("qq", "ncv"))

  • Reasonable fit
# F tests for each term of the richness model, rendered as a formatted table
kable.wrap(
  x = Anova(plsrm1),
  caption = "ANOVA results: Shedd headstart plastron richness"
)
ANOVA results: Shedd headstart plastron richness
Sum Sq Df F value Pr(>F)
log10(read.depth) 2532.6559 1 3.7006597 0.1027363
source.ID 5526.2152 9 0.8971979 0.5760098
specific 1662.8127 1 2.4296644 0.1700718
date 973.1022 1 1.4218750 0.2781103
specific:date 904.1887 1 1.3211801 0.2941268
Residuals 4106.2774 6 NA NA
  • No significant effects, but there were only two dates and the sample sizes were so small that this is questionable

6.2.2 Evenness

compare individuals, dates and tubs (Shedd plastron)

  • Plot
# Boxplots of evenness per tub (specific), split by date
ggplot(data = plastron.shedd, mapping = aes(x = specific, y = even, fill = date)) +
  geom_boxplot() +
  labs(title = "Shedd plastron evenness over time")

  • Make and check model
# Linear model of evenness on log10 read depth, individual (source.ID),
# and the tub-by-date interaction (reuses the name plsrm1 from the richness model)
plsrm1 <- lm(
  even ~ log10(read.depth) + source.ID + specific * date,
  data = plastron.shedd
)
# Diagnostic plots: normality of residuals (qq) and constant variance (ncv)
check_model(plsrm1, check = c("qq", "ncv"))

  • Reasonable fit
# F tests for each term of the evenness model, rendered as a formatted table
kable.wrap(
  x = Anova(plsrm1),
  caption = "ANOVA results: Shedd headstart plastron evenness"
)
ANOVA results: Shedd headstart plastron evenness
Sum Sq Df F value Pr(>F)
log10(read.depth) 0.0045576 1 3.5603910 0.1081158
source.ID 0.0063206 9 0.5486276 0.7992942
specific 0.0011629 1 0.9084985 0.3773170
date 0.0057512 1 4.4928449 0.0783231
specific:date 0.0006915 1 0.5401814 0.4900757
Residuals 0.0076805 6 NA NA
  • Nearly significant effect of date. Again, sample sizes are very small

6.2.3 Beta diversity

compare individuals, dates, and tubs (Shedd plastron)

6.2.3.1 PCoAs

# PCoA of the Shedd plastron samples for every distance matrix in `dists`;
# point color shows date, point shape shows tub (specific)
for(i in seq_along(dists)){

  meta.i <- plastron.shedd
  # PCoA on the distances among these samples; add=TRUE applies a correction for negative eigenvalues (look up the add argument)
  pcoa <- cmdscale(dist_subset(dists[[i]],meta.i$sample.ID),k=2,add=TRUE,eig=TRUE)
  pcoa.eig <- (pcoa$eig[1:2]/sum(pcoa$eig))*100 #percent variance explained by each of the first two coordinates
  meta.i$x <- pcoa$points[,1] #adds x coordinates
  meta.i$y <- pcoa$points[,2] #adds y coordinates

  # size and stroke are fixed settings, so they are passed outside aes();
  # inside aes() they are treated as data mappings instead of literal values
  plot <- ggplot(meta.i,aes(x = x,y=y))+
    geom_point(aes(fill=specific,color=date,shape=specific),size=4,stroke=1)+
    theme_bw()+
    xlab(pcoa.eig[1])+ #axis labels are the percent variance explained
    ylab(pcoa.eig[2])+
    ggtitle(paste(names(dists)[i],"(Shedd headstart plastron)"))
  print(plot)
}

6.2.3.2 Statistics

compare individuals, dates, and tubs (Shedd plastron)

  • Run PERMANOVA
# PERMANOVA for each distance matrix via perm.loop (defined earlier in the file).
# Term order: read depth, date, tub (specific), individual (source.ID)
temp <- perm.loop(
  "dist_subset(dists[[i]],plastron.shedd$sample.ID)~log10(read.depth)+date + specific + source.ID",
  plastron.shedd,
  c("read.depth", "date", "specific", "source.ID"),
  "PERMANOVA results: Shedd headstart plastron",
  distance.list = dists
)
Bray-Curtis PERMANOVA results: Shedd headstart plastron
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1282206 0.0434477 1.323989 0.2227554
date 1 0.7302927 0.2474606 7.540903 0.0002000
specific 1 0.4113658 0.1393919 4.247708 0.0009998
source.ID 9 1.0033584 0.3399893 1.151171 0.2739452
Residual 7 0.6779093 0.2297105 NA NA
Total 19 2.9511468 1.0000000 NA NA
Jaccard PERMANOVA results: Shedd headstart plastron
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2122283 0.0455850 1.233045 0.2435513
date 1 0.9064295 0.1946940 5.266348 0.0002000
specific 1 0.5540698 0.1190099 3.219142 0.0005999
source.ID 9 1.7781137 0.3819250 1.147870 0.2259548
Residual 7 1.2048209 0.2587861 NA NA
Total 19 4.6556623 1.0000000 NA NA
Weighted Unifrac PERMANOVA results: Shedd headstart plastron
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0040543 0.0391105 1.497978 0.2219556
date 1 0.0343991 0.3318356 12.709692 0.0007998
specific 1 0.0123342 0.1189835 4.557208 0.0219956
source.ID 9 0.0339298 0.3273083 1.392921 0.2565487
Residual 7 0.0189457 0.1827621 NA NA
Total 19 0.1036632 1.0000000 NA NA
Uneighted Unifrac PERMANOVA results: Shedd headstart plastron
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.1085814 0.0471090 1.104980 0.3371326
date 1 0.4073180 0.1767187 4.145078 0.0002000
specific 1 0.1820305 0.0789756 1.852436 0.0317936
source.ID 9 0.9191063 0.3987629 1.039256 0.4197161
Residual 7 0.6878582 0.2984337 NA NA
Total 19 2.3048943 1.0000000 NA NA
Bray-Curtis: Family PERMANOVA results: Shedd headstart plastron
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0467624 0.0416122 1.284650 0.2827435
date 1 0.2642094 0.2351108 7.258323 0.0002000
specific 1 0.1296768 0.1153949 3.562463 0.0083983
source.ID 9 0.4283109 0.3811389 1.307388 0.1911618
Residual 7 0.2548063 0.2267432 NA NA
Total 19 1.1237659 1.0000000 NA NA
Jaccard: Family PERMANOVA results: Shedd headstart plastron
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0666548 0.0411796 0.9444442 0.5088982
date 1 0.2591262 0.1600891 3.6716048 0.0002000
specific 1 0.1440179 0.0889748 2.0406151 0.0213957
source.ID 9 0.6548084 0.4045429 1.0308995 0.4361128
Residual 7 0.4940301 0.3052136 NA NA
Total 19 1.6186375 1.0000000 NA NA
Weighted Unifrac: Family PERMANOVA results: Shedd headstart plastron
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0285938 0.0532447 1.688133 0.1785643
date 1 0.1604666 0.2988063 9.473713 0.0002000
specific 1 0.0552969 0.1029688 3.264647 0.0287942
source.ID 9 0.1741016 0.3241961 1.142078 0.3641272
Residual 7 0.1185666 0.2207840 NA NA
Total 19 0.5370255 1.0000000 NA NA
Unweighted Unifrac: Family PERMANOVA results: Shedd headstart plastron
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0347466 0.0318631 0.7119713 0.7666467
date 1 0.1305242 0.1196922 2.6744873 0.0021996
specific 1 0.0980304 0.0898950 2.0086780 0.0229954
source.ID 9 0.4855733 0.4452764 1.1055078 0.2883423
Residual 7 0.3416241 0.3132733 NA NA
Total 19 1.0904985 1.0000000 NA NA
Summary (p values): PERMANOVA results: Shedd headstart plastron
read.depth date specific source.ID read.depth.adjusted date.adjusted specific.adjusted source.ID.adjusted
Bray-Curtis 0.2227554 0.0002000 0.0009998 0.2739452 0.8878224 0.0007998 0.0029994 0.9038192
Jaccard 0.2435513 0.0002000 0.0005999 0.2259548 0.8878224 0.0007998 0.0023995 0.9038192
Weighted Unifrac 0.2219556 0.0007998 0.0219956 0.2565487 0.8878224 0.0007998 0.0439912 0.9038192
Uneighted Unifrac 0.3371326 0.0002000 0.0317936 0.4197161 0.8878224 0.0007998 0.0439912 0.9038192
Bray-Curtis: Family 0.2827435 0.0002000 0.0083983 0.1911618 0.8482304 0.0007998 0.0335933 0.7646471
Jaccard: Family 0.5088982 0.0002000 0.0213957 0.4361128 1.0000000 0.0007998 0.0641872 0.8650270
Weighted Unifrac: Family 0.1785643 0.0002000 0.0287942 0.3641272 0.7142571 0.0007998 0.0641872 0.8650270
Unweighted Unifrac: Family 0.7666467 0.0021996 0.0229954 0.2883423 1.0000000 0.0021996 0.0641872 0.8650270
  • Date is strongly significant
  • Tub is significant for ASVs, but generally slightly non significant for families

6.3 Shedd vs Cosley vs wild

Comparing plastron samples for wild and captive turtles (and years)

6.3.1 Overview

  • Comparative analyses of plastron data are challenging because data are available from disparate groups:

    • Wild adults
      • 2018
      • 2019
    • Captive
      • Cosley (multiple dates, tubs, and individuals)
      • Shedd (two tubs, two dates, multiple individuals)
  • These groupings are not ideal, but they can be analyzed following several steps

    • First, in the captive animals, in both cases, data will be limited to the last day
      • This is the closest date to release
      • All individuals on the last day will be included, not just the ones that had been used previously in plastron.cosley.3
    • Second, a single variable (specific.year) will be nested in site
      • This has the two years for wild and tubs for captive turtles
      • It’s not ideal, but should suitably partition the variance for PERMANOVAs
    • We will only look statistically at beta diversity, because these issues in model design make it difficult to trust the linear models, and the sample sizes would be too small in most subsets to justify testing them independently, particularly given the large number of pairwise comparisons that would be required.
  • Will additionally compare years for wild turtles

  • Note that the captive vs wild comparison is entirely confounded by the age difference (juvenile vs adult)

Make subsets

# Plastron samples from wild turtles
plastron.wild <- plastron[plastron$captive.wild == "wild", ]

# Captive samples restricted to the last sampling day at each facility
plastron.shedd.last <- plastron.shedd[plastron.shedd$date == "2020-04-08", ]
plastron.cosley.last <- plastron.cosley[plastron.cosley$date == "2019-08-08", ]

# Stack the wild, Shedd, and Cosley subsets into a single data set
plastron.sites <- rbind.data.frame(
  plastron.wild,
  plastron.shedd.last,
  plastron.cosley.last
)

6.3.2 Heatmaps

  • Note: the ASV and Family graphs are labeled “OTU” on the Y axis. This is due to a bug in phyloseq that I have been unable to isolate.
# Heatmaps for each object in phylo.list via phylo.heat.loop (defined earlier
# in the file), with samples ordered by captive/wild, then location, then year
phylo.heat.loop(
  phylo.list,
  data = plastron.sites,
  ID.col = "sample.ID",
  order1 = "captive.wild",
  order2 = "location.general",
  order3 = "year",
  title = "Plastron by location and year",
  method = "NMDS",
  taxa.label = label.list
)

  • All sites and years are clearly different until class (especially wild 2018)
  • The wild 2018 difference remains even at the Phylum level

6.3.3 Richness

Comparing plastron samples for wild and captive turtles (and years)

  • Boxplot
# Boxplots of richness per site, split by tub or year (specific.year)
ggplot(
  data = plastron.sites,
  mapping = aes(x = location.general, y = rich, fill = specific.year)
) +
  geom_boxplot() +
  labs(title = "Plastron richness among sites and tubs or years")

6.3.4 Evenness

Comparing plastron samples for wild and captive turtles (and years)

  • Boxplot
# Boxplots of evenness per site, split by tub or year (specific.year)
ggplot(
  data = plastron.sites,
  mapping = aes(x = location.general, y = even, fill = specific.year)
) +
  geom_boxplot() +
  labs(title = "Plastron evenness among sites")

6.3.5 Beta diversity

Comparing plastron samples for wild and captive turtles (and years)

6.3.5.1 All sites

6.3.5.1.1 PCoAs and boxplots
# PCoA of the combined wild/Shedd/Cosley plastron samples for every distance
# matrix in `dists`; color shows site, shape shows tub or year (specific.year)
for(i in seq_along(dists)){

  meta.i <- plastron.sites
  # PCoA on the distances among these samples; add=TRUE applies a correction for negative eigenvalues (look up the add argument)
  pcoa <- cmdscale(dist_subset(dists[[i]],meta.i$sample.ID),k=2,add=TRUE,eig=TRUE)
  pcoa.eig <- (pcoa$eig[1:2]/sum(pcoa$eig))*100 #percent variance explained by each of the first two coordinates
  meta.i$x <- pcoa$points[,1] #adds x coordinates
  meta.i$y <- pcoa$points[,2] #adds y coordinates

  # size and stroke are fixed settings, so they are passed outside aes();
  # inside aes() they are treated as data mappings instead of literal values
  plot <- ggplot(meta.i,aes(x = x,y=y))+
    geom_point(aes(fill=location.general,color=location.general,shape=specific.year),size=4,stroke=1)+
    theme_bw()+
    xlab(pcoa.eig[1])+ #axis labels are the percent variance explained
    ylab(pcoa.eig[2])+
    ggtitle(paste(names(dists)[i],"(plastron among sites)"))
  print(plot)
}

6.3.5.1.2 PERMANOVAs
  • Run PERMANOVAs
# PERMANOVA for each distance matrix via perm.loop (defined earlier in the file).
# specific.year (tub or year) is nested within site (location.general)
temp <- perm.loop(
  "dist_subset(dists[[i]],plastron.sites$sample.ID)~log10(read.depth)+location.general/specific.year",
  plastron.sites,
  c("read.depth", "location.general"),
  "PERMANOVA results: Plastron samples among sites",
  distance.list = dists
)
Bray-Curtis PERMANOVA results: Plastron samples among sites
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 1.960558 0.1531792 13.401906 2e-04
location.general 2 4.574201 0.3573842 15.634071 2e-04
location.general:specific.year 3 1.583089 0.1236874 3.607205 2e-04
Residual 32 4.681264 0.3657491 NA NA
Total 38 12.799113 1.0000000 NA NA
Jaccard PERMANOVA results: Plastron samples among sites
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 1.623674 0.1101269 7.415774 2e-04
location.general 2 4.151652 0.2815887 9.480877 2e-04
location.general:specific.year 3 1.961986 0.1330731 2.986979 2e-04
Residual 32 7.006359 0.4752113 NA NA
Total 38 14.743670 1.0000000 NA NA
Weighted Unifrac PERMANOVA results: Plastron samples among sites
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0156323 0.0349713 2.355665 0.0917816
location.general 2 0.1911461 0.4276157 14.402077 0.0002000
location.general:specific.year 3 0.0278720 0.0623529 1.400028 0.2257548
Residual 32 0.2123539 0.4750601 NA NA
Total 38 0.4470043 1.0000000 NA NA
Uneighted Unifrac PERMANOVA results: Plastron samples among sites
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 1.246172 0.1323570 10.246079 2e-04
location.general 2 3.233958 0.3434814 13.294865 2e-04
location.general:specific.year 3 1.043126 0.1107912 2.858873 2e-04
Residual 32 3.891979 0.4133703 NA NA
Total 38 9.415235 1.0000000 NA NA
Bray-Curtis: Family PERMANOVA results: Plastron samples among sites
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.6890191 0.1468618 11.534287 2e-04
location.general 2 1.3879652 0.2958396 11.617377 2e-04
location.general:specific.year 3 0.7030586 0.1498543 3.923104 2e-04
Residual 32 1.9115713 0.4074443 NA NA
Total 38 4.6916142 1.0000000 NA NA
Jaccard: Family PERMANOVA results: Plastron samples among sites
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 1.2706904 0.1536163 12.669415 2e-04
location.general 2 2.9178535 0.3527451 14.546225 2e-04
location.general:specific.year 3 0.8738336 0.1056395 2.904185 2e-04
Residual 32 3.2094689 0.3879991 NA NA
Total 38 8.2718464 1.0000000 NA NA
Weighted Unifrac: Family PERMANOVA results: Plastron samples among sites
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2418437 0.1102292 8.33895 0.0002000
location.general 2 0.7896790 0.3599253 13.61436 0.0002000
location.general:specific.year 3 0.2344309 0.1068505 2.69445 0.0043991
Residual 32 0.9280542 0.4229950 NA NA
Total 38 2.1940077 1.0000000 NA NA
Unweighted Unifrac: Family PERMANOVA results: Plastron samples among sites
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.9635365 0.1652172 13.111604 0.0002000
location.general 2 1.9298226 0.3309059 13.130312 0.0002000
location.general:specific.year 3 0.5869850 0.1006501 2.662523 0.0013997
Residual 32 2.3515939 0.4032268 NA NA
Total 38 5.8319381 1.0000000 NA NA
Summary (p values): PERMANOVA results: Plastron samples among sites
read.depth location.general read.depth.adjusted location.general.adjusted
Bray-Curtis 0.0002000 2e-04 0.0007998 0.0007998
Jaccard 0.0002000 2e-04 0.0007998 0.0007998
Weighted Unifrac 0.0917816 2e-04 0.0917816 0.0007998
Uneighted Unifrac 0.0002000 2e-04 0.0007998 0.0007998
Bray-Curtis: Family 0.0002000 2e-04 0.0007998 0.0007998
Jaccard: Family 0.0002000 2e-04 0.0007998 0.0007998
Weighted Unifrac: Family 0.0002000 2e-04 0.0007998 0.0007998
Unweighted Unifrac: Family 0.0002000 2e-04 0.0007998 0.0007998
  • All site comparisons were significant

6.3.5.2 Wild (years)

Comparing plastron samples for wild samples across years

  • Run PERMANOVAs (see previous section for PCoAs)
# Year comparison for wild turtles only, so use the wild subset (plastron.wild).
# plastron.sites also contains the captive Shedd and Cosley samples, which would
# mix site differences into the year term.
temp<- perm.loop("dist_subset(dists[[i]],plastron.wild$sample.ID)~log10(read.depth)+as.factor(year)", plastron.wild, c("read.depth","year"),"PERMANOVA results: Wild plastron samples across years",distance.list=dists)
Bray-Curtis PERMANOVA results: Wild plastron samples across years
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 1.960558 0.1531792 8.554810 2e-04
as.factor(year) 2 2.817390 0.2201239 6.146779 2e-04
Residual 35 8.021164 0.6266969 NA NA
Total 38 12.799113 1.0000000 NA NA
Jaccard PERMANOVA results: Wild plastron samples across years
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 1.623674 0.1101269 5.466903 2e-04
as.factor(year) 2 2.724970 0.1848230 4.587480 2e-04
Residual 35 10.395026 0.7050501 NA NA
Total 38 14.743670 1.0000000 NA NA
Weighted Unifrac PERMANOVA results: Wild plastron samples across years
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0156323 0.0349713 1.441152 0.2129574
as.factor(year) 2 0.0517233 0.1157109 2.384196 0.0647870
Residual 35 0.3796487 0.8493178 NA NA
Total 38 0.4470043 1.0000000 NA NA
Uneighted Unifrac PERMANOVA results: Wild plastron samples across years
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 1.246172 0.1323570 7.571771 2e-04
as.factor(year) 2 2.408714 0.2558315 7.317698 2e-04
Residual 35 5.760349 0.6118115 NA NA
Total 38 9.415235 1.0000000 NA NA
Bray-Curtis: Family PERMANOVA results: Wild plastron samples across years
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.6890191 0.1468618 7.760614 2e-04
as.factor(year) 2 0.8951517 0.1907982 5.041171 2e-04
Residual 35 3.1074434 0.6623399 NA NA
Total 38 4.6916142 1.0000000 NA NA
Jaccard: Family PERMANOVA results: Wild plastron samples across years
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 1.270690 0.1536163 9.164265 2e-04
as.factor(year) 2 2.148157 0.2596950 7.746293 2e-04
Residual 35 4.852999 0.5866887 NA NA
Total 38 8.271846 1.0000000 NA NA
Weighted Unifrac: Family PERMANOVA results: Wild plastron samples across years
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2418437 0.1102292 5.484866 0.0023995
as.factor(year) 2 0.4089124 0.1863769 4.636941 0.0002000
Residual 35 1.5432516 0.7033939 NA NA
Total 38 2.1940077 1.0000000 NA NA
Unweighted Unifrac: Family PERMANOVA results: Wild plastron samples across years
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.9635365 0.1652172 9.837198 2e-04
as.factor(year) 2 1.4402122 0.2469526 7.351903 2e-04
Residual 35 3.4281893 0.5878302 NA NA
Total 38 5.8319381 1.0000000 NA NA
Summary (p values): PERMANOVA results: Wild plastron samples across years
read.depth year read.depth.adjusted year.adjusted
Bray-Curtis 0.0002000 0.000200 0.0007998 0.0007998
Jaccard 0.0002000 0.000200 0.0007998 0.0007998
Weighted Unifrac 0.2129574 0.064787 0.2129574 0.0647870
Uneighted Unifrac 0.0002000 0.000200 0.0007998 0.0007998
Bray-Curtis: Family 0.0002000 0.000200 0.0007998 0.0007998
Jaccard: Family 0.0002000 0.000200 0.0007998 0.0007998
Weighted Unifrac: Family 0.0023995 0.000200 0.0023995 0.0007998
Unweighted Unifrac: Family 0.0002000 0.000200 0.0007998 0.0007998
  • Years were significantly different in all cases except for weighted unifrac for ASVs

7 Cloaca X plastron

7.1 Correlations

7.1.1 Subset

  • Subsets data to paired cloaca and plastron samples (wild turtles only)
  • Reorganizes data into a paired format for tests
# Cloacal samples from wild turtles; their source.ID + date combinations define
# the sampling events to look up
cloaca.plastron <- cloaca[which(cloaca$captive.wild == "wild"),]

# Subset the metadata to every sample (cloacal and plastron) that shares a
# source.ID + date with one of those wild cloacal samples
cloaca.plastron <- meta[which(paste(meta$source.ID, meta$date) %in% paste(cloaca.plastron$source.ID , cloaca.plastron$date)),]

# Sort by individual AND date: the cloacal and plastron rows are paired by
# position below, so an individual sampled on more than one date must appear
# in the same order in both sample types
cloaca.plastron  <- cloaca.plastron[order(cloaca.plastron$source.ID, cloaca.plastron$date),]

# Stop rather than silently pairing the wrong samples if the two sample types
# do not line up one-to-one by source.ID and date
if(!identical(
  paste(cloaca.plastron$source.ID, cloaca.plastron$date)[cloaca.plastron$sample.type == "cloaca"],
  paste(cloaca.plastron$source.ID, cloaca.plastron$date)[cloaca.plastron$sample.type == "plastron"]
)){
  stop("Cloacal and plastron samples do not pair one-to-one by source.ID and date", call. = FALSE)
}


#reformat data into a paired layout for plots and correlation tests
# cloacal rows keep the identifying columns; the measurement columns get a "c." prefix
cloaca.cor <- cloaca.plastron[cloaca.plastron$sample.type == "cloaca",c("source.ID","location.general","year","captive.wild","rich","even","read.depth")]
colnames(cloaca.cor)[5:7] <- paste0("c.",colnames(cloaca.cor)[5:7])


# plastron rows contribute only the measurement columns, with a "p." prefix
plastron.cor <- cloaca.plastron[cloaca.plastron$sample.type == "plastron",c("rich","even","read.depth")]
colnames(plastron.cor)[1:3] <- paste0("p.",colnames(plastron.cor)[1:3])


# one row per paired sampling event: cloacal columns followed by plastron columns
cloaca.plastron.cor <- cbind.data.frame(cloaca.cor,plastron.cor)

7.1.2 Richness

Look for correlations between plastron and cloacal richness

  • Plot
# Scatter plot of cloacal richness (y) against plastron richness (x),
# with points and linear fits colored by general location
ggplot(
  data = cloaca.plastron.cor,
  mapping = aes(x = p.rich, y = c.rich, color = location.general, fill = location.general)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Cloaca richness X plastron richness")

  • Construct and check model
# Linear model of plastron richness on cloacal richness, year (as a factor), and their interaction
cpm1 <- lm(
  formula = p.rich ~ c.rich * as.factor(year),
  data = cloaca.plastron.cor
)
# Diagnostics restricted to the Q-Q ("qq") and non-constant variance ("ncv") checks
check_model(cpm1, check = c("qq", "ncv"))

  • Good fit
# ANOVA table for cpm1, rendered with the shared kable settings
kable.wrap(
  x = Anova(cpm1),
  caption = "Result: ANOVA looking for associations between cloacal and plastron richness"
)
Result: ANOVA looking for associations between cloacal and plastron richness
Sum Sq Df F value Pr(>F)
c.rich 1545.87246 1 0.9127142 0.3568242
as.factor(year) 83.31825 1 0.0491928 0.8279200
c.rich:as.factor(year) 89.50823 1 0.0528475 0.8217582
Residuals 22018.21931 13 NA NA
  • No significant association (as expected)

7.1.3 Evenness

Look for correlations between plastron and cloacal evenness

  • Plot
# Scatter plot of cloacal evenness (y) against plastron evenness (x),
# with points and linear fits colored by general location
ggplot(
  data = cloaca.plastron.cor,
  mapping = aes(x = p.even, y = c.even, color = location.general, fill = location.general)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Cloaca evenness X plastron evenness")

  • Construct and check model
# Linear model of plastron evenness on cloacal evenness, year (as a factor), and their interaction
cpm2 <- lm(
  formula = p.even ~ c.even * as.factor(year),
  data = cloaca.plastron.cor
)
# Diagnostics restricted to the Q-Q ("qq") and non-constant variance ("ncv") checks
check_model(cpm2, check = c("qq", "ncv"))

  • Acceptable fit
# ANOVA table for cpm2, rendered with the shared kable settings
kable.wrap(
  x = Anova(cpm2),
  caption = "Result: ANOVA looking for associations between cloacal and plastron evenness"
)
Result: ANOVA looking for associations between cloacal and plastron evenness
Sum Sq Df F value Pr(>F)
c.even 0.0057114 1 1.2081947 0.2916297
as.factor(year) 0.0021439 1 0.4535303 0.5124589
c.even:as.factor(year) 0.0000353 1 0.0074679 0.9324519
Residuals 0.0614536 13 NA NA
  • No significant association

7.1.4 Beta diversity

Look for associations between plastron and cloacal beta diversity

7.1.4.1 Plots

  • Make scatter plots of dissimilarities that will be used in Mantel testing
    • For each plot, only the lower left hand of the dist object will be used
# For each distance matrix, plot pairwise plastron dissimilarities against the
# corresponding pairwise cloaca dissimilarities (the values compared by the Mantel tests).
# The two sets of dissimilarities are matched by position, so this relies on the cloaca and
# plastron rows of cloaca.plastron being in the same order.
for (i in seq_along(dists)) {
  # pairwise dissimilarities among the cloaca samples; only the first column of the split.dist output is kept
  cloaca.dist.i <- split.dist(as.matrix(dist_subset(dists[[i]], cloaca.plastron[cloaca.plastron$sample.type == "cloaca", "sample.ID"])))[, 1]
  # full split.dist output for the plastron samples (supplies the ID1 and ID2 columns merged on below)
  plastron.dist.i <- split.dist(as.matrix(dist_subset(dists[[i]], cloaca.plastron[cloaca.plastron$sample.type == "plastron", "sample.ID"])))

  mantel.plot.i <- cbind.data.frame(cloaca.dist.i, plastron.dist.i)
  colnames(mantel.plot.i)[1:2] <- c("cloaca", "plastron")

  # attach metadata for each member of the pair: first sample via ID1 (suffix .x after the
  # second merge), second sample via ID2 (suffix .y)
  meta.i <- cloaca.plastron
  colnames(meta.i)[which(colnames(meta.i) == "sample.ID")] <- "ID1"
  mantel.plot.i <- merge(mantel.plot.i, meta.i, by = "ID1", all.x = TRUE, all.y = FALSE)
  colnames(meta.i)[which(colnames(meta.i) == "ID1")] <- "ID2"
  mantel.plot.i <- merge(mantel.plot.i, meta.i, by = "ID2", all.x = TRUE, all.y = FALSE)

  # color each point by the location and year of both samples in the pair
  mantel.plot.i$color <- paste(mantel.plot.i$location.general.x, mantel.plot.i$year.x, mantel.plot.i$location.general.y, mantel.plot.i$year.y)

  m.plot.i <- ggplot(mantel.plot.i, aes(x = cloaca, y = plastron, fill = color, color = color)) +
    geom_point() +
    geom_smooth(method = "lm") +
    ggtitle(paste(names(dists)[i], "(correlation of plastron and cloaca dissimilarities)"))
  print(m.plot.i)
}

7.1.4.2 Mantel

  • Run test using all paired cloaca and plastron samples, with permutations constrained by year
set.seed(1234)

# One Mantel test per distance matrix: cloaca dissimilarities vs plastron dissimilarities,
# 5000 permutations restricted within year. Tests run in list order, so the random number
# stream is consumed in the same sequence on every run.
mantel.res <- lapply(seq_along(dists), function(i) {
  cloaca.ids <- cloaca.plastron[cloaca.plastron$sample.type == "cloaca", "sample.ID"]
  plastron.ids <- cloaca.plastron[cloaca.plastron$sample.type == "plastron", "sample.ID"]
  fit <- mantel(
    dist_subset(dists[[i]], cloaca.ids),
    dist_subset(dists[[i]], plastron.ids),
    permutations = 5000,
    strata = cloaca.plastron.cor$year
  )
  # keep only the Mantel statistic and its permutation p value
  c(fit$statistic, fit$signif)
})

# Stack the per-matrix results into one table
mantel.res <- do.call("rbind.data.frame", mantel.res)
colnames(mantel.res) <- c("mantel.r", "p")

# Holm adjustment is applied separately to rows 1-4 and rows 5-8
holm.first <- p.adjust(mantel.res$p[1:4], "holm")
holm.second <- p.adjust(mantel.res$p[5:8], "holm")
mantel.res$p.adjusted <- c(holm.first, holm.second)

rownames(mantel.res) <- names(dists)
kable.wrap(mantel.res, "Results: Mantel tests with permutations constrained by year")
Results: Mantel tests with permutations constrained by year
mantel.r p p.adjusted
Bray-Curtis 0.5096190 0.0073985 0.0221956
Jaccard 0.4539915 0.0085983 0.0221956
Weighted Unifrac -0.1118988 0.5944811 0.5944811
Uneighted Unifrac 0.3314133 0.0009998 0.0039992
Bray-Curtis: Family 0.1493540 0.0295941 0.1183763
Jaccard: Family 0.2470407 0.0463907 0.1373725
Weighted Unifrac: Family 0.1167151 0.0457908 0.1373725
Unweighted Unifrac: Family 0.0912156 0.2735453 0.2735453
  • For ASVs, the association was significant for every metric except weighted Unifrac; for families, no association remained significant after adjustment

7.2 Comparisons of mean values

Comparing cloaca and plastron samples

  • With the exception of the heatmaps, comparisons will only be made for wild individuals because including the captive juveniles results in models that are more complex than can be fit with the current data

7.2.1 Heatmaps

  • Note: the ASV and Family graphs are labeled “OTU” on the Y axis. This is due to a bug in phyloseq that I have been unable to isolate.
# Heatmaps for the paired cloaca/plastron samples, drawn by the phylo.heat.loop helper
# (defined outside this section); samples are identified by sample.ID
phylo.heat.loop(
  phylo.list,
  data = cloaca.plastron,
  ID.col = "sample.ID",
  order2 = "sample.type",
  order1 = "location.general",
  order3 = "age",
  order4 = "year",
  title = "Cloaca vs plastron",
  method = "NMDS",
  taxa.label = label.list
)

7.2.2 Richness

Comparing cloaca and plastron samples for wild turtles

7.2.2.1 Boxplot

# Boxplots of richness by sample type, filled by year (year pasted to text so it is treated as discrete)
ggplot(
  data = cloaca.plastron,
  mapping = aes(y = rich, x = sample.type, fill = paste(year))
) +
  geom_boxplot() +
  ggtitle("Cloaca vs plastron richnness (wild)")

7.2.2.2 Statistics

  • Run and check model
# Richness modeled on sample type, year (as a factor), and their interaction,
# with log10 read depth and individual (source.ID) as additional terms
cxprm1 <- lm(
  formula = rich ~ sample.type * as.factor(year) + log10(read.depth) + source.ID,
  data = cloaca.plastron
)
# Diagnostics restricted to the Q-Q ("qq") and non-constant variance ("ncv") checks
check_model(cxprm1, check = c("qq", "ncv"))

  • Good fit
  • Run ANOVA
# ANOVA table for cxprm1, rendered with the shared kable settings
kable.wrap(
  x = Anova(cxprm1),
  caption = "Result: ANOVA comparing cloaca and plastron richness"
)
Result: ANOVA comparing cloaca and plastron richness
Sum Sq Df F value Pr(>F)
sample.type 10463.841 1 15.4264575 0.0012017
as.factor(year) 4020.062 1 5.9266305 0.0269984
log10(read.depth) 306.887 1 0.4524322 0.5107804
source.ID 8893.203 13 1.0085326 0.4863598
sample.type:as.factor(year) 5703.824 1 8.4089395 0.0104450
Residuals 10852.877 16 NA NA
  • Year, sample type, and the interaction were significant.
  • Although the slopes differed, plastron had higher richness in both years

7.2.3 Evenness

Comparing cloaca and plastron samples for wild turtles

7.2.3.1 Boxplot

# Boxplots of evenness by sample type, filled by year (year pasted to text so it is treated as discrete)
ggplot(
  data = cloaca.plastron,
  mapping = aes(y = even, x = sample.type, fill = paste(year))
) +
  geom_boxplot() +
  ggtitle("Cloaca vs plastron evennness (wild)")

  • Pretty obvious interaction

7.2.3.2 Statistics

  • Run and check model
# Evenness modeled on sample type, year (as a factor), and their interaction,
# with log10 read depth and individual (source.ID) as additional terms
cxpem1 <- lm(
  formula = even ~ sample.type * as.factor(year) + log10(read.depth) + source.ID,
  data = cloaca.plastron
)
# Diagnostics restricted to the Q-Q ("qq") and non-constant variance ("ncv") checks
check_model(cxpem1, check = c("qq", "ncv"))

  • Ok fit
  • Run ANOVA
# ANOVA table for cxpem1, rendered with the shared kable settings
kable.wrap(
  x = Anova(cxpem1),
  caption = "Result: ANOVA comparing cloaca and plastron evenness"
)
Result: ANOVA comparing cloaca and plastron evenness
Sum Sq Df F value Pr(>F)
sample.type 0.0056180 1 5.205937 0.0365385
as.factor(year) 0.0039659 1 3.674987 0.0732696
log10(read.depth) 0.0051050 1 4.730587 0.0449716
source.ID 0.0411410 13 2.932575 0.0221547
sample.type:as.factor(year) 0.0203795 1 18.884755 0.0005007
Residuals 0.0172664 16 NA NA
  • Everything except the main effect of year is significant
  • Higher plastron evenness in 2018, but higher cloaca evenness in 2019

7.2.4 Beta diversity

7.2.4.1 PCoAs

# Draw one PCoA per distance matrix for the paired cloaca/plastron samples,
# colored by sample type and shaped by year.
for (i in seq_along(dists)) {

  meta.i <- cloaca.plastron
  # PCoA on the paired samples with a correction for negative eigenvalues (see the add argument of cmdscale)
  pcoa <- cmdscale(dist_subset(dists[[i]], meta.i$sample.ID), k = 2, add = TRUE, eig = TRUE)
  # percent of variance explained by each of the first two coordinates
  pcoa.eig <- (pcoa$eig[1:2] / sum(pcoa$eig)) * 100
  meta.i$x <- pcoa$points[, 1] # adds x coordinates
  meta.i$y <- pcoa$points[, 2] # adds y coordinates

  # size and stroke are constants, so they are set outside aes(); inside aes() they are
  # treated as mapped variables, which adds meaningless legends and passes size through a scale
  plot <- ggplot(meta.i, aes(x = x, y = y)) +
    geom_point(aes(color = sample.type, shape = as.factor(year)), size = 4, stroke = 1) +
    theme_bw() +
    xlab(pcoa.eig[1]) + # axis labels are the percent variance explained
    ylab(pcoa.eig[2]) +
    ggtitle(paste(names(dists)[i], "(Plastron X cloaca)"))
  print(plot)
}

  • Obvious differences between years and sample types, though two cloaca samples consistently grouped with the plastron samples

7.2.4.2 PERMANOVAs

# PERMANOVAs for every distance matrix in dists, run through the perm.loop helper
# (defined outside this section). Arguments, in order: the model formula as text,
# the metadata, the labels for the model terms, and the caption used for the tables.
temp <- perm.loop(
  "dist_subset(distance.list[[i]],cloaca.plastron$sample.ID)~log10(read.depth)+source.ID+as.factor(year)*sample.type",
  cloaca.plastron,
  c("read.depth", "source.ID", "year", "sample.type", "sample.type*year"),
  "PERMANOVA results: cloaca vs plastron (wild)",
  distance.list = dists
)
Bray-Curtis PERMANOVA results: cloaca vs plastron (wild)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.7452907 0.0598648 3.863253 0.0019996
source.ID 13 3.5110353 0.2820209 1.399972 0.0379924
as.factor(year) 1 0.5468607 0.0439261 2.834681 0.0137972
sample.type 1 3.7800216 0.3036270 19.593938 0.0002000
as.factor(year):sample.type 1 0.7796637 0.0626258 4.041427 0.0023995
Residual 16 3.0866866 0.2479354 NA NA
Total 33 12.4495585 1.0000000 NA NA
Jaccard PERMANOVA results: cloaca vs plastron (wild)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.7098953 0.0516075 2.559891 0.0049990
source.ID 13 4.3695659 0.3176558 1.212054 0.0677864
as.factor(year) 1 0.6059614 0.0440518 2.185104 0.0113977
sample.type 1 2.8129307 0.2044926 10.143461 0.0002000
as.factor(year):sample.type 1 0.8202742 0.0596317 2.957918 0.0019996
Residual 16 4.4370352 0.3225606 NA NA
Total 33 13.7556626 1.0000000 NA NA
Weighted Unifrac PERMANOVA results: cloaca vs plastron (wild)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.0443781 0.0681830 4.215129 0.0065987
source.ID 13 0.2007752 0.3084735 1.466930 0.0845831
as.factor(year) 1 0.0264226 0.0405959 2.509675 0.0637872
sample.type 1 0.1831781 0.2814371 17.398675 0.0002000
as.factor(year):sample.type 1 0.0276606 0.0424980 2.627264 0.0511898
Residual 16 0.1684525 0.2588124 NA NA
Total 33 0.6508670 1.0000000 NA NA
Uneighted Unifrac PERMANOVA results: cloaca vs plastron (wild)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.4749081 0.0581980 4.074623 0.0009998
source.ID 13 2.6331525 0.3226819 1.737843 0.0021996
as.factor(year) 1 0.4637652 0.0568325 3.979019 0.0027994
sample.type 1 2.3529843 0.2883485 20.188167 0.0002000
as.factor(year):sample.type 1 0.3705583 0.0454104 3.179321 0.0101980
Residual 16 1.8648423 0.2285287 NA NA
Total 33 8.1602107 1.0000000 NA NA
Bray-Curtis: Family PERMANOVA results: cloaca vs plastron (wild)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.5126837 0.0669938 4.925228 0.0017996
source.ID 13 1.9199275 0.2508822 1.418791 0.0823835
as.factor(year) 1 0.4503420 0.0588474 4.326326 0.0061988
sample.type 1 2.7171344 0.3550554 26.102852 0.0002000
as.factor(year):sample.type 1 0.3871240 0.0505866 3.719006 0.0087982
Residual 16 1.6654942 0.2176347 NA NA
Total 33 7.6527059 1.0000000 NA NA
Jaccard: Family PERMANOVA results: cloaca vs plastron (wild)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.3135107 0.0433016 2.828901 0.0107978
source.ID 13 2.3296188 0.3217629 1.616988 0.0049990
as.factor(year) 1 0.4983156 0.0688265 4.496450 0.0015997
sample.type 1 1.9680229 0.2718199 17.758060 0.0002000
as.factor(year):sample.type 1 0.3575166 0.0493796 3.225980 0.0073985
Residual 16 1.7731873 0.2449096 NA NA
Total 33 7.2401720 1.0000000 NA NA
Weighted Unifrac: Family PERMANOVA results: cloaca vs plastron (wild)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2373474 0.0751578 5.617298 0.0011998
source.ID 13 0.8385806 0.2655428 1.526667 0.0469906
as.factor(year) 1 0.2051100 0.0649496 4.854335 0.0015997
sample.type 1 1.0582957 0.3351172 25.046669 0.0002000
as.factor(year):sample.type 1 0.1426058 0.0451572 3.375050 0.0147970
Residual 16 0.6760472 0.2140754 NA NA
Total 33 3.1579868 1.0000000 NA NA
Unweighted Unifrac: Family PERMANOVA results: cloaca vs plastron (wild)
Df SumOfSqs R2 F Pr(>F)
log10(read.depth) 1 0.2310475 0.0457538 3.203444 0.0069986
source.ID 13 1.6426605 0.3252925 1.751944 0.0017996
as.factor(year) 1 0.3627722 0.0718390 5.029790 0.0003999
sample.type 1 1.4206618 0.2813306 19.697289 0.0002000
as.factor(year):sample.type 1 0.2386568 0.0472607 3.308945 0.0065987
Residual 16 1.1539958 0.2285233 NA NA
Total 33 5.0497946 1.0000000 NA NA
Summary (p values): PERMANOVA results: cloaca vs plastron (wild)
read.depth source.ID year sample.type sample.type*year read.depth.adjusted source.ID.adjusted year.adjusted sample.type.adjusted sample.type*year.adjusted
Bray-Curtis 0.0019996 0.0379924 0.0137972 2e-04 0.0023995 0.0059988 0.1139772 0.0341932 0.0007998 0.0079984
Jaccard 0.0049990 0.0677864 0.0113977 2e-04 0.0019996 0.0099980 0.1355729 0.0341932 0.0007998 0.0079984
Weighted Unifrac 0.0065987 0.0845831 0.0637872 2e-04 0.0511898 0.0099980 0.1355729 0.0637872 0.0007998 0.0511898
Uneighted Unifrac 0.0009998 0.0021996 0.0027994 2e-04 0.0101980 0.0039992 0.0087982 0.0111978 0.0007998 0.0203959
Bray-Curtis: Family 0.0017996 0.0823835 0.0061988 2e-04 0.0087982 0.0053989 0.0939812 0.0061988 0.0007998 0.0263947
Jaccard: Family 0.0107978 0.0049990 0.0015997 2e-04 0.0073985 0.0139972 0.0149970 0.0047990 0.0007998 0.0263947
Weighted Unifrac: Family 0.0011998 0.0469906 0.0015997 2e-04 0.0147970 0.0047990 0.0939812 0.0047990 0.0007998 0.0263947
Unweighted Unifrac: Family 0.0069986 0.0017996 0.0003999 2e-04 0.0065987 0.0139972 0.0071986 0.0015997 0.0007998 0.0263947
  • Nearly everything is significant, including the interaction with year (but generally not individual ID).