Skip to content

Latest commit

 

History

History
134 lines (103 loc) · 5.33 KB

File metadata and controls

134 lines (103 loc) · 5.33 KB

Taxonomic Diversity

Load data

rm(list= ls())


library(ggplot2)
library(reshape2)
library(phenotypicForest)
library(magrittr)

TAX= read.csv2("../00_Data/AllSeason_Cerc_En_Taxonomy_R.csv",header = T)
OTU_Table = as.data.frame(read.csv2("../00_Data/05_Cercozoa_Seasonal_OTU_Table_min-freq-7633_transposed_withMetadata.csv",header = T))

SampleMetadata = OTU_Table[,1:17]
Species = OTU_Table[,18:ncol(OTU_Table)]

Prepare Data

# group Sample Metadata by sampling period

SampleMetadata_S18 = SampleMetadata[OTU_Table$SamplingDescription == "Spring 2018",]
SampleMetadata_S19 = SampleMetadata[OTU_Table$SamplingDescription == "Spring 2019",]
SampleMetadata_A17 = SampleMetadata[OTU_Table$SamplingDescription == "Autumn 2017",]
SampleMetadata_A18 = SampleMetadata[OTU_Table$SamplingDescription == "Autumn 2018",]

# group OTUS by sampling period

species_mat_S18 = Species[OTU_Table$SamplingDescription == "Spring 2018",]
species_mat_S19 = Species[OTU_Table$SamplingDescription == "Spring 2019",]
species_mat_A17 = Species[OTU_Table$SamplingDescription == "Autumn 2017",]
species_mat_A18 = Species[OTU_Table$SamplingDescription == "Autumn 2018",]


Abundances = colSums(species_mat_S18)# choose subtable
TAX = cbind(TAX, Abundances)

TAX$OTU_ID = paste0("OTU", TAX$OTU_Number, "_", TAX$Species)
TAX$Order = as.factor(TAX$Order)

Aggregate tables

OrderTable = species_mat_S18# choose subtable
colnames(OrderTable) = TAX$Order

# First aggregate by Habitat
HabitatAggregatedOrderTable = aggregate(OrderTable, by = list(SampleMetadata_S18$Microhabitat), FUN = sum)# choose subtable
rownames(HabitatAggregatedOrderTable) = HabitatAggregatedOrderTable$Group.1
HabitatAggregatedOrderTable = HabitatAggregatedOrderTable[,-1]


# if you want to check the number of OTUs instead of the amount of reads:
#HabitatAggregatedOrderTable[,] = ifelse(HabitatAggregatedOrderTable[,] > 0, 1, 0)

#Then aggregate the (transposed) table by Order
HabitatAggregatedOrderTable = aggregate(t(HabitatAggregatedOrderTable), 
                                        by = list(TAX$Order), 
                                        FUN = sum)
rownames(HabitatAggregatedOrderTable) = HabitatAggregatedOrderTable$Group.1
HabitatAggregatedOrderTable = HabitatAggregatedOrderTable[,-1]
HabitatAggregatedOrderTable = as.data.frame(HabitatAggregatedOrderTable)

# melt the table so ggplot can interpret it as a histogram
data = HabitatAggregatedOrderTable
data$Order = rownames(data)
data2 = melt(data, id.vars = "Order")

# add the Stratum
data3 = data2
data3$Stratum = ifelse(data2$variable %in% c("Leaf Litter", "Soil"), "Ground", "Canopy")

# for the plotting we need to change the column names
colnames(data3) = c("score", "item", "value", "family")

data3$item <- factor(data3$item, levels=c("Soil","Leaf Litter","Lichen","Orthotrichum","Hypnum", "Arboreal Soil","Bark","Deadwood","Fresh Leaves"))

ggplot

#plot 

S18 = ggplot(data3, aes(fill=score, y=value, x=item)) + 
            geom_bar(position="fill", stat="identity")+ 
            scale_fill_manual(values = c("#008B8B","#ADD8E6","#EC6B25","#ffd27f","#228B22",
                               "#90EE90","#6f0000","#f6beb2","#730073","#cc99cc",
                               "#4c3706","#998b6a","#b25162","#ffabba","#656565",
                               "#c2c2c2","#c7b823","#e9df81","#2ca898","#9be8df",
                               "#73315d","#e0a9cd","#ffff4c","#ffffb2","#0a2d22"), 
                              limits = c("Cercomonadida","Cryomonadida","Cryptofilida", "Desmothoracida",
                               "Ebriida","Euglyphida","Glissomonadida","Limnofilida","Marimonadida",
                               "Novel-clade-10","Novel-clade-12","Novel-clade-9","Pansomonadida",
                               "Plasmodiophorida","Spongomonadida","Tectofilosida","Thaumatomonadida",
                               "Tremulida","unassigned Cercozoa","unassigned Filosa",
                               "unassigned Granofilosea","unassigned Imbricatea",
                               "unassigned Thecofilosea","Vampyrellida","Ventricleftida"),
                                 guide = guide_legend(ncol = 1))+
          scale_y_continuous(labels = function(x) paste0(x*100, "%")) +
          labs(fill = "Order", y= "Relative Abundances", x = "Microhabitat",title = "Spring 2018")+
          theme_minimal()+
        theme(plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
        legend.text = element_text(size = 16), 
        legend.title = element_text(size = 16, face = "bold"),
        legend.key.height = unit(3, "mm"),
        legend.key.width = unit(3, "mm"),
        axis.text=element_text(size=14), 
        axis.title=element_text(size=16,face="bold"))+
        geom_label(x = 6.5, y = 0.75, fill = "darkolivegreen4", color = "white", 
                  label = "Canopy", size = 8 ) +
        geom_label(x = 1.5, y = 0.75, fill = "burlywood4", color = "white", 
                  label = "Ground", size = 8)+
        geom_vline(xintercept = 2.50, size = 0.6, linetype=1.5, color = "#6A785F")+
        coord_flip()

S18

Combine all plots

After you repeated the above mentioned code for every sampling period you can combine all plots with the ggarrange function from the ggpubr package.