Ordination
library("BiocStyle")
library("knitr")
library("rmarkdown")
library("phyloseq"); packageVersion("phyloseq")
[1] ‘1.32.0’
data(GlobalPatterns)
library("ggplot2"); packageVersion("ggplot2")
[1] ‘3.3.2’
library("plyr"); packageVersion("plyr")
[1] ‘1.8.6’
theme_set(theme_bw())
$set(message = FALSE, error = FALSE, warning = FALSE,
opts_chunkcache = FALSE, fig.width = 8, fig.height = 5)
#Remove OTUs that do not show appear more than 5 times in more than half the samples
GlobalPatterns
GP = genefilter_sample(GP, filterfun_sample(function(x) x > 5), A=0.5*nsamples(GP))
wh0 = prune_taxa(wh0, GP)
GP1 =
#Transform to even sampling depth.
transform_sample_counts(GP1, function(x) 1E6 * x/sum(x))
GP1 =
#Keep only the most abundant five phyla.
tapply(taxa_sums(GP1), tax_table(GP1)[, "Phylum"], sum, na.rm=TRUE)
phylum.sum = names(sort(phylum.sum, TRUE))[1:5]
top5phyla = prune_taxa((tax_table(GP1)[, "Phylum"] %in% top5phyla), GP1)
GP1 =# Define a human-associated versus non-human categorical variable:
get_variable(GP1, "SampleType") %in% c("Feces", "Mock", "Skin", "Tongue")
human =sample_data(GP1)$human <- factor(human)
Four main ordination plots
Just OTUs
ordinate(GP1, "NMDS", "bray") GP.ord <-
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1333468
## Run 1 stress 0.171623
## Run 2 stress 0.1871687
## Run 3 stress 0.1570722
## Run 4 stress 0.1333468
## ... Procrustes: rmse 4.578937e-06 max resid 1.330355e-05
## ... Similar to previous best
## Run 5 stress 0.1702603
## Run 6 stress 0.3181951
## Run 7 stress 0.1448829
## Run 8 stress 0.3589772
## Run 9 stress 0.1333468
## ... Procrustes: rmse 7.038866e-06 max resid 2.098435e-05
## ... Similar to previous best
## Run 10 stress 0.1610141
## Run 11 stress 0.1450956
## Run 12 stress 0.1333468
## ... New best solution
## ... Procrustes: rmse 1.595709e-06 max resid 4.013971e-06
## ... Similar to previous best
## Run 13 stress 0.144883
## Run 14 stress 0.1815173
## Run 15 stress 0.168397
## Run 16 stress 0.2839613
## Run 17 stress 0.1510167
## Run 18 stress 0.1622613
## Run 19 stress 0.1535666
## Run 20 stress 0.1675647
## *** Solution reached
plot_ordination(GP1, GP.ord, type="taxa", color="Phylum", title="taxa")
p1 =print(p1)
+ facet_wrap(~Phylum, 3) p1
Just samples
plot_ordination(GP1, GP.ord, type="samples", color="SampleType", shape="human")
p2 =+ geom_polygon(aes(fill=SampleType)) + geom_point(size=5) + ggtitle("samples") p2
biplot graphic
plot_ordination(GP1, GP.ord, type="biplot", color="SampleType", shape="Phylum", title="biplot")
p3 =# Some stuff to modify the automatic shape scale
get_taxa_unique(GP1, "Phylum")
GP1.shape.names = 15:(15 + length(GP1.shape.names) - 1)
GP1.shape <-names(GP1.shape) <- GP1.shape.names
"samples"] <- 16
GP1.shape[+ scale_shape_manual(values=GP1.shape) p3
split graphic
plot_ordination(GP1, GP.ord, type="split", color="Phylum", shape="human", label="SampleType", title="split")
p4 = p4
function(n){
gg_color_hue <- seq(15, 375, length=n+1)
hues =hcl(h=hues, l=65, c=100)[1:n]
} levels(p4$data$Phylum)
color.names <- gg_color_hue(length(color.names))
p4cols <-names(p4cols) <- color.names
"samples"] <- "black"
p4cols[+ scale_color_manual(values=p4cols) p4
@# Supported Ordination Methods
"bray"
dist = c("DCA", "CCA", "RDA", "DPCoA", "NMDS", "MDS", "PCoA")
ord_meths = llply(as.list(ord_meths), function(i, physeq, dist){
plist = ordinate(physeq, method=i, distance=dist)
ordi =plot_ordination(physeq, ordi, "samples", color="SampleType")
}, GP1, dist)
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1333468
## Run 1 stress 0.1385323
## Run 2 stress 0.1630808
## Run 3 stress 0.1666795
## Run 4 stress 0.1530178
## Run 5 stress 0.3819611
## Run 6 stress 0.1576439
## Run 7 stress 0.1333468
## ... Procrustes: rmse 7.349806e-06 max resid 2.180866e-05
## ... Similar to previous best
## Run 8 stress 0.1385323
## Run 9 stress 0.1469145
## Run 10 stress 0.159001
## Run 11 stress 0.1385323
## Run 12 stress 0.1333468
## ... New best solution
## ... Procrustes: rmse 2.403081e-06 max resid 6.886338e-06
## ... Similar to previous best
## Run 13 stress 0.1333468
## ... Procrustes: rmse 4.618725e-06 max resid 1.381285e-05
## ... Similar to previous best
## Run 14 stress 0.254662
## Run 15 stress 0.1333468
## ... Procrustes: rmse 4.785134e-06 max resid 1.436663e-05
## ... Similar to previous best
## Run 16 stress 0.1333468
## ... Procrustes: rmse 5.228518e-06 max resid 1.556762e-05
## ... Similar to previous best
## Run 17 stress 0.1502708
## Run 18 stress 0.167948
## Run 19 stress 0.189785
## Run 20 stress 0.1333468
## ... Procrustes: rmse 2.606267e-06 max resid 7.795527e-06
## ... Similar to previous best
## *** Solution reached
names(plist) <- ord_meths
ldply(plist, function(x){
pdataframe = x$data[, 1:2]
df =colnames(df) = c("Axis_1", "Axis_2")
return(cbind(df, x$data))
})names(pdataframe)[1] = "method"
ggplot(pdataframe, aes(Axis_1, Axis_2, color=SampleType, shape=human, fill=SampleType))
p = p + geom_point(size=4) + geom_polygon()
p = p + facet_wrap(~method, scales="free")
p = p + scale_fill_brewer(type="qual", palette="Set1")
p = p + scale_colour_brewer(type="qual", palette="Set1")
p = p
2]] plist[[
plist[[2]] + scale_colour_brewer(type="qual", palette="Set1")
p = p + scale_fill_brewer(type="qual", palette="Set1")
p = p + geom_point(size=5) + geom_polygon(aes(fill=SampleType))
p = p
MDS (“PCoA”) on Unifrac Distances
ordinate(GP1, "PCoA", "unifrac", weighted=TRUE)
ordu =plot_ordination(GP1, ordu, color="SampleType", shape="human")
plot_ordination(GP1, ordu, color="SampleType", shape="human")
p = p + geom_point(size=7, alpha=0.75)
p = p + scale_colour_brewer(type="qual", palette="Set1")
p =+ ggtitle("MDS/PCoA on weighted-UniFrac distance, GlobalPatterns") p
rm(list=ls())
Alpha diversity graphics
data("GlobalPatterns")
theme_set(theme_bw())
"Set1"
pal = function(palname=pal, ...){
scale_colour_discrete <-scale_colour_brewer(palette=palname, ...)
} function(palname=pal, ...){
scale_fill_discrete <-scale_fill_brewer(palette=palname, ...)
}
Prepare data
prune_species(speciesSums(GlobalPatterns) > 0, GlobalPatterns) GP <-
Plot Examples
plot_richness(GP)
plot_richness(GP, measures=c("Chao1", "Shannon"))
plot_richness(GP, x="SampleType", measures=c("Chao1", "Shannon"))
sampleData(GP)$human <- getVariable(GP, "SampleType") %in% c("Feces", "Mock", "Skin", "Tongue")
plot_richness(GP, x="human", color="SampleType", measures=c("Chao1", "Shannon"))
merge_samples(GP, "SampleType")
GPst =# repair variables that were damaged during merge (coerced to numeric)
sample_data(GPst)$SampleType <- factor(sample_names(GPst))
sample_data(GPst)$human <- as.logical(sample_data(GPst)$human)
plot_richness(GPst, x="human", color="SampleType", measures=c("Chao1", "Shannon"))
p =+ geom_point(size=5, alpha=0.7) p
rm(list=ls())
Heatmap Plots
theme_set(theme_bw())
data("GlobalPatterns")
subset_taxa(GlobalPatterns, Kingdom=="Bacteria")
gpt <- prune_taxa(names(sort(taxa_sums(gpt),TRUE)[1:300]), gpt)
gpt <-plot_heatmap(gpt, sample.label="SampleType")
Subset a smaller dataset based on an Archaeal phylum
subset_taxa(GlobalPatterns, Phylum=="Crenarchaeota")
gpac <-plot_heatmap(gpac)
Re-label by a sample variable and taxonomic family
plot_heatmap(gpac, "NMDS", "bray", "SampleType", "Family")) (p <-
Re-label axis titles
$scales$scales[[1]]$name <- "My X-Axis"
p$scales$scales[[2]]$name <- "My Y-Axis"
pprint(p)
Now repeat the plot, but change the color scheme.
plot_heatmap(gpac, "NMDS", "bray", "SampleType", "Family", low="#000033", high="#CCFF66")
plot_heatmap(gpac, "NMDS", "bray", "SampleType", "Family", low="#000033", high="#FF3300")
plot_heatmap(gpac, "NMDS", "bray", "SampleType", "Family", low="#000033", high="#66CCFF")
plot_heatmap(gpac, "NMDS", "bray", "SampleType", "Family", low="#66CCFF", high="#000033", na.value="white")
plot_heatmap(gpac, "NMDS", "bray", "SampleType", "Family", low="#FFFFCC", high="#000033", na.value="white")
Now try different ordination methods, distances
plot_heatmap(gpac, "NMDS", "jaccard")
plot_heatmap(gpac, "DCA", "none", "SampleType", "Family")
Unconstrained redundancy analysis (Principle Components Analysis, PCA)
plot_heatmap(gpac, "RDA", "none", "SampleType", "Family")
PCoA/MDS ordination on the (default) bray-curtis distance.
plot_heatmap(gpac, "PCoA", "bray", "SampleType", "Family")
MDS/PCoA ordination on the Unweighted-UniFrac distance.
plot_heatmap(gpac, "PCoA", "unifrac", "SampleType", "Family")
Now try weighted-UniFrac distance and MDS/PCoA ordination.
plot_heatmap(gpac, "MDS", "unifrac", "SampleType", "Family", weighted=TRUE)
heatmap(otu_table(gpac))
rm(list=ls())
Plot Microbiome Network
data(enterotype)
set.seed(711L)
plot_net(enterotype, maxdist = 0.4, point_label = "Sample_ID")
plot_net(enterotype, maxdist = 0.3, color = "SeqTech", shape="Enterotype")
# Create an igraph-based network based on the default distance method, “Jaccard”, and a maximum distance between connected nodes of 0.3.
make_network(enterotype, max.dist=0.3)
ig <-
# Now plot this network representation with the default settings.
plot_network(ig, enterotype)
plot_network(ig, enterotype, color="SeqTech", shape="Enterotype", line_weight=0.4, label=NULL)
make_network(enterotype, max.dist=0.2)
ig <-plot_network(ig, enterotype, color="SeqTech", shape="Enterotype", line_weight=0.4, label=NULL)
make_network(enterotype, dist.fun="bray", max.dist=0.3)
ig <-plot_network(ig, enterotype, color="SeqTech", shape="Enterotype", line_weight=0.4, label=NULL)