2023-12-01

Data Visualization packages to install

Install and load the packages used in this session (GGally, ggpubr, ggsignif, and the others listed below)

install.packages(c("GGally", "ggpubr", "ggsignif", 
                   "viridis", "scales", "viridisLite", 
                   "survival", "survminer", "ggalluvial", 
                   "stringr"))
library(GGally)
library(ggpubr)
library(ggsignif)
library(viridis)
library(scales)
library(viridisLite)
library(survival)
library(survminer)
library(ggalluvial)
library(stringr)

Exploring the data

# Pairwise plot matrix of the iris columns, colored and filled by Species.
# progress is spelled FALSE rather than F: F is an ordinary variable that can
# be reassigned, FALSE is a reserved word.
ggpairs(iris, aes(color = Species, fill = Species), progress = FALSE)

Anatomy of a ggplot figure

Anatomy of a ggplot figure

ggplot(data = <DATA>) + 
  <GEOM_FUNCTION>(
     mapping = aes(<MAPPINGS>),
     stat = <STAT>, 
     position = <POSITION>
  ) +
  <COORDINATE_FUNCTION> +
  <FACET_FUNCTION>

Let’s build a basic ggplot figure

Let’s use the iris dataset.

head(iris)
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1          5.1         3.5          1.4         0.2  setosa
2          4.9         3.0          1.4         0.2  setosa
3          4.7         3.2          1.3         0.2  setosa
4          4.6         3.1          1.5         0.2  setosa
5          5.0         3.6          1.4         0.2  setosa
6          5.4         3.9          1.7         0.4  setosa

Let’s build a basic ggplot figure

Now we can add the aesthetics.

# Start the plot: Species on the x axis, Sepal.Length on the y axis.
# No geom layer is added at this step.
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length))

Let’s build a basic ggplot figure

We can visualize the distribution with a violin plot

# Same mapping as before, now with a violin layer (trim = FALSE).
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length)) +
  geom_violin(trim = FALSE)

Let’s build a basic ggplot figure

Let’s add the data on top

# Violin layer first, then the individual observations jittered on top of it.
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length)) +
  geom_violin(trim = FALSE) +
  geom_jitter()

Let’s build a basic ggplot figure

Add some statistics

# Violin + jittered points, plus geom_signif() brackets for the three
# pairwise species comparisons.
iris |>
  ggplot(aes(x = Species, y = Sepal.Length)) +
  geom_violin(trim = FALSE) +
  geom_jitter() +
  geom_signif(
    comparisons = list(
      c("setosa", "versicolor"),
      c("versicolor", "virginica"),
      c("setosa", "virginica")
    ),
    # TRUE rather than T: T is a reassignable variable, TRUE is reserved
    map_signif_level = TRUE,
    # one height per comparison, in the same order as `comparisons`
    y_position = c(7.8, 8.5, 9),
    tip_length = 0
  )

Let’s build a basic ggplot figure

Change the y label and add color to violin plot and data

# Violin outlines colored by Species, points filled by Petal.Length
# (shape 21 is used so the points take a fill), and a custom y-axis label.
iris |>
  ggplot(aes(x = Species, y = Sepal.Length)) +
  geom_violin(aes(color = Species), trim = FALSE, alpha = 0.8) +
  geom_jitter(aes(fill = Petal.Length), shape = 21) +
  geom_signif(
    comparisons = list(
      c("setosa", "versicolor"),
      c("versicolor", "virginica"),
      c("setosa", "virginica")
    ),
    # TRUE rather than T: T is a reassignable variable, TRUE is reserved
    map_signif_level = TRUE,
    # one height per comparison, in the same order as `comparisons`
    y_position = c(7.8, 8.5, 9),
    tip_length = 0
  ) +
  ylab("Sepal length")

Let’s build a basic ggplot figure

Now we’ll remove the violin plot legend and change the color palette and labels

# Final version of the figure: title-cased species names, no legend for the
# violin outlines, viridis fill for the points, manual outline colors.
iris |>
  # Species becomes "Setosa" / "Versicolor" / "Virginica" from here on
  mutate(Species = str_to_title(Species)) |>
  ggplot(aes(x = Species, y = Sepal.Length)) +
  geom_violin(aes(color = Species), trim = FALSE,
              show.legend = FALSE) +
  geom_jitter(aes(fill = Petal.Length), shape = 21, size = 3,
              alpha = 0.8) +
  geom_signif(
    comparisons = list(
      c("Setosa", "Versicolor"),
      c("Versicolor", "Virginica"),
      c("Setosa", "Virginica")
    ),
    # TRUE rather than T: T is a reassignable variable, TRUE is reserved
    map_signif_level = TRUE,
    y_position = c(7.8, 8.5, 9),
    tip_length = 0
  ) +
  scale_fill_viridis(name = "Petal length") +
  # The names must match the title-cased Species values created above;
  # lowercase names match nothing and leave the violins without their color.
  scale_color_manual(values = c("Setosa" = "#06d6a0",
                                "Versicolor" = "#118ab2",
                                "Virginica" = "#073b4c")) +
  ylab("Sepal length") +
  theme(axis.title = element_text(size = 12),
        axis.text.x = element_text(face = "italic"))

Basic figures: Survival plot — Code from https://rkabacoff.github.io/datavis/Models.html#survival-plots

# Kaplan-Meier curves for the lung data, one curve per sex.
# `lung` is shipped in the survival package's "cancer" data file, so it is
# loaded under that name; in current survival releases data(lung) only
# produces a "data set 'lung' not found" warning.
data(cancer, package = "survival")
sfit <- survfit(Surv(time, status) ~ sex, data = lung)
ggsurvplot(sfit,
           conf.int = TRUE,
           pval = TRUE,
           legend.labs = c("Male", "Female"),
           legend.title = "Sex",
           palette = c("cornflowerblue", "indianred3"),
           # kept on one line: breaking the string literal across source
           # lines puts a newline and the indentation into the plot title
           title = "Kaplan-Meier Curve for lung cancer survival",
           xlab = "Time (days)")

Basic figures: Bubble chart — Code from https://rkabacoff.github.io/datavis/Other.html#Bubble

# Bubble chart of mtcars: weight vs. mpg, with point size mapped to hp.
ggplot(mtcars,
       aes(x = wt, y = mpg, size = hp)) +
  geom_point(alpha = .5,
             fill = "cornflowerblue",
             color = "black",
             shape = 21) +
  scale_size_continuous(range = c(1, 14)) +
  # Title and subtitle are kept on one source line each: breaking a string
  # literal across lines puts a newline and the indentation into the label.
  labs(title = "Auto mileage by weight and horsepower",
       subtitle = "Motor Trend US Magazine (1973-74 models)",
       x = "Weight (1000 lbs)",
       y = "Miles/(US) gallon",
       size = "Gross horsepower")

Basic figures: Biplot — Code from https://rkabacoff.github.io/datavis/Other.html#biplots

# fit a principal components model on centered, scaled variables
# (`scale.` is the full name of prcomp()'s argument; `scale` only worked
# through partial argument matching)
fit <- prcomp(x = mtcars,
              center = TRUE,
              scale. = TRUE)

# plot the results
# fviz_pca() comes from factoextra, which is attached here
library(factoextra)
fviz_pca(fit, labelsize = 3, repel = TRUE) +
  labs(title = "Biplot of mtcars data") +
  theme_bw()

Basic figures: Alluvial diagrams — Code from https://rkabacoff.github.io/datavis/Other.html#alluvial-diagrams

# Quick data wrangling: recode am, cyl, gear and carb as factors, then count
# the cars in each cyl/gear/carb/am combination (the count is column `n`).
mtcars_table <- mtcars |>
  mutate(
    am = factor(am, labels = c("Auto", "Man")),
    cyl = factor(cyl),
    gear = factor(gear),
    carb = factor(carb)
  ) |>
  # count() with explicit columns returns an ungrouped table, whereas
  # group_by() followed by count() leaves the result grouped.
  count(cyl, gear, carb, am)

Basic figures: Alluvial diagrams — Code from https://rkabacoff.github.io/datavis/Other.html#alluvial-diagrams

# Alluvial diagram of mtcars_table: four axes (carb, cyl, gear, am), flows
# weighted by the count column `n` and filled by carb.
ggplot(mtcars_table,
       aes(axis1 = carb,
           axis2 = cyl,
           axis3 = gear,
           axis4 = am,
           y = n)) +
  geom_alluvium(aes(fill = carb), color = "black") +
  geom_stratum(alpha = .8) +
  # label each stratum with its level name
  geom_text(stat = "stratum",
            aes(label = after_stat(stratum))) +
  # axis labels, in the same order as axis1..axis4
  scale_x_discrete(limits =
                     c("Carburetors", "Cylinders",
                       "Gears", "Transmission"),
                   expand = c(.1, .1)) +
  # scale_fill_brewer(palette="Paired") +
  # Subtitle kept on one source line: breaking the string literal across
  # lines puts a newline and the indentation into the rendered subtitle.
  labs(title = "Mtcars data",
       subtitle = "stratified by carb, cyl, gear, and am",
       y = "Frequency") +
  theme_minimal() +
  theme(legend.position = "none")

Basic figures: Sorted heat map — Code from https://rkabacoff.github.io/datavis/Other.html#heatmaps

# Heat map of mtcars drawn with superheat(), with scaling and a row
# dendrogram switched on; the remaining arguments set label sizes.
library(superheat)
superheat(
  mtcars,
  scale = TRUE,
  row.dendrogram = TRUE,
  left.label.text.size = 3,
  bottom.label.text.size = 3,
  bottom.label.size = .05
)

Basic figures: Interactive maps — Code from https://rkabacoff.github.io/datavis/Maps.html#geocoding

library(ggmap)
library(mapview)
library(sf)

# keep only the murder records and the columns needed for the map
homicide <- crime %>%
  filter(offense == "murder") %>%
  select(date, offense, address, lon, lat)
# look at the first three rows
head(homicide, n = 3)
# turn the table into an sf object, using lon/lat as coordinates (CRS 4326)
mymap <- st_as_sf(homicide, crs = 4326, coords = c("lon", "lat"))
mapview(mymap)

Basic figures: Barplots — Code from https://r4ds.had.co.nz/data-visualisation.html#the-layered-grammar-of-graphics

# Bar chart of cut, bars split by clarity, drawn with position = "fill"
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar(position = "fill")

Basic figures: Barplots — Code from https://r4ds.had.co.nz/data-visualisation.html#the-layered-grammar-of-graphics

# Bar chart of cut, bars split by clarity, drawn with position = "dodge"
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar(position = "dodge")

Basic figures: Boxplots — Code from http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/#google_vignette

# Box plot of len by dose with jittered points; color and point shape both
# follow dose. The plot is stored in `p` and then printed.
p <- ggboxplot(
  ToothGrowth,
  x = "dose",
  y = "len",
  color = "dose",
  shape = "dose",
  add = "jitter",
  palette = c("#00AFBB", "#E7B800", "#FC4E07")
)
p

Basic figures: Boxplots — Code from http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/#google_vignette

# Pairs of dose levels to compare
my_comparisons <- list(
  c("0.5", "1"),
  c("1", "2"),
  c("0.5", "2")
)
# Add to `p` one p-value per pair in my_comparisons, then a second
# stat_compare_means() layer at y = 50 for the global p-value
p +
  stat_compare_means(comparisons = my_comparisons) +
  stat_compare_means(label.y = 50)

Basic figures: Violin plot with box plot inside — Code from http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/#google_vignette

# Violin plot of len by dose, filled by dose, with a white box plot added
# inside each violin. Uses the `my_comparisons` list of dose pairs.
ggviolin(
  ToothGrowth,
  x = "dose",
  y = "len",
  fill = "dose",
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  add = "boxplot",
  add.params = list(fill = "white")
) +
  # Pairwise comparisons, labelled with significance levels
  stat_compare_means(label = "p.signif", comparisons = my_comparisons) +
  # Global p-value
  stat_compare_means(label.y = 50)

Basic figures: Dot chart — Code from http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/#google_vignette

# Build the data for the dot chart from mtcars:
#   name    - car name, moved out of the row names into a column
#   cyl     - cyl converted to a factor
#   mpg_z   - z-score of mpg
#   mpg_grp - "low" when mpg_z < 0, otherwise "high"
dfm <- mtcars |>
  mutate(
    cyl = as.factor(cyl),
    mpg_z = (mpg - mean(mpg)) / sd(mpg),
    mpg_grp = factor(ifelse(mpg_z < 0, "low", "high"),
                     levels = c("low", "high"))
  ) |>
  rownames_to_column(var = "name")
# The base-R statements that used to follow were removed on purpose. They
# repeated the mutate() steps above, and `dfm$name <- rownames(dfm)` ran
# after rownames_to_column() had already removed the row names, so it
# replaced the car names in `name` with "1", "2", ...

Basic figures: Dot chart — Code from http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/#google_vignette

# Dot chart of the mpg z-scores in dfm, one entry per `name`
ggdotchart(
  dfm,
  x = "name",
  y = "mpg_z",
  # color and order the dots by cyl
  color = "cyl",
  group = "cyl",
  # custom color palette
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  # sort values in descending order
  sorting = "descending",
  # add segments from y = 0 to the dots, and set their color and size
  add = "segments",
  add.params = list(color = "lightgray", size = 2),
  # large dots, labelled with the rounded z-score
  dot.size = 6,
  label = round(dfm$mpg_z, 1),
  font.label = list(color = "white", size = 9, vjust = 0.5),
  # ggplot2 theme
  ggtheme = theme_pubr()
) +
  # reference line at y = 0
  geom_hline(yintercept = 0, linetype = 2, color = "lightgray")

Basic figures: Faceted plot — Code from http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/83-create-and-customize-multi-panel-ggplots-easy-guide-to-facet/

# Density of len filled by dose; dose is converted to a factor first
df <- mutate(ToothGrowth, dose = as.factor(dose))
p <- ggdensity(
  df,
  x = "len",
  fill = "dose",
  palette = "jco",
  legend = "top",
  ggtheme = theme_light()
)
# Split the density plot into panels by supp and dose, with custom panel
# labels, label background, and label fonts
facet(
  p,
  facet.by = c("supp", "dose"),
  panel.labs = list(
    supp = c("Orange Juice", "Vitamin C"),
    dose = c("D0.5", "D1", "D2")
  ),
  panel.labs.background = list(
    color = "steelblue",
    fill = "steelblue",
    size = 0.5
  ),
  panel.labs.font = list(color = "white"),
  panel.labs.font.x = list(angle = 45, color = "white")
)

Saving a figure

# Usage sketch only, not runnable as written: `etc.` is a placeholder for
# further device names, and `file` / `plot` presumably stand for the output
# path and the plot object to save.
ggsave(file, plot, device = c("pdf", "png", "jpeg", etc.))
# Open the help page for ggsave
?ggsave

Saving a figure

Colors and color palettes

colorRampPalette

# 11 colors interpolated across five stops: dark blue, blue, white, red,
# dark red
pal <- colorRampPalette(
  colors = c("#000033", "blue", "#FFFFFF", "red", "#9B2226")
)(n = 11)
# Display the palette as a grid of color swatches
scales::show_col(colours = pal)

Colors and color palettes

viridis and viridisLite

Colors and color palettes

# 20 colors from viridis(), shown with show_col()
pal2 <- viridis(n = 20)
show_col(colours = pal2)

Colors and color palettes — List from https://www.skillshare.com/en/blog/7-best-color-palette-generators-to-try/

Manual colors in plots

  • scale_color_[many options]
  • scale_fill_[many options]

Manual colors in plots

Certain shapes can have a different fill and outline color

Visualization function

What are the different parts?

  • Data wrangling - factoring parameters of interest
  • Mapping aesthetics
  • Variables for fill and color
  • List of comparisons for stats
  • Heights for stats bars
  • List of values for colors
  • Name changes - what delimiter are we using

Revisiting our basic ggplot figure build

Let’s write a function that makes this visualization for us

# The finished figure that the upcoming function should reproduce:
# relabelled species, violin outlines colored by species (no legend), points
# filled by Petal.Length, and pairwise significance brackets.
iris |>
  # Species levels are relabelled to "Setosa" / "Versicolor" / "Virginica"
  mutate(Species = factor(Species,
                          levels = c("setosa", "versicolor", "virginica"),
                          labels = c("Setosa", "Versicolor", "Virginica"))) |>
  ggplot(aes(x = Species, y = Sepal.Length)) +
  geom_violin(aes(color = Species), trim = FALSE, show.legend = FALSE) +
  geom_jitter(aes(fill = Petal.Length), shape = 21, size = 3, alpha = 0.8) +
  geom_signif(
    comparisons = list(
      c("Setosa", "Versicolor"),
      c("Versicolor", "Virginica"),
      c("Setosa", "Virginica")
    ),
    # TRUE rather than T: T is a reassignable variable, TRUE is reserved
    map_signif_level = TRUE,
    y_position = c(7.8, 8.5, 9),
    tip_length = 0
  ) +
  scale_fill_viridis(name = "Petal length") +
  # The names must match the relabelled factor levels above; lowercase
  # names match nothing and leave the violins without their color.
  scale_color_manual(values = c("Setosa" = "#06d6a0",
                                "Versicolor" = "#118ab2",
                                "Virginica" = "#073b4c")) +
  ylab("Sepal length") +
  theme(axis.title = element_text(size = 12),
        axis.text.x = element_text(face = "italic"))

Automate building the figure