
Data Visualization packages to install

Install the GGally, ggpubr, and ggsignif packages

# Install the add-on packages used by the examples in this document.
# NOTE(review): this call is not closed in the visible text — the remainder of
# the package vector and the closing `))` appear to be missing. Restore them
# before running.
install.packages(c("GGally", "ggpubr", "ggsignif", 
                   "viridis", "scales", "viridisLite", 
                   "survival", "survminer", "ggalluvial", 

Exploring the data

# Pairwise plot matrix of the iris variables, coloured and filled by species.
# `progress = FALSE` is spelled out because `F` is an ordinary variable that
# can be reassigned, whereas `FALSE` is a reserved word.
ggpairs(iris, aes(color = Species, fill = Species), progress = FALSE)

Anatomy of a ggplot figure

# General ggplot2 template (not runnable as-is): replace every <PLACEHOLDER>
# with real code. Layers are chained with `+`.
ggplot(data = <DATA>) + 
  <GEOM_FUNCTION>(
     mapping = aes(<MAPPINGS>),
     stat = <STAT>, 
     position = <POSITION>
  ) +
  <COORDINATE_FUNCTION> +
  <FACET_FUNCTION>

Let's build a basic ggplot figure

Let’s use the iris dataset.

  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1          5.1         3.5          1.4         0.2  setosa
2          4.9         3.0          1.4         0.2  setosa
3          4.7         3.2          1.3         0.2  setosa
4          4.6         3.1          1.5         0.2  setosa
5          5.0         3.6          1.4         0.2  setosa
6          5.4         3.9          1.7         0.4  setosa

Let's build a basic ggplot figure

Now we can add the aesthetics.

# Set up the canvas only: species on the x-axis, sepal length on the y-axis.
# No geom layer is added here.
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length))

Let's build a basic ggplot figure

We can visualize the distribution with a violin plot

# Sepal length per species drawn as violins; trimming is switched off.
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length)) +
  geom_violin(trim = FALSE)

Let's build a basic ggplot figure

Let's add the data on top

# Violin plot with the individual observations drawn on top.
# The chain previously ended on a dangling `+`, which is a syntax error; the
# point layer is added with geom_jitter() so overlapping values are spread out.
iris |>
  ggplot(aes(x = Species, y = Sepal.Length)) +
  geom_violin(trim = FALSE) +
  geom_jitter()

Let's build a basic ggplot figure

Add some statistics

# Violin + jittered points, annotated with pairwise significance brackets.
iris |>
  ggplot(aes(x = Species, y = Sepal.Length)) +
  geom_violin(trim = FALSE) +
  geom_jitter() +
  geom_signif(
    # One bracket per pair of species
    comparisons = list(
      c("setosa", "versicolor"),
      c("versicolor", "virginica"),
      c("setosa", "virginica")
    ),
    # Use TRUE rather than T: `T` is a plain variable and can be overwritten
    map_signif_level = TRUE,
    # One bracket height per comparison, in the same order as `comparisons`
    y_position = c(7.8, 8.5, 9),
    tip_length = 0
  )

Let's build a basic ggplot figure

Change the y label and add color to violin plot and data

# Same figure with a readable y label, violin outlines coloured by species and
# points filled by petal length (shape 21 has separate fill and outline).
iris |>
  ggplot(aes(x = Species, y = Sepal.Length)) +
  geom_violin(aes(color = Species), trim = FALSE, alpha = 0.8) +
  geom_jitter(aes(fill = Petal.Length), shape = 21) +
  geom_signif(
    comparisons = list(
      c("setosa", "versicolor"),
      c("versicolor", "virginica"),
      c("setosa", "virginica")
    ),
    # Use TRUE rather than T: `T` is a plain variable and can be overwritten
    map_signif_level = TRUE,
    # One bracket height per comparison, in the same order as `comparisons`
    y_position = c(7.8, 8.5, 9),
    tip_length = 0
  ) +
  ylab("Sepal length")

Let's build a basic ggplot figure

Now we’ll remove the violin plot legend and change the color palette and labels

# Polished figure: title-cased species labels, no legend for the violins,
# viridis fill for the points and a manual outline palette for the violins.
iris |>
  # Title-case the species names ("setosa" -> "Setosa") for display
  mutate(Species = str_to_title(Species)) |>
  ggplot(aes(x = Species, y = Sepal.Length)) +
  geom_violin(aes(color = Species), trim = FALSE, show.legend = FALSE) +
  geom_jitter(aes(fill = Petal.Length), shape = 21, size = 3, alpha = 0.8) +
  geom_signif(
    comparisons = list(
      c("Setosa", "Versicolor"),
      c("Versicolor", "Virginica"),
      c("Setosa", "Virginica")
    ),
    # Use TRUE rather than T: `T` is a plain variable and can be overwritten
    map_signif_level = TRUE,
    # One bracket height per comparison, in the same order as `comparisons`
    y_position = c(7.8, 8.5, 9),
    tip_length = 0
  ) +
  scale_fill_viridis(name = "Petal length") +
  # The palette names must match the title-cased Species values created above;
  # the previous lowercase names matched nothing, so no colour was applied.
  scale_color_manual(values = c("Setosa" = "#06d6a0",
                                "Versicolor" = "#118ab2",
                                "Virginica" = "#073b4c")) +
  ylab("Sepal length") +
  theme(axis.title = element_text(size = 12),
        axis.text.x = element_text(face = "italic"))

Basic figures: Survival plot (code from https://rkabacoff.github.io/datavis/Models.html#survival-plots)

# Fit Kaplan-Meier survival curves for the lung data, one curve per sex.
sfit <- survfit(Surv(time, status) ~ sex, data = lung)

# Plot the fitted curves. The plotting call's opening line was missing, which
# left the arguments below orphaned; they are ggsurvplot() arguments.
# NOTE(review): any further arguments of the original call are not recoverable
# from this text — confirm against the cited source.
ggsurvplot(sfit,
           legend.labs=c("Male", "Female"), 
           palette=c("cornflowerblue", "indianred3"), 
           title="Kaplan-Meier Curve for lung 
           cancer survival",
           xlab = "Time (days)")

Basic figures: Bubble chart (code from https://rkabacoff.github.io/datavis/Other.html#Bubble)

       # NOTE(review): the opening call (presumably `ggplot(<data>,`) is not
       # visible above this line, so this snippet does not parse on its own.
       # Restore it before running — TODO confirm the data set.
       # Weight on x, mileage on y, point size mapped to horsepower.
       aes(x = wt, y = mpg, size = hp)) +
  # Half-transparent points drawn with shape 21
  geom_point(alpha = .5, 
             shape=21) +
  # Point sizes are scaled into the range 1 to 14
  scale_size_continuous(range = c(1, 14)) +
  # The title and subtitle strings contain a literal line break
  labs(title = "Auto mileage by weight 
       and horsepower",
       subtitle = "Motor Trend US Magazine 
       (1973-74 models)",
       x = "Weight (1000 lbs)",
       y = "Miles/(US) gallon",
       size = "Gross horsepower") 

Basic figures: Biplot (code from https://rkabacoff.github.io/datavis/Other.html#biplots)

# Fit a principal components model on the centred and scaled mtcars columns.
# The argument is spelled `scale.` (with the trailing dot); `scale =` only
# worked through partial argument matching.
fit <- prcomp(x = mtcars,
              center = TRUE,
              scale. = TRUE)

# Plot the results.
# NOTE(review): the opening line of the plotting call (function name and first
# argument) is not visible here, so the lines below do not parse on their own.
# Restore it before running.
         repel = TRUE, 
         labelsize = 3) + 
  # Black-and-white theme plus a plot title
  theme_bw() +
  labs(title = "Biplot of mtcars data")

Basic figures: Alluvial diagrams (code from https://rkabacoff.github.io/datavis/Other.html#alluvial-diagrams)

# Quick data wrangling: turn the four grouping variables into factors, then
# count the cars in every cyl/gear/carb/am combination.
mtcars_table <- mtcars %>%
  mutate(am = factor(am, labels = c("Auto", "Man")),
         cyl = factor(cyl),
         gear = factor(gear),
         carb = factor(carb)) %>%
  # The chain previously ended on a dangling pipe after group_by(). count()
  # produces the `n` column that the alluvial plot maps to `y`, and returns an
  # ungrouped table.
  count(cyl, gear, carb, am)

Basic figures: Alluvial diagrams (code from https://rkabacoff.github.io/datavis/Other.html#alluvial-diagrams)

       # NOTE(review): the opening call (presumably `ggplot(<data>,`) is not
       # visible above this line, so this snippet does not parse on its own.
       # Restore it before running — TODO confirm the data set.
       # One axis per factor; `n` supplies the height of each flow.
       aes(axis1 = carb,
           axis2 = cyl,
           axis3 = gear,
           axis4 = am,
           y = n)) +
  # Flows filled by carb, with a black outline
  geom_alluvium(aes(fill = carb), color="black") +
  geom_stratum(alpha=.8) +
  # Label every stratum with its own value
  geom_text(stat = "stratum", 
            aes(label = after_stat(stratum))) + 
  # Axis labels, listed in the same order as axis1..axis4
  scale_x_discrete(limits = 
                     c("Carburetors", "Cylinders",
                              "Gears", "Transmission"),
                   expand = c(.1, .1)) +
  # scale_fill_brewer(palette="Paired") +
  labs(title = "Mtcars data",
       subtitle = "stratified by carb, cyl, 
       gear, and am",
       y = "Frequency") +
  theme_minimal() +
  # Hide the legend
  theme(legend.position = "none") 

Basic figures: Sorted heat map (code from https://rkabacoff.github.io/datavis/Other.html#heatmaps)

          # NOTE(review): the opening line of this call (function name and data
          # argument) is not visible here, so these arguments do not parse on
          # their own. Restore it before running.
          scale = TRUE,
          bottom.label.size = .05,
          row.dendrogram = TRUE )

Basic figures: Interactive maps (code from https://rkabacoff.github.io/datavis/Maps.html#geocoding)


# Keep only the murder records and the columns needed for mapping
homicide <- crime |>
  filter(offense == "murder") |>
  select(date, offense, address, lon, lat)

# Show the first three rows
head(homicide, n = 3)

# Build a simple-features object from the lon/lat columns, with crs 4326
mymap <- st_as_sf(x = homicide, coords = c("lon", "lat"), crs = 4326)

Basic figures: Barplots (code from https://r4ds.had.co.nz/data-visualisation.html#the-layered-grammar-of-graphics)

# Bar chart of cut, filled by clarity, using the "fill" position adjustment
diamonds |>
  ggplot() +
  geom_bar(aes(x = cut, fill = clarity), position = "fill")

Basic figures: Barplots (code from https://r4ds.had.co.nz/data-visualisation.html#the-layered-grammar-of-graphics)

# Bar chart of cut, filled by clarity, using the "dodge" position adjustment
diamonds |>
  ggplot() +
  geom_bar(aes(x = cut, fill = clarity), position = "dodge")

Basic figures: Boxplots (code from http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/#google_vignette)

# Box plot of tooth length by dose with jittered points on top; colour and
# point shape both vary by dose. Assigned with `<-`, the idiomatic R
# assignment operator, and stored in `p` so later layers can be added to it.
p <- ggboxplot(ToothGrowth, x = "dose", y = "len",
               color = "dose",
               palette = c("#00AFBB", "#E7B800", "#FC4E07"),
               add = "jitter", shape = "dose")

Basic figures: Boxplots (code from http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/#google_vignette)

# Annotate the box plot stored in `p` with p-values.
# First list the dose pairs to compare...
my_comparisons <- list(
  c("0.5", "1"),
  c("1", "2"),
  c("0.5", "2")
)

# ...then add one layer for the pairwise p-values and a second layer for the
# global p-value, placed at y = 50.
p +
  stat_compare_means(comparisons = my_comparisons) +
  stat_compare_means(label.y = 50)

Basic figures: Violin plot with box plot inside (code from http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/#google_vignette)

# Violin plot of tooth length by dose with a white box plot drawn inside each
# violin. Uses the `my_comparisons` list defined for the box plot example.
ggviolin(
  ToothGrowth,
  x = "dose",
  y = "len",
  fill = "dose",
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  add = "boxplot",
  add.params = list(fill = "white")
) +
  # Pairwise comparisons, labelled with significance symbols
  stat_compare_means(comparisons = my_comparisons, label = "p.signif") +
  # Global p-value, placed at y = 50
  stat_compare_means(label.y = 50)

Basic figures: Dot chart (code from http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/#google_vignette)

# Prepare the data for the dot chart.
# The wrangling used to be done twice (a dplyr pipeline followed by the same
# steps in base R). The second pass ran `rownames(dfm)` after the row names had
# already been moved into a column, so `name` ended up as "1", "2", ... rather
# than the car names. Only one pass is kept.
dfm <- mtcars

# Convert the cyl variable to a factor
dfm$cyl <- as.factor(dfm$cyl)

# Add the name column (car names are stored as the row names of mtcars)
dfm$name <- rownames(dfm)

# Calculate the z-score of the mpg data
dfm$mpg_z <- (dfm$mpg - mean(dfm$mpg)) / sd(dfm$mpg)

# Flag each car as below ("low") or at/above ("high") the mean mpg
dfm$mpg_grp <- factor(ifelse(dfm$mpg_z < 0, "low", "high"),
                      levels = c("low", "high"))

Basic figures: Dot chart (code from http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/#google_vignette)

# Dot chart of the mpg z-scores, one dot per car, with a dashed zero line.
ggdotchart(
  dfm,
  x = "name",
  y = "mpg_z",
  color = "cyl",                                     # colour by group
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),      # custom colour palette
  sorting = "descending",                            # sort values, descending
  add = "segments",                                  # segments from y = 0 to dots
  add.params = list(color = "lightgray", size = 2),  # segment colour and size
  group = "cyl",                                     # order by group
  dot.size = 6,                                      # large dots
  label = round(dfm$mpg_z, 1),                       # z-scores as dot labels
  font.label = list(color = "white", size = 9, vjust = 0.5),
  ggtheme = theme_pubr()                             # ggplot2 theme
) +
  geom_hline(yintercept = 0, linetype = 2, color = "lightgray")

Basic figures: Faceted plot (code from http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/83-create-and-customize-multi-panel-ggplots-easy-guide-to-facet/)

# Treat dose as a categorical variable
df <- ToothGrowth |>
  mutate(dose = as.factor(dose))

# Density of tooth length, filled by dose
p <- ggdensity(df, x = "len", fill = "dose",
               palette = "jco",
               ggtheme = theme_light(),
               legend = "top")

# Split into a supp x dose grid of panels with custom panel labels.
# The closing parenthesis of the `panel.labs` list and of the facet() call
# itself were missing, which made the snippet unparseable; both are restored.
facet(p, facet.by = c("supp", "dose"),
      panel.labs = list(
        supp = c("Orange Juice", "Vitamin C"),
        dose = c("D0.5", "D1", "D2")
      ),
      panel.labs.background = list(color = "steelblue",
                                   fill = "steelblue",
                                   size = 0.5),
      panel.labs.font = list(color = "white"),
      panel.labs.font.x = list(angle = 45,
                               color = "white")
)

Saving a figure

ggsave(file, plot, device = c("pdf", "png", "jpeg", etc.))

Saving a figure

Colors and color palettes


# Build an 11-colour diverging palette by interpolating between five anchor
# colours (dark blue -> blue -> white -> red -> dark red).
# Assigned with `<-`, the idiomatic R assignment operator.
pal <- colorRampPalette(c("#000033", "blue", "#FFFFFF", "red", "#9B2226"))(11)

Colors and color palettes

viridis and viridisLite

Colors and color palettes

# Request 20 colours from viridis().
# Assigned with `<-`, the idiomatic R assignment operator.
pal2 <- viridis(20)

Colors and color palettes (list from https://www.skillshare.com/en/blog/7-best-color-palette-generators-to-try/)

Manual colors in plots

  • scale_color_[many options]
  • scale_fill_[many options]

Manual colors in plots

Certain shapes can have a different fill and outline color

Visualization function

What are the different parts?

  • Data wrangling - factoring parameters of interest
  • Mapping aesthetics
  • Variables for fill and color
  • List of comparisons for stats
  • Heights for stats bars
  • List of values for colors
  • Name changes - what delimiter are we using

Revisiting our basic ggplot figure build

Let’s write a function that makes this visualization for us

# Reference version of the figure that the plotting function should reproduce.
iris |>
  # Relabel the species levels with title-cased display names
  mutate(Species = factor(Species,
                          levels = c("setosa", "versicolor", "virginica"),
                          labels = c("Setosa", "Versicolor", "Virginica"))) |>
  ggplot(aes(x = Species, y = Sepal.Length)) +
  geom_violin(aes(color = Species), trim = FALSE, show.legend = FALSE) +
  geom_jitter(aes(fill = Petal.Length), shape = 21, size = 3, alpha = 0.8) +
  geom_signif(
    comparisons = list(
      c("Setosa", "Versicolor"),
      c("Versicolor", "Virginica"),
      c("Setosa", "Virginica")
    ),
    # Use TRUE rather than T: `T` is a plain variable and can be overwritten
    map_signif_level = TRUE,
    # One bracket height per comparison, in the same order as `comparisons`
    y_position = c(7.8, 8.5, 9),
    tip_length = 0
  ) +
  scale_fill_viridis(name = "Petal length") +
  # The palette names must match the relabelled factor levels; the previous
  # lowercase names matched nothing, so no colour was applied to the violins.
  scale_color_manual(values = c("Setosa" = "#06d6a0",
                                "Versicolor" = "#118ab2",
                                "Virginica" = "#073b4c")) +
  ylab("Sepal length") +
  theme(axis.title = element_text(size = 12),
        axis.text.x = element_text(face = "italic"))

Automate building the figure