Data visualization: ggplot2 and beyond

Descriptive Statistics

Ihor Miroshnychenko

Kyiv School of Economics

Basic concepts

The grammar of graphics

Tidy data

Examples

Examples

Examples

ggplot2

🐧 palmerpenguins

# Install palmerpenguins only when it is not already available, so that
# re-running this script does not trigger a reinstall every time.
if (!requireNamespace("palmerpenguins", quietly = TRUE)) {
  install.packages("palmerpenguins")
}


# Attach the package so that `penguins` can be referenced directly.
library("palmerpenguins")

# Preview the first six rows of the data set.
head(penguins, n = 6L)
# A tibble: 6 × 8
  species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
  <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
1 Adelie  Torgersen           39.1          18.7               181        3750
2 Adelie  Torgersen           39.5          17.4               186        3800
3 Adelie  Torgersen           40.3          18                 195        3250
4 Adelie  Torgersen           NA            NA                  NA          NA
5 Adelie  Torgersen           36.7          19.3               193        3450
6 Adelie  Torgersen           39.3          20.6               190        3650
# ℹ 2 more variables: sex <fct>, year <int>

Meet the Palmer penguins

Bill dimensions

ggplot()

# Attach ggplot2 for all plotting calls below.
library("ggplot2")

# Step 0: a plot with no data, no aesthetics and no layers.
ggplot(data = NULL)

ggplot() + data

# Step 1: hand the data set to the plot; nothing is mapped or drawn yet.
ggplot(penguins)

ggplot() + data + aesthetics

# Step 2: map bill length to x and bill depth to y; still no geometry layer.
ggplot(
  penguins,
  mapping = aes(
    x = bill_length_mm,
    y = bill_depth_mm
  )
)

ggplot() + data + aesthetics + geometry

Scatter plot

# Step 3: add a point layer to get a scatter plot of bill length vs. depth.
ggplot(
  penguins,
  mapping = aes(
    x = bill_length_mm,
    y = bill_depth_mm
  )
) +
  geom_point()

Shapes

Shapes for points

# Same scatter plot, but every point uses the fixed shape code 17.
# The shape is set outside aes(), so it does not depend on the data.
ggplot(
  penguins,
  mapping = aes(
    x = bill_length_mm,
    y = bill_depth_mm
  )
) +
  geom_point(shape = 17)

Shapes for aesthetics

# Shape as an aesthetic: the point shape now varies with `species`.
ggplot(
  penguins,
  mapping = aes(
    x = bill_length_mm,
    y = bill_depth_mm,
    shape = species
  )
) +
  geom_point()

Size

# Size as an aesthetic: point size varies with `body_mass_g`.
ggplot(
  penguins,
  mapping = aes(
    x = bill_length_mm,
    y = bill_depth_mm,
    size = body_mass_g
  )
) +
  geom_point()

Transparency

# Size still follows body mass; alpha = 0.5 is a fixed setting on the layer
# that makes every point half-transparent.
ggplot(
  penguins,
  mapping = aes(
    x = bill_length_mm,
    y = bill_depth_mm,
    size = body_mass_g
  )
) +
  geom_point(alpha = 0.5)

Color

# Color as an aesthetic: point color varies with `species`.
ggplot(
  penguins,
  mapping = aes(
    x = bill_length_mm,
    y = bill_depth_mm,
    color = species
  )
) +
  geom_point()

Manual color

# Color by species, using three hand-picked hex colors instead of the
# default palette.
ggplot(
  penguins,
  mapping = aes(
    x = bill_length_mm,
    y = bill_depth_mm,
    color = species
  )
) +
  geom_point() +
  scale_color_manual(
    values = c("#19a6b3", "#f26c0d", "#5e3894")
  )

RColorBrewer

# Attach RColorBrewer, as on the original slide.
library("RColorBrewer")

# Color by species using the ColorBrewer palette named "Set1".
ggplot(
  penguins,
  mapping = aes(
    x = bill_length_mm,
    y = bill_depth_mm,
    color = species
  )
) +
  geom_point() +
  scale_color_brewer(palette = "Set1")

Themes

ggthemes

# Attach ggthemes; scale_color_tableau() below is used right after it.
library("ggthemes")

# Color by species using the Tableau color scale with its default settings.
ggplot(
  penguins,
  mapping = aes(
    x = bill_length_mm,
    y = bill_depth_mm,
    color = species
  )
) +
  geom_point() +
  scale_color_tableau()

theme_bw()

# Species-colored scatter plot with the manual palette, rendered with
# theme_bw().
ggplot(
  penguins,
  mapping = aes(
    x = bill_length_mm,
    y = bill_depth_mm,
    color = species
  )
) +
  geom_point() +
  scale_color_manual(
    values = c("#19a6b3", "#f26c0d", "#5e3894")
  ) +
  theme_bw()

theme_minimal()

# Same plot as with theme_bw(), but rendered with theme_minimal().
ggplot(
  penguins,
  mapping = aes(
    x = bill_length_mm,
    y = bill_depth_mm,
    color = species
  )
) +
  geom_point() +
  scale_color_manual(
    values = c("#19a6b3", "#f26c0d", "#5e3894")
  ) +
  theme_minimal()

Labels

labs()

# Species-colored scatter plot with explicit text for the axes, the color
# legend, the title, the subtitle and the caption.
ggplot(
  penguins,
  mapping = aes(
    x = bill_length_mm,
    y = bill_depth_mm,
    color = species
  )
) +
  geom_point() +
  scale_color_manual(
    values = c("#19a6b3", "#f26c0d", "#5e3894")
  ) +
  theme_minimal() +
  labs(
    x = "Bill length, mm",
    y = "Bill depth, mm",
    color = "Species",
    title = "Bill dimensions of penguins",
    subtitle = "Palmer penguins dataset",
    caption = "Source: palmerpenguins package"
  )

Legends

theme(legend.position = "top")

# Labelled scatter plot with the legend moved to the top.
# theme(legend.position = ...) is added after theme_minimal() so the
# complete theme does not override it.
ggplot(
  penguins,
  mapping = aes(
    x = bill_length_mm,
    y = bill_depth_mm,
    color = species
  )
) +
  geom_point() +
  scale_color_manual(
    values = c("#19a6b3", "#f26c0d", "#5e3894")
  ) +
  theme_minimal() +
  labs(
    x = "Bill length, mm",
    y = "Bill depth, mm",
    color = "Species",
    title = "Bill dimensions of penguins",
    subtitle = "Palmer penguins dataset",
    caption = "Source: palmerpenguins package"
  ) +
  theme(legend.position = "top")

theme(legend.position = "none")

# Labelled scatter plot with the legend suppressed.
# theme(legend.position = ...) is added after theme_minimal() so the
# complete theme does not override it.
ggplot(
  penguins,
  mapping = aes(
    x = bill_length_mm,
    y = bill_depth_mm,
    color = species
  )
) +
  geom_point() +
  scale_color_manual(
    values = c("#19a6b3", "#f26c0d", "#5e3894")
  ) +
  theme_minimal() +
  labs(
    x = "Bill length, mm",
    y = "Bill depth, mm",
    color = "Species",
    title = "Bill dimensions of penguins",
    subtitle = "Palmer penguins dataset",
    caption = "Source: palmerpenguins package"
  ) +
  theme(legend.position = "none")

Other geometries

geom_smooth()

# Scatter plot with a linear-model fit line on top of the points.
# Color and shape are both mapped to species in the plot-level aes(), so
# both layers inherit them; se = FALSE turns off the confidence band.
ggplot(
  penguins,
  mapping = aes(
    x = bill_length_mm,
    y = bill_depth_mm,
    color = species,
    shape = species
  )
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  scale_color_manual(
    values = c("darkorange", "purple", "cyan4")
  ) +
  theme_minimal()

Bubble plot

# Bubble plot: point size follows body mass and color follows species.
# Points are half-transparent.
ggplot(
  penguins,
  mapping = aes(
    x = bill_length_mm,
    y = bill_depth_mm,
    size = body_mass_g,
    color = species
  )
) +
  geom_point(alpha = 0.5) +
  scale_color_manual(
    values = c("darkorange", "purple", "cyan4")
  ) +
  theme_minimal()

Bar plot

# Bar plot of species; only x is mapped, so geom_bar() counts the rows.
# The fill color also follows species.
ggplot(
  penguins,
  mapping = aes(
    x = species,
    fill = species
  )
) +
  geom_bar() +
  theme_minimal()

Dodge bar plot

# Bar plot of species, split by sex.
# Dodging places the bars for each sex side by side instead of stacking.
ggplot(
  penguins,
  mapping = aes(
    x = species,
    fill = sex
  )
) +
  geom_bar(position = position_dodge()) +
  theme_minimal()

Histogram

# Overlaid histograms of body mass, one per species.
# position = "identity" draws every species from the baseline so the
# half-transparent bars overlap; with the default stacked position the bars
# never overlap and alpha serves no purpose.
# bins = 30 states the bin count explicitly rather than relying on the
# default, which ggplot2 reports with a message.
ggplot(
  data = penguins,
  aes(x = body_mass_g, fill = species)
) +
  geom_histogram(alpha = 0.5, position = "identity", bins = 30) +
  theme_minimal()

Density plot

# Density curves of body mass, filled by species and half-transparent.
ggplot(
  penguins,
  mapping = aes(
    x = body_mass_g,
    fill = species
  )
) +
  geom_density(alpha = 0.5) +
  theme_minimal()

Facets

# Density of body mass, drawn in a separate panel for each species.
ggplot(
  penguins,
  mapping = aes(
    x = body_mass_g,
    fill = species
  )
) +
  geom_density() +
  theme_minimal() +
  facet_wrap(vars(species)) # or facet_grid(. ~ species)

Box plot

# Box plot of body mass for each species, with boxes filled by species.
ggplot(
  penguins,
  mapping = aes(
    x = species,
    y = body_mass_g,
    fill = species
  )
) +
  geom_boxplot() +
  theme_minimal()

Box plot with jitter

# Box plot of flipper length per species with the raw observations
# jittered on top. Both layers are colored by species and both suppress
# the legend.
ggplot(
  penguins,
  mapping = aes(x = species, y = flipper_length_mm)
) +
  geom_boxplot(
    mapping = aes(color = species),
    width = 0.3,
    show.legend = FALSE
  ) +
  geom_jitter(
    mapping = aes(color = species),
    alpha = 0.5,
    show.legend = FALSE,
    # A fixed seed makes the jitter reproducible between runs.
    position = position_jitter(width = 0.2, seed = 0)
  ) +
  scale_color_manual(
    values = c("darkorange", "purple", "cyan4")
  ) +
  labs(
    x = "Species",
    y = "Flipper length (mm)"
  ) +
  theme_minimal()

ggplot2 extensions

patchwork

# Attach patchwork, which is what lets the plots be combined with
# `|` and `/` at the end of this block.
library("patchwork")

# p1: bill length vs. bill depth, colored by species.
p1 <- ggplot(
  penguins,
  mapping = aes(
    x = bill_length_mm,
    y = bill_depth_mm,
    color = species
  )
) +
  geom_point() +
  scale_color_manual(
    values = c("#19a6b3", "#f26c0d", "#5e3894")
  ) +
  theme_minimal()

# p2: body-mass density, with one panel column per species.
p2 <- ggplot(
  penguins,
  mapping = aes(
    x = body_mass_g,
    fill = species
  )
) +
  geom_density(alpha = 0.5) +
  theme_minimal() +
  facet_grid(cols = vars(species))

# p3: body-mass box plot per species.
p3 <- ggplot(
  penguins,
  mapping = aes(
    x = species,
    y = body_mass_g,
    fill = species
  )
) +
  geom_boxplot() +
  theme_minimal()

# Layout: p1 beside p2 on the top row, p3 underneath.
(p1 | p2) / p3

ggstatsplot

library(ggstatsplot)

# Scatter plot of bill length vs. bill depth with statistical annotations.
# NOTE(review): ggscatterstats() has no `color` parameter; the former
# `color = species` argument fell into `...`, which the function's
# documentation describes as ignored, so it had no effect and is removed.
# For a per-species breakdown, see
# grouped_ggscatterstats(..., grouping.var = species).
ggscatterstats(
  data = penguins,
  x = bill_length_mm,
  y = bill_depth_mm
)

GGally

# Attach GGally for the ggpairs() call below.
library("GGally")

# Pairwise plot matrix of the four numeric measurements, colored by species.
ggpairs(
  penguins,
  mapping = aes(color = species),
  columns = c(
    "bill_length_mm",
    "bill_depth_mm",
    "flipper_length_mm",
    "body_mass_g"
  )
)

Additional resources

Questions?



Course materials

imiroshnychenko@kse.org.ua

@araprof

@datamirosh

@ihormiroshnychenko

@aranaur

aranaur.rbind.io