library(tibble)
library(forcats)
library(stringr)
suppressPackageStartupMessages(library(dplyr))
library(paletteer)
library(scales)
library(palmerpenguins)
library(rpart)
library(ggdendro)
Display a palette
The paletteer package brings a large collection of palettes together under a single interface. It’s used here to load the Van Gogh palette “Starry Night”.
`display_palette()` shows the loaded palette with associated hex codes, using the default shape `jar` from the ggfoundry container set.
The outline colour defaults to mid-grey for better dark-mode support, as per this example post.
# Palette identifier as passed to paletteer.
pal_name <- "vangogh::StarryNight"

# Load the discrete palette; `pal` supplies the colours for the sunflower plot.
pal <- pal_name |> paletteer_d()

# Show the palette, passing its name as the second argument.
pal |> display_palette(pal_name)
Sunflower plot
Using the palette exhibited above, and inspired by this Python Stack Overflow answer, sunflower shapes combined with `geom_hex()` make this kind of ggplot possible.
Each additional petal reflects an increased range in the count values, as shown in the legend.
The choice of ggplot2 cut function, i.e. `cut_number()`, `cut_interval()` or `cut_width()`, provides flexibility in how these ranges are constructed.
# Names of the shapes in the "flower" set; they become the values of the
# manual shape scale below.
shapes <- shapes_cast() |>
  filter(set == "flower") |>
  pull(shape)

# Hex-binned diamonds with a flower shape cast on top of each hexagon. The
# shape (and group) of each flower is the cut_number() group, out of eight,
# that the bin's count falls into.
diamonds |>
  ggplot(aes(x = carat, y = price)) +
  geom_hex(bins = 10, colour = pal[3]) +
  geom_casting(
    aes(
      shape = cut_number(after_stat(count), 8, dig.lab = 4),
      group = cut_number(after_stat(count), 8)
    ),
    stat = "binhex", bins = 10,
    size = 0.12, colour = pal[1], fill = pal[4]
  ) +
  scale_y_continuous(labels = label_currency(scale_cut = cut_short_scale())) +
  scale_shape_manual(values = shapes) +
  scale_fill_gradient(
    low = pal[2], high = pal[1],
    # The short-scale cut points with an extra, unnamed cut point at 1.
    labels = label_number(scale_cut = c(cut_short_scale(), 1))
  ) +
  labs(
    title = "Sunflower Plot",
    x = "Carat", y = "Price",
    fill = "Counts",
    shape = "Count\nIntervals"
  ) +
  theme_bw()
Shapes identified by data
In the sunflower plot, `scale_shape_manual()` specifies the desired shapes.
Alternatively, the data may already specify their identity, as illustrated below using Allison Horst’s palmerpenguins dataset and ggfoundry’s penguin-set shapes.
# Count penguins by species, island and 500-wide body-mass band, after
# dropping rows with no recorded body mass. Species names are lower-cased,
# presumably so that they match the shape names used by the identity shape
# scale below.
count_df <- penguins |>
  filter(!is.na(body_mass_g)) |>
  mutate(species = str_to_lower(species)) |>
  mutate(cut_mass = cut_width(body_mass_g, width = 500, dig.lab = 4)) |>
  count(species, island, cut_mass)

# One facet per island. Each species/mass-band cell shows a shape taken
# directly from the `species` value, with its count printed beside it.
ggplot(count_df, aes(x = species, y = cut_mass, fill = species)) +
  geom_casting(aes(shape = species), size = 0.25) +
  geom_text(aes(label = n), nudge_x = 0.1, nudge_y = 0.2, size = 3) +
  facet_wrap(~island, scales = "free_x") +
  # Use the values of `species` as the shape identities and show a legend.
  scale_discrete_identity(aesthetics = "shape", guide = "legend") +
  labs(
    title = "Palmer Penguins",
    subtitle = "Counts by Species, Island & Body Mass Ranges",
    x = NULL, y = "Body Mass (g)",
    fill = NULL, shape = NULL
  ) +
  theme_bw() +
  # Hide the x-axis text and ticks.
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  ) +
  # Override the size of the shapes drawn in the legend keys.
  guides(shape = guide_legend(override.aes = list(size = 8)))
Leafy dendrograms
Adding appropriate filled shapes to a dendrogram can help draw attention to important groupings. Shapes from ggfoundry’s “leaf” set are used here to augment a ggdendro plot.
# Fit an rpart tree to the kyphosis data and convert it with dendro_data().
# The `segments`, `labels` and `leaf_labels` components of the result feed
# the three layers below.
data <-
  rpart(Kyphosis ~ Age + Number + Start, data = kyphosis) |>
  dendro_data()

ggplot() +
  # Line segments from `data$segments`.
  # (The dangling comma that followed `data = data$segments` is removed so
  # the call no longer carries an empty trailing argument.)
  geom_segment(
    aes(x, y, xend = xend, yend = yend),
    colour = "tan4", data = data$segments
  ) +
  # Text labels from `data$labels`.
  geom_label(
    aes(x, y, label = label),
    size = 3, fill = "seashell", data = data$labels
  ) +
  # A leaf shape for each row of `data$leaf_labels`, with shape and fill
  # both mapped to its label.
  geom_casting(
    aes(x, y, shape = label, fill = label),
    colour = "tan4", size = 0.27, data = data$leaf_labels
  ) +
  # Unnamed values: shapes and fills are assigned in the order of the
  # label values.
  scale_shape_manual(values = c("hibiscus", "oak")) +
  scale_fill_manual(values = c("olivedrab3", "darkorange")) +
  # Shape and fill share one title.
  labs(title = "Leafy Dendrogram", shape = "Kyphosis", fill = "Kyphosis") +
  theme_dendro() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.key.size = unit(2, "line"),
    legend.position = "top"
  )