“A grammar of graphics is a tool that enables us to concisely describe the components of a graphic. Such a grammar allows us to move beyond named graphics (e.g., the “scatterplot”) and gain insight into the deep structure that underlies statistical graphics.” — Hadley Wickham
mpg data frame
mpg %>% print(n = 5)
## # A tibble: 234 x 11
## manufacturer model displ year cyl trans drv cty hwy fl
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr>
## 1 audi a4 1.8 1999 4 auto(l~ f 18 29 p
## 2 audi a4 1.8 1999 4 manual~ f 21 29 p
## 3 audi a4 2 2008 4 manual~ f 20 31 p
## 4 audi a4 2 2008 4 auto(a~ f 21 30 p
## 5 audi a4 2.8 1999 6 auto(l~ f 16 26 p
## # ... with 229 more rows, and 1 more variable: class <chr>
displ, a car’s engine size, in litres.
hwy, a car’s fuel efficiency on the highway (in miles per gallon).
All 336,776 flights that departed from NYC in 2013
# Scatterplot of engine size (displ) against highway fuel efficiency (hwy).
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy))
ggplot(data = <DATA>) +
<GEOM_FUNCTION>(mapping = aes(<MAPPINGS>))
# Same scatterplot, with the point color mapped to the class column.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, color = class))
colour instead of color.
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy, size = class))
## Warning: Using size for a discrete variable is not advised.
# Map class to transparency; the recorded output shows ggplot2 warning that
# alpha is not advised for a discrete variable.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, alpha = class))
## Warning: Using alpha for a discrete variable is not advised.

# Map class to point shape; per the recorded warnings the shape palette covers
# only 6 of the 7 classes, so 62 rows are dropped from the plot.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, shape = class))
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 62 rows containing missing values (geom_point).
# Set (rather than map) an aesthetic: color is given outside aes(), so every
# point is drawn in blue.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy), color = "blue")

# One panel per class, wrapped into two rows.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy), size = 0.5) +
  facet_wrap(vars(class), nrow = 2)

# Grid of panels: drv across the rows, cyl across the columns.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy), size = 0.5) +
  facet_grid(rows = vars(drv), cols = vars(cyl))

# The same two variables drawn with two different geoms: first as points ...
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy))

# ... then as a smoothed line.
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = hwy))
mapping argument.
geom_point but not for geom_line,
ggplot(data = mpg) +
geom_smooth(mapping = aes(x = displ, y = hwy, linetype = drv))
?geom_smooth.
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy)) +
geom_smooth(mapping = aes(x = displ, y = hwy))
# Global mapping: x and y are declared once in ggplot() and used by both the
# point layer and the smooth layer.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth()

# Layer-local mapping: color by class is given to the point layer only.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth()

# Layer-local data: the smooth layer receives only the subcompact rows and is
# drawn without its standard-error band (se = FALSE).
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(
    data = filter(mpg, class == "subcompact"),
    se = FALSE
  )
price, carat, color, clarity, and cut for each.
print(diamonds)
## # A tibble: 53,940 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
## 7 0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47
## 8 0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53
## 9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49
## 10 0.23 Very Good H VS1 59.4 61 338 4 4.05 2.39
## # ... with 53,930 more rows
# Bar chart with one bar per cut of the diamonds data.
ggplot(diamonds) +
  geom_bar(aes(x = cut))
ggplot2 provides over 20 stats.
?stat_bin.
?geom_bar shows that the default value for stat is “count”.
geom_bar() uses stat_count().
?stat_count has a section called “Computed variables” with two new variables: count and prop.
ggplot(data = diamonds) +
stat_count(mapping = aes(x = cut))
# Summarise depth within each cut: the median as the central value, with the
# minimum and maximum as the lower and upper ends.
ggplot(diamonds) +
  stat_summary(
    aes(x = cut, y = depth),
    fun = median,
    fun.min = min,
    fun.max = max
  )

# Fill mapped to the same variable as x.
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = cut))

# Fill mapped to a second variable (clarity).
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = clarity))
?position_stack to learn more.
position = "identity"
# Bars drawn with position = "identity" and made mostly transparent
# (alpha = 1/5).
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar(position = "identity", alpha = 1 / 5)
?position_identity to learn more.
position = "fill"
# Bars filled by clarity, drawn with position = "fill".
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = clarity), position = "fill")
?position_fill to learn more.
position = "dodge"
# Bars filled by clarity, drawn with position = "dodge".
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = clarity), position = "dodge")
?position_dodge to learn more.
position = "jitter"
# Original points (default color) overlaid with a jittered copy in red, so the
# effect of position = "jitter" can be compared against the raw positions.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  geom_point(color = "red", position = "jitter")
?position_jitter to learn more.
coord_flip() switches the x and y axes.
coord_quickmap() sets the aspect ratio correctly for maps.
coord_polar() uses polar coordinates.
coord_flip()
# Boxplots of hwy for each class, with the axis text shrunk to size 5.
ggplot(mpg, aes(x = class, y = hwy)) +
  geom_boxplot() +
  theme(axis.text = element_text(size = 5))

# The same plot with the x and y axes switched by coord_flip().
ggplot(mpg, aes(x = class, y = hwy)) +
  geom_boxplot() +
  theme(axis.text = element_text(size = 5)) +
  coord_flip()
coord_polar()
# Base bar chart shared by both coordinate-system demos: one full-width bar
# per cut, no legend, no axis titles, square aspect ratio.
bar <- ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = cut), width = 1, show.legend = FALSE) +
  labs(x = NULL, y = NULL) +
  theme(aspect.ratio = 1, axis.text = element_text(size = 6))

# Same chart under two coordinate systems.
bar + coord_flip()
bar + coord_polar()
ggplot(data = <DATA>) +
<GEOM_FUNCTION>(
mapping = aes(<MAPPINGS>),
stat = <STAT>,
position = <POSITION>
) +
<COORDINATE_FUNCTION> +
<FACET_FUNCTION>
# Scatterplot plus smoother (no standard-error band), with a plot title.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(se = FALSE) +
  labs(title = "Fuel efficiency decreases with engine size")
subtitle: additional details beneath the title.
caption: text at the bottom right of the plot.
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = class)) + geom_smooth(se = FALSE) +
labs(title = "Fuel efficiency decreases with engine size",
subtitle = "Two seaters (sports cars) are an exception because of their light weight",
caption = "Data from fueleconomy.gov")
# Replace the default axis and legend titles with descriptive ones.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(se = FALSE) +
  labs(
    x = "Engine displacement (L)",
    y = "Highway fuel economy (mpg)",
    color = "Car type"
  )
# Ten uniformly random (x, y) points; the data only serves as a canvas for the
# axis titles below.
df <- tibble(x = runif(10), y = runif(10))

# Axis titles given as quoted (unevaluated) expressions rather than strings.
ggplot(df, aes(x = x, y = y)) +
  geom_point() +
  labs(
    x = quote(sum(x[i]^2, i == 1, n)),
    y = quote(alpha + beta + frac(delta, theta))
  )
# For each class, keep the single row ranked first by descending hwy.
# row_number() assigns distinct ranks, so exactly one row per class survives.
# The result is built once (both plots below use it) and ungrouped, so the
# class grouping does not leak into any later use of best_in_class.
best_in_class <- mpg %>%
  group_by(class) %>%
  filter(row_number(desc(hwy)) == 1) %>%
  ungroup()

# Variant 1: plain text labels placed at the selected points.
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = class)) +
  geom_text(aes(label = model), data = best_in_class, size = 3)

# Variant 2: the selected points get an extra larger marker (size 3, shape 1)
# and their labels are drawn with ggrepel::geom_label_repel().
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = class)) +
  geom_point(size = 3, shape = 1, data = best_in_class) +
  ggrepel::geom_label_repel(aes(label = model), data = best_in_class)
# Median displ and hwy of each class; these coordinates position the class
# labels in the plot below.
class_avg <- mpg %>%
  group_by(class) %>%
  summarise(displ = median(displ), hwy = median(hwy))

# Label each class directly on the plot and hide the legend.
ggplot(mpg, aes(x = displ, y = hwy, color = class)) +
  ggrepel::geom_label_repel(
    aes(label = class),
    data = class_avg,
    size = 6,
    label.size = 0,
    segment.color = NA
  ) +
  geom_point() +
  theme(legend.position = "none")
# Annotation anchored at the largest observed displ and hwy values.
# The line break in the label is written as an explicit "\n" rather than a
# string literal spanning two source lines, so re-indenting this code cannot
# inject stray whitespace into the plotted text.
label <- mpg %>%
  summarise(
    displ = max(displ),
    hwy = max(hwy),
    label = "Increasing engine size is\nrelated to decreasing fuel economy."
  )

ggplot(mpg, aes(displ, hwy)) +
  geom_point() +
  geom_text(
    aes(label = label),
    data = label,
    vjust = "top",
    hjust = "right"
  )

# Same annotation, positioned with Inf for both coordinates instead of the
# data maxima.
label <- tibble(
  displ = Inf,
  hwy = Inf,
  label = "Increasing engine size is\nrelated to decreasing fuel economy."
)

ggplot(mpg, aes(displ, hwy)) +
  geom_point() +
  geom_text(
    aes(label = label),
    data = label,
    vjust = "top",
    hjust = "right"
  )
geom_hline() and geom_vline(): size = 2 is often a good idea.
geom_rect(): xmin, xmax, ymin, ymax.
geom_segment() with the arrow argument: x/xend and y/yend define the start/end locations.
ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = class))
# Scatterplot with the continuous x and y scales spelled out explicitly.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  scale_x_continuous() +
  scale_y_continuous()

# 2-D binned counts of price against carat on the untransformed axes.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_bin2d()

# Log transform applied inside the aesthetic mapping.
ggplot(diamonds, aes(x = log10(carat), y = log10(price))) +
  geom_bin2d()

# Log transform applied through the scales instead of the mapping.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_bin2d() +
  scale_x_log10() +
  scale_y_log10()
breaks: controls the position of the ticks, or the values associated with the keys.
labels: controls the text label associated with each tick/key.
breaks
# y-axis ticks placed every 5 units from 15 to 40.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  scale_y_continuous(breaks = seq(from = 15, to = 40, by = 5))

# One horizontal segment per row of presidential, running from start to end.
# id numbers the rows from 34 upward; the x axis has no title, one tick per
# start date, and labels formatted as an apostrophe plus two-digit year.
presidential %>%
  mutate(id = 33 + row_number()) %>%
  ggplot(aes(x = start, y = id)) +
  geom_point() +
  geom_segment(aes(xend = end, yend = id)) +
  scale_x_date(
    name = NULL,
    breaks = presidential$start,
    date_labels = "'%y"
  )
labels
# Scatterplot with labels = NULL on both axis scales.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  scale_x_continuous(labels = NULL) +
  scale_y_continuous(labels = NULL)