library(ggplot2) # beautiful graphs

library(ggthemes) # nice themes for ggplot2

library(cowplot) # arrange graphs

library(pander) # nice tables

How to Choose a Chart

Choosing the right chart to represent your data can be a daunting process. I believe that a good starting point is some basic statistical thinking about the types of variables that you have. At the broadest level, variables may be conceptualized as either categorical or continuous.

Once we have discerned the type of variable that we have, there are two follow-up questions we may ask before deciding upon a chart strategy:

A Few Notes

A Note About Graph Labels

Graphs should have clear titles and labels.

A Note About Software

The principles of graphing discussed in this document transcend any particular software package, and could be implemented in many different software packages, such as SPSS, SAS, Stata, or R.

The graphs in these particular examples use ggplot2, a graphing library in R. ggplot2 graph syntax can be formidably complex, with a somewhat steep learning curve. More information about ggplot2 can be found at https://ggplot2.tidyverse.org.

A Note About Graph Colors

This document uses colors based upon official University of Michigan colors. Using colors that match the design scheme of your organization may be helpful.

# University of Michigan palette as a single unnamed character vector of
# hex codes. It is passed whole to scale_fill_manual(values = ...) by the
# pie and doughnut charts in this document.
michigan_colors <- c(
  "#00274c", # blue
  "#ffcb05", # maize
  "#a4270b", # tappan red
  "#e96300", # ross school orange
  "#beb300", # wave field green
  "#21c1bc", # taubman teal
  "#2878ba", # arboretum blue
  "#7207a5"  # ann arbor amethyst
)

# One constant per palette entry, so that a plot can request a single
# color by name instead of indexing into the palette vector.
michigan_blue      <- "#00274c"
michigan_maize     <- "#ffcb05"
tappan_red         <- "#a4270b"
ross_school_orange <- "#e96300"
wave_field_green   <- "#beb300"
taubman_teal       <- "#21c1bc"
arboretum_blue     <- "#2878ba"
ann_arbor_amethyst <- "#7207a5"

A Simulated Data File of Continuous and Categorical Data

A few randomly selected observations…

  x y z u v w s q
287 61.71 113.8 47.32 Group B Group B Group A Group 2 81.71
97 74.06 96.36 117.2 Group A Group A Group A Group 3 104.1
443 116.2 269.1 60.45 Group A Group A Group B Group 2 136.2
89 121.3 104.2 109.2 Group A Group A Group A Group 3 151.3
402 90.86 172.2 82.98 Group A Group A Group B Group 1 100.9
480 112.6 153.7 85.66 Group A Group A Group A Group 2 132.6
519 99.03 84.41 57.17 Group A Group A Group A Group 3 129
685 90.47 63.44 129.8 Group A Group A Group A Group 3 120.5
505 108.2 129.6 65.14 Group A Group A Group A Group 2 128.2
963 114.1 77.75 88.1 Group B Group B Group A Group 1 124.1

One Thing At A Time           Two Things At A Time

Continuous           Continuous By Categorical

# ggplot2 does not have to be as verbose as the examples in this document.
# The single expression below is already enough for a usable histogram:

ggplot(mydata, aes(x = x)) +
  geom_histogram()

# Most of the extra code that follows is cosmetic: titles, axis labels,
# themes and font sizes tuned to get each graph looking exactly as intended.

# Histogram of the continuous variable x.
my_histogram <- ggplot(data = mydata, mapping = aes(x = x)) +
  geom_histogram(fill = arboretum_blue) +
  labs(title = "histogram", x = "continuous", y = "count") +
  theme_minimal()

# The same histogram drawn once per level of w, panels laid out in two rows.
my_facet_histogram <- ggplot(data = mydata, mapping = aes(x = x)) +
  geom_histogram(fill = arboretum_blue) +
  facet_wrap(~ w, nrow = 2) +
  labs(title = "conditional histogram", x = "continuous", y = "count") +
  theme_minimal() +
  theme(axis.text = element_text(size = 5)) # shrink axis text for the panels

# Unconditional and conditional versions side by side.
plot_grid(my_histogram, my_facet_histogram, ncol = 2)

# Kernel density estimate of the continuous variable y.
my_density <- ggplot(data = mydata, mapping = aes(x = y)) +
  geom_density(fill = michigan_maize) +
  labs(title = "density", x = "continuous", y = "density") +
  theme_minimal()

# The same density drawn once per level of w, panels laid out in two rows.
my_facet_density <- ggplot(data = mydata, mapping = aes(x = y)) +
  geom_density(fill = michigan_maize) +
  facet_wrap(~ w, nrow = 2) +
  labs(title = "conditional density", x = "continuous", y = "density") +
  theme_minimal() +
  theme(axis.text = element_text(size = 5)) # shrink axis text for the panels

# Unconditional and conditional versions side by side.
plot_grid(my_density, my_facet_density, ncol = 2)

# Boxplot of the continuous variable y on its own.
# x is the constant 2 and the discrete x scale is given three limits;
# presumably this keeps the single box from filling the whole panel.
# The x axis text and ticks carry no information, so they are hidden.
my_boxplot <- ggplot(mydata, aes(x = 2, y = y)) +
  geom_boxplot(colour = tappan_red) +
  # `limits` is spelled out in full rather than relying on partial argument
  # matching, and is passed as a factor because a discrete scale expects
  # discrete limits (numeric limits trigger a warning in current ggplot2).
  scale_x_discrete(limits = factor(c(0, 1, 2))) +
  ggtitle("boxplot") +
  xlab(" ") + ylab("continuous") +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Boxplot of y split by the categorical variable w, one box per level.
my_conditional_boxplot <- ggplot(mydata, aes(x = w, y = y)) +
  geom_boxplot(colour = tappan_red, width = .5) +
  ggtitle("conditional boxplot") +
  xlab("categorical") + ylab("continuous") +
  theme_minimal()

# Unconditional and conditional versions side by side.
plot_grid(my_boxplot, my_conditional_boxplot, ncol = 2)

# Single bar showing the overall mean of the continuous variable q.
# x and fill are constants, so stat_summary() reduces all rows to one bar
# whose height is mean(q).
my_m_barchart <- ggplot(mydata, aes(x = 1, y = q, fill = factor(1))) +
  # `fun` replaces `fun.y`, which is deprecated since ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "bar") +
  scale_fill_manual(values = c(arboretum_blue)) +
  ggtitle("barchart of mean") +
  # "none" replaces FALSE, which is deprecated for hiding a legend.
  guides(fill = "none") +
  xlab(" ") + ylab("mean of continuous") +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# One bar per level of s, each showing the mean of q within that level.
my_facet_m_barchart <- ggplot(mydata, aes(x = factor(s), y = q, fill = s)) +
  stat_summary(fun = mean, geom = "bar") +
  scale_fill_manual(values = c(arboretum_blue,
                               taubman_teal,
                               michigan_blue,
                               michigan_maize)) +
  ggtitle("conditional \nbarchart of means") +
  guides(fill = "none") +
  xlab("categorical") + ylab("mean of continuous") +
  theme_minimal()

# Unconditional and conditional versions side by side.
plot_grid(my_m_barchart,
          my_facet_m_barchart,
          ncol = 2)

# Line chart of the overall mean of q.
# y is mapped to mean(q), i.e. the mean over all rows, so the per-level
# summary computed by stat_summary() is that same value for every level of s
# and the resulting line is flat.
my_m_linechart <- ggplot(mydata, aes(x = factor(s), y = mean(q), group = 1)) +
  # `fun` replaces the deprecated `fun.y`; `linewidth` replaces `size`,
  # which is deprecated for line widths since ggplot2 3.4.0.
  stat_summary(fun = mean, geom = "line", linewidth = 2,
               color = arboretum_blue) +
  geom_blank() +
  ggtitle("linechart of mean") +
  xlab(" ") + ylab("mean of continuous") +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Line connecting the mean of q within each level of s.
# group = 1 puts all levels in one group so they are joined by a single line.
my_facet_m_linechart <- ggplot(mydata, aes(x = factor(s), y = q, group = 1)) +
  stat_summary(fun = mean, geom = "line", linewidth = 2,
               color = arboretum_blue) +
  ggtitle("conditional \nlinechart of mean") +
  xlab(" ") + ylab("mean of continuous") +
  theme_minimal()

# Unconditional and conditional versions side by side.
plot_grid(my_m_linechart, my_facet_m_linechart)

# Violin plot of the continuous variable y. x is the constant 1, so one
# violin is drawn; the x axis text and ticks are hidden.
my_violin <- ggplot(data = mydata, mapping = aes(x = 1, y = y)) +
  geom_violin(fill = ann_arbor_amethyst) +
  labs(title = "violin plot", x = " ", y = "continuous") +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# One violin per level of w, each in its own panel (two columns).
my_facet_violin <- ggplot(data = mydata, mapping = aes(x = 1, y = y)) +
  geom_violin(fill = ann_arbor_amethyst) +
  facet_wrap(~ w, ncol = 2) +
  labs(
    title = "conditional violin plot",
    x = "categorical",
    y = "continuous"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Unconditional and conditional versions side by side.
plot_grid(my_violin, my_facet_violin, ncol = 2)

# Dotplot of the continuous variable y, using a bin width of 3.
my_dotplot <- ggplot(data = mydata, mapping = aes(x = y)) +
  geom_dotplot(colour = wave_field_green, binwidth = 3.0) +
  labs(title = "dotplot", x = "continuous", y = "density") +
  theme_minimal()

# Dotplot of y per level of w (two rows of panels), with a bin width of 1.5.
my_facet_dotplot <- ggplot(data = mydata, mapping = aes(x = y)) +
  geom_dotplot(colour = wave_field_green, binwidth = 1.5) +
  facet_wrap(~ w, nrow = 2) +
  labs(title = "conditional dotplot", x = "continuous", y = "density") +
  theme_minimal() +
  theme(axis.text = element_text(size = 5)) # shrink axis text for the panels

# Unconditional and conditional versions side by side.
plot_grid(my_dotplot, my_facet_dotplot, ncol = 2)

One Thing At A Time           Two Things At A Time

Categorical           Categorical By Categorical

# Bar chart of counts per level of the categorical variable s.
# Bars are filled by s; the fill legend is hidden.
my_barchart <- ggplot(mydata, aes(x = s, fill = s)) +
  geom_bar(width = 1.0) +
  scale_fill_manual(values = c(arboretum_blue,
                               taubman_teal,
                               michigan_blue,
                               michigan_maize)) +
  ggtitle("bar chart") +
  # "none" replaces FALSE, which is deprecated for hiding a legend.
  guides(fill = "none") +
  xlab("categorical") + ylab("count") +
  theme_minimal()

# The same bar chart drawn once per level of u (two columns of panels).
my_facet_barchart <- ggplot(mydata, aes(x = s, fill = s)) +
  geom_bar(width = 1.0) +
  facet_wrap(~ u, ncol = 2) +
  scale_fill_manual(values = c(arboretum_blue,
                               taubman_teal,
                               michigan_blue,
                               michigan_maize)) +
  ggtitle("conditional bar chart") +
  guides(fill = "none") +
  xlab("categorical") + ylab("count") +
  theme_minimal() +
  theme(axis.text.x = element_text(size = rel(.6))) # smaller x labels

# Unconditional and conditional versions side by side.
plot_grid(my_barchart, my_facet_barchart, ncol = 2)

# Horizontal bar chart of counts per level of s: an ordinary bar chart with
# the coordinates flipped. The fill legend is hidden.
# NOTE(review): the object name is spelled "barchat"; it is kept as is
# because other code may refer to it.
my_horiz_barchat <- ggplot(mydata, aes(x = s, fill = s)) +
  geom_bar(width = 1.0) +
  coord_flip() +
  scale_fill_manual(values = c(arboretum_blue,
                               taubman_teal,
                               michigan_blue,
                               michigan_maize)) +
  ggtitle("horizontal bar chart") +
  # "none" replaces FALSE, which is deprecated for hiding a legend.
  guides(fill = "none") +
  xlab("categorical") + ylab("count") +
  theme_minimal()

# The same horizontal bar chart drawn once per level of u, panels stacked
# in a single column.
my_horiz_facet_barchart <- ggplot(mydata, aes(x = s, fill = s)) +
  geom_bar(width = 1.0) +
  facet_wrap(~ u, ncol = 1) +
  coord_flip() +
  scale_fill_manual(values = c(arboretum_blue,
                               taubman_teal,
                               michigan_blue,
                               michigan_maize)) +
  ggtitle("conditional \nhorizontal bar chart") +
  guides(fill = "none") +
  xlab("categorical") + ylab("count") +
  theme_minimal() +
  theme(axis.text.y = element_text(size = rel(.5))) # smaller category labels

# Unconditional and conditional versions side by side.
plot_grid(my_horiz_barchat, my_horiz_facet_barchart, ncol = 2)

# Pie chart of the categorical variable v: a single stacked bar (constant x)
# drawn in polar coordinates with the angle taken from y.
# Axis text, ticks and the fill legend are hidden.
my_pie <- ggplot(mydata, aes(x = factor(1), fill = v)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y") +
  scale_fill_manual(values = michigan_colors) +
  ggtitle("pie chart") +
  # "none" replaces FALSE, which is deprecated for hiding a legend.
  guides(fill = "none") +
  xlab(" ") + ylab("categorical") +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  )

# One pie per level of u (two columns of panels). position = "fill" rescales
# each panel's bar to proportions, so every pie closes into a full circle.
my_facet_pie <- ggplot(mydata, aes(x = factor(1), fill = v)) +
  geom_bar(width = 1, position = "fill") +
  coord_polar(theta = "y") +
  facet_wrap(~ u, ncol = 2) +
  scale_fill_manual(values = michigan_colors) +
  ggtitle("conditional pie chart") +
  guides(fill = "none") +
  xlab(" ") + ylab("categorical") +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  )

# Unconditional and conditional versions side by side.
plot_grid(my_pie, my_facet_pie, ncol = 2)

# Doughnut chart of the categorical variable v: built like the pie chart
# (one stacked bar in polar coordinates) but with a bar width of .5.
# Axis text, ticks and the fill legend are hidden.
my_doughnut <- ggplot(mydata, aes(x = factor(1), fill = v)) +
  geom_bar(width = .5) +
  coord_polar(theta = "y") +
  scale_fill_manual(values = michigan_colors) +
  ggtitle("doughnut chart") +
  # "none" replaces FALSE, which is deprecated for hiding a legend.
  guides(fill = "none") +
  xlab(" ") + ylab("categorical") +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  )

# One doughnut per level of u (two columns of panels). position = "fill"
# rescales each panel's bar to proportions, so every ring closes fully.
my_facet_doughnut <- ggplot(mydata, aes(x = factor(1), fill = v)) +
  geom_bar(width = .5, position = "fill") +
  coord_polar(theta = "y") +
  facet_wrap(~ u, ncol = 2) +
  scale_fill_manual(values = michigan_colors) +
  ggtitle("conditional doughnut chart") +
  guides(fill = "none") +
  xlab(" ") + ylab("categorical") +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  )

# Unconditional and conditional versions side by side.
plot_grid(my_doughnut, my_facet_doughnut, ncol = 2)

Continuous by Continuous

# Scatterplot of the continuous variables x and y.
# Axis text and ticks are hidden on both axes.
my_scatterplot <- ggplot(mydata, aes(x = x, y = y)) +
  geom_point(colour = ann_arbor_amethyst) +
  ggtitle("scatterplot") +
  xlab("continuous") + ylab("continuous") +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  )

# The same scatterplot with a linear-model (lm) fit line drawn on top.
my_scatterplot_smoother <- ggplot(mydata, aes(x = x, y = y)) +
  geom_point(colour = ann_arbor_amethyst) +
  # `linewidth` replaces `size`, which is deprecated for line widths
  # since ggplot2 3.4.0.
  geom_smooth(method = lm, color = michigan_maize, linewidth = 2) +
  ggtitle("scatterplot with fit line") +
  xlab("continuous") + ylab("continuous") +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  )

# Plain and fitted versions side by side.
plot_grid(my_scatterplot, my_scatterplot_smoother)

# Hexagonal-bin plot of x against y. The legend text and keys are shrunk;
# axis text and ticks are hidden.
my_hexagon <- ggplot(mydata, aes(x = x, y = y)) +
  geom_hex() +
  ggtitle("hexagon plot") +
  xlab("continuous") + ylab("continuous") +
  theme_minimal() +
  theme(
    legend.text = element_text(size = 4),
    legend.key.size = unit(.25, "cm"),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  )

# Scatterplot of x against y with geom_smooth()'s default smoother and its
# confidence band (se = TRUE) drawn on top.
my_smoother <- ggplot(mydata, aes(x = x, y = y)) +
  geom_point(colour = ann_arbor_amethyst) +
  # `linewidth` replaces `size`, which is deprecated for line widths
  # since ggplot2 3.4.0.
  geom_smooth(se = TRUE, color = michigan_maize, linewidth = 2) +
  ggtitle("scatterplot with smoother") +
  xlab("continuous") + ylab("continuous") +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  )

# Hexagon plot and smoothed scatterplot side by side.
plot_grid(my_hexagon, my_smoother)

# Area plot of y against x. Axis text and ticks are hidden.
my_area <- ggplot(mydata, aes(x = x, y = y)) +
  geom_area(position = "stack", fill = ross_school_orange) +
  ggtitle("area plot") +
  xlab("continuous") + ylab("continuous") +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  )

# Filled contour plot of the 2-D density of x and y; polygons are filled by
# the computed density level.
my_contour <- ggplot(mydata, aes(x = x, y = y)) +
  # after_stat(level) replaces the `..level..` notation, which is deprecated
  # since ggplot2 3.4.0.
  stat_density_2d(aes(fill = after_stat(level)), geom = "polygon") +
  ggtitle("contour plot") +
  xlab("continuous") + ylab("continuous") +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  )

# Area plot and contour plot side by side.
plot_grid(my_area, my_contour)

Graphics made with the ggplot2 graphing library created by Hadley Wickham.

Available online at https://agroganweb.wordpress.com/data-visualization-dataviz/