Last week my colleague showed me a drawn graph and asked whether we could plot this kind of graph. It is a grouped boxplot that additionally contains a trend line connecting the mean of each box. This post explains how to draw such a graph in two ways: with ggplot2 and with base R graphics.
The data is a sample data set from my colleague. It contains VI (Vegetation Index) time series values for three different crops, extracted from Landsat 8 data.
library(tidyverse)
library(RColorBrewer)
# Load the sample time series table from disk; the first row of the CSV
# holds the column names
read.df <- read.csv(
  file = "D:/Blog_data/jblog/static/data/sample_ts.csv",
  header = TRUE
)
# Work on a duplicate so the table as read stays available unchanged
sample.df <- read.df
# Checking the distinct values in the date column. factor() is applied first
# because read.csv() keeps strings as character vectors since R 4.0.0 and
# levels() of a character vector is NULL; if the column is already a factor
# its (used) levels are returned unchanged.
levels(factor(sample.df$date))
#> [1] "04-Jul-15" "07-Aug-15" "10-May-15" "24-Apr-15"
# Checking the distinct values in the crop column in the same way
levels(factor(sample.df$crop))
#> [1] "maize" "potato" "sugar beet"
# Convert the date column to a factor whose levels follow the calendar
# (the raw strings sort alphabetically, which puts July before April) and
# display each acquisition date under its month name.
sample.df[["date"]] <- factor(
  x = sample.df[["date"]],
  levels = c("24-Apr-15", "10-May-15", "04-Jul-15", "07-Aug-15"),
  labels = c("April", "May", "July", "August")
)
# Fill colours for the three crops; used by both the ggplot2 and the
# base-graphics versions of the plot
my_colors <- c("greenyellow", "gold3", "darksalmon")
# Create the plot using ggplot2: one box per crop within each month, with
# the means overlaid as red points and a red trend line.
ggplot(data = sample.df, aes(x = date, y = NDVI, fill = crop)) +
  geom_boxplot() +
  ylim(c(-0.2, 1.2)) +
  # Mean of every crop/month box drawn as a red point; the dodge is there
  # so that the points sit over their side-by-side boxes.
  stat_summary(
    fun = mean, # `fun` replaces `fun.y`, deprecated since ggplot2 3.3.0
    colour = "red",
    geom = "point",
    position = position_dodge(width = 0.75)
  ) +
  # Trend line: group = 1 overrides the per-crop grouping, so one single
  # line joins the overall mean of each month.
  stat_summary(
    fun = mean,
    colour = "red",
    aes(group = 1),
    geom = "line",
    lwd = 1,
    lty = 1
  ) +
  theme_bw(base_size = 12) +
  # Apply the crop palette and show capitalised crop names in the legend
  scale_fill_manual(
    values = my_colors,
    name = "Crop Type",
    labels = c("Maize", "Potato", "Sugar Beet")
  ) +
  labs(
    x = "Month",
    y = "VI",
    title = "Phenology of the selected crops",
    subtitle = "VI extracted from Landsat 8 time series data"
  )
# Make the boxplot using base graphics: one box per crop/date combination,
# with crop varying fastest so every date forms a group of three boxes.
# `xlab` is given exactly once; passing it twice stops the call with
# "formal argument 'xlab' matched by multiple actual arguments".
boxplot(
  NDVI ~ crop + date,
  data = sample.df,
  ylim = c(-0.2, 1.2),
  xaxt = "n", # suppress the default x axis; month labels are added separately
  col = my_colors, # recycled across the groups of three boxes
  pch = 20,
  cex = 0.3,
  xlab = "Month",
  ylab = "VI",
  main = "Phenology of the selected crops \n VI extracted from Landsat 8 time series data"
)
# Grey vertical separators halfway between neighbouring groups of three
# boxes (and at both outer edges): x = 0.5, 3.5, ..., 12.5
abline(
  v = seq(from = 0, to = 3 * 4, by = 3) + 0.5,
  col = "grey"
)
# One month label under the middle box of every group: x = 2, 5, 8, 11
axis(
  side = 1,
  at = seq(from = 2, to = 3 * 4, by = 3),
  labels = levels(sample.df$date)
)
# Add general trend: mean VI for every crop/date combination, computed with
# crop as the first grouping variable and date as the second
a <- aggregate(
  x = sample.df$NDVI,
  by = list(sample.df$crop, sample.df$date),
  FUN = mean
)
# Overlay the means (third column of the result) as a red line; no x values
# are given, so they are drawn at positions 1, 2, 3, ...
lines(a[[3]], type = "l", col = "red", lwd = 2)
# Add a legend for the crops, laid out horizontally at the top of the plot
legend(
  "top",
  legend = c("Maize", "Potato", "Sugar Beet"),
  fill = my_colors,
  horiz = TRUE # spelled out: `T` is an ordinary variable that can be reassigned
)