Exploring Data with R
Abhik Seal
May 8, 2014
This is an introductory tutorial to get you started with visualizing and exploring data with R. There are some popular books and many online materials; I will provide links and references at the end of the tutorial.
# ggplot2 provides the plotting functions; gcookbook supplies example
# datasets used below (e.g. heightweight).
library(ggplot2)
library(gcookbook)
Scatter Plots and line plots
# Scatter plot of stopping distance against speed for the cars dataset.
# Each argument sits on its own line so that the trailing comments cannot
# swallow the arguments that follow them.
plot(cars$dist~cars$speed, # y~x
     main="Relationship between car distance & speed", #Plot Title
     xlab="Speed (miles per hour)", #X axis title
     ylab="Distance travelled (miles)", #Y axis title
     xlim=c(0,30), #Set x axis limits from 0 to 30
     yaxs="i", #Set y axis style as internal
     col="red", #Set the colour of plotting symbol to red
     pch=19) #Set the plotting symbol to filled dots
0
5
10
15
20
25
30
20
40
60
80
120
Relationship between car distance & speed
Speed (miles per hour)
Distance tr
a
v
elled (miles)
# Scatter plot of mpg against disp with a vertical bar through every point.
plot(mpg~disp,data=mtcars)
# Each bar runs from 95% to 105% of the point's mpg; code=3 draws a head at
# both ends and angle=90 makes the heads flat, giving an error-bar look.
arrows(x0=mtcars$disp, y0=mtcars$mpg*0.95,
       x1=mtcars$disp, y1=mtcars$mpg*1.05,
       angle=90, code=3, length=0.04, lwd=0.4)
100
200
300
400
10
15
20
25
30
disp
mpg
How to draw histograms in the top and right margins of a bivariate scatter plot
# 2 x 2 layout: figure 1 (scatter plot) bottom-left, figure 2 (speed
# histogram) top-left, figure 3 (distance histogram) bottom-right; the
# top-right cell (0) stays empty.
layout(matrix(c(2,0,1,3),2,2,byrow=TRUE), widths=c(3,1), heights=c(1,3), respect=TRUE)
par(mar=c(5.1,4.1,0.1,0))
plot(cars$dist~cars$speed, # y~x
     xlab="Speed (miles per hour)", #X axis title
     ylab="Distance travelled (miles)", #Y axis title
     xlim=c(0,30), #Set x axis limits from 0 to 30
     ylim=c(0,140), #Set y axis limits from 0 to 140
     xaxs="i", #Set x axis style as internal
     yaxs="i", #Set y axis style as internal
     col="red", #Set the colour of plotting symbol to red
     pch=19) #Set the plotting symbol to filled dots
# Histogram of speed in the top margin, without annotation or axes
par(mar=c(0,4.1,3,0))
hist(cars$speed,ann=FALSE,axes=FALSE,col="black",border="white")
# Histogram of distance in the right margin: compute the bins without
# plotting, then draw the densities as horizontal bars
yhist <- hist(cars$dist,plot=FALSE)
par(mar=c(5.1,0,0.1,1))
barplot(yhist$density,
        horiz=TRUE,space=0,axes=FALSE,
        col="black",border="white")
0 5 10 15 20 25 30 20 40 60 80 100 120
Speed (miles per hour)
Distance tr
a
v
elled (miles)
#Using ggplot library
10 15 20 25 30 35 2 3 4 5
wt
mpg
# Multiple lines in a plot
# First series: pressure against temperature as a line with point markers
plot(pressure$temperature, pressure$pressure, type="l")
points(pressure$temperature, pressure$pressure)
# Second series: the same curve at half the pressure, drawn in red
lines(pressure$temperature, pressure$pressure/2, col="red")
points(pressure$temperature, pressure$pressure/2, col="red")
0
50
150
250
350
0
200
400
600
800
pressure$temperature
pressure$pressure
ggplot(pressure, aes(x=temperature, y=pressure)) + geom_line()
0 200 400 600 800 0 100 200 300
temperature
pressure
# Lines and points together
# geom_point() overlays a marker on every observation of the line
ggplot(pressure, aes(x=temperature, y=pressure)) + geom_line() + geom_point()
0 200 400 600 800 0 100 200 300
temperature
pressure
# Showing Lines Along the Axes
# theme(axis.line = ...) draws the black lines along both axes
ggplot(pressure, aes(x=temperature, y=pressure)) + geom_line() + geom_point() +
  theme(axis.line = element_line(colour="black"))
0 200 400 600 800 0 100 200 300
temperature
pressure
# Logarithmic axis
# scale_x_log10() and scale_y_log10() put both axes on a log10 scale
ggplot(pressure, aes(x=temperature, y=pressure)) + geom_line() + geom_point() +
  theme(axis.line = element_line(colour="black")) + scale_x_log10() + scale_y_log10()
1e−03 1e−01 1e+01 1e+03 100
temperature
pressure
From library(gcookbook) I am using the heightweight dataset to group data points by a variable. The grouping variable must be categorical—in other words, a factor or character vector.
# Other shapes and color can be used by scale_shape_manual() scale_colour_manual()
ggplot(heightweight, aes(x=ageYear, y=heightIn, shape=sex, colour=sex)) + geom_point()
50 55 60 65 70 12 14 16
ageYear
heightIn
sex f m# Change shape of points
ggplot(heightweight, aes(x=ageYear, y=heightIn)) + geom_point(shape=3) 50 55 60 65 70 12 14 16
ageYear
heightIn
# Change point size sex is categorical
ggplot(heightweight, aes(x=ageYear, y=heightIn, shape=sex)) + geom_point(size=3) + scale_shape_manual(values=c(1, 4)) 50 55 60 65 70 12 14 16
ageYear
heightIn
sex f m# Represent a third continuous variable using color or size.
# shape=21 is a filled circle, so the fill aesthetic (ageYear) colours it.
# guide_legend() shows the gradient as a discrete legend with one key per
# break (12 to 17) instead of a continuous colour bar.
ggplot(heightweight, aes(x=weightLb, y=heightIn, fill=ageYear)) + geom_point(shape=21, size=2.5) +
  scale_fill_gradient(low="black", high="white", breaks=12:17,
                      guide=guide_legend())
50 55 60 65 70 50 75 100 125 150 175
weightLb
heightIn
ageYear 12 13 14 15 16 17
Adding Fitted Regression Model Lines
sp <- ggplot(heightweight, aes(x=ageYear, y=heightIn))
sp + geom_point() + stat_smooth(method=lm)
50 55 60 65 70 12 14 16
ageYear
heightIn
# Adding annotations to regression plot
model <- lm(heightIn ~ ageYear, heightweight)
summary(model)
# First generate prediction data
# Given a model, predict values of yvar from xvar
# This supports one predictor and one predicted variable
# xrange: If NULL, determine the x range from the model object. If a vector with # two numbers, use those as the min and max of the prediction range.
# samples: Number of samples across the x range. # ...: Further arguments to be passed to predict()
predictvals <- function(model, xvar, yvar, xrange=NULL, samples=100, ...) {
  # Returns a data frame with `samples` evenly spaced values of `xvar`
  # spanning `xrange`, and the model's predictions for them in `yvar`.
  # Arguments in `...` are forwarded to predict().
  if (is.null(xrange)) {
    # The x values used for fitting are stored differently per model type.
    if (inherits(model, c("lm", "glm"))) {
      xrange <- range(model$model[[xvar]])
    } else if (inherits(model, "loess")) {
      xrange <- range(model$x)
    } else {
      # Fail with a clear message rather than letting seq() choke on NULL.
      stop("cannot determine 'xrange' for a model of class ",
           paste(class(model), collapse = "/"),
           "; supply 'xrange' explicitly", call. = FALSE)
    }
  }
  newdata <- data.frame(x = seq(xrange[1], xrange[2], length.out = samples))
  names(newdata) <- xvar
  newdata[[yvar]] <- predict(model, newdata = newdata, ...)
  newdata
}
pred <- predictvals(model, "ageYear", "heightIn")
sp <- ggplot(heightweight, aes(x=ageYear, y=heightIn)) +
geom_point() + geom_line(data=pred)
r
2=
0.42
50 55 60 65 70 12 14 16ageYear
heightIn
Scatter plot matrix and correlation matrix using mtcars dataset and first five variables
library(corrplot)
# Scatter plot matrix of the first five columns of mtcars
pairs(mtcars[,1:5])
mpg
4 6 8 50 250 10 25 4 6 8cyl
disp
100 400 50 250hp
10 25 100 400 3.0 4.5 3.0 4.5drat
# Scatter plot with correlations in the upper triangle, smoothing lines in the # lower triangle, and histograms on the diagonal
panel.cor <- function(x, y, digits=2, prefix="", cex.cor, ...) {
  # Panel function for pairs(): writes the absolute correlation of x and y
  # in the centre of the panel, with text size growing with the correlation.
  #   digits:  significant digits used to format the correlation
  #   prefix:  text prepended to the formatted correlation
  #   cex.cor: base text size; when missing it is derived from the width of
  #            the label
  # Switch to a unit coordinate system so (0.5, 0.5) is the panel centre;
  # par() returns the previous setting, which is restored on exit.
  old_par <- par(usr = c(0, 1, 0, 1))
  on.exit(par(old_par), add = TRUE)
  r <- abs(cor(x, y, use="complete.obs"))
  txt <- format(c(r, 0.123456789), digits=digits)[1]
  txt <- paste0(prefix, txt)
  if (missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
  text(0.5, 0.5, txt, cex = cex.cor * (1 + r) / 2)
}
panel.hist <- function(x, ...) {
  # Diagonal panel function for pairs(): draws a histogram of x whose bars
  # are scaled so the tallest has height 1. `...` is forwarded to rect().
  usr <- par("usr")
  # Restore the caller's coordinate system on exit; the setting must be
  # passed by name for par() to apply it.
  on.exit(par(usr = usr), add = TRUE)
  # Keep the x range but use a 0..1.5 y range, leaving headroom above the bars
  par(usr = c(usr[1:2], 0, 1.5))
  h <- hist(x, plot = FALSE)
  breaks <- h$breaks
  nB <- length(breaks)
  y <- h$counts
  y <- y/max(y)
  rect(breaks[-nB], 0, breaks[-1], y, col="white", ...)
}
pairs(mtcars[,1:5], upper.panel = panel.cor,
diag.panel = panel.hist, lower.panel = panel.smooth)
mpg
4 6 80.85
0.85
50 2500.78
10 250.68
4 6 8cyl
0.90
0.83
0.70
disp
0.79
100 4000.71
50 250hp
0.45 10 25 100 400 3.0 4.5 3.0 4.5drat
# Correlation matrix of all mtcars columns, drawn with corrplot
mcor <- cor(mtcars)
corrplot(mcor)
−1 −0.8 −0.6 −0.4 −0.2 0 0.2 0.4 0.6 0.8 1
mpg
cyl
disp
hp
dr
at
wt
qsec
vs
am
gear
carb
mpg
cyl
disp
hp
drat
wt
qsec
vs
am
gear
carb
# Correlation matrix with colored squares and black, rotated labels
−1 −0.8 −0.6 −0.4 −0.2 0 0.2 0.4 0.6 0.8 1
mpg cyl disp hp dr
at
wt qsec vs
am gear carb
mpg
cyl
disp
hp
drat
wt
qsec
vs
am
gear
carb
# create a three-dimensional (3D) scatter plot.
library(rgl)
plot3d(mtcars$wt, mtcars$disp, mtcars$mpg, type="s", size=0.75, lit=FALSE)
# add vertical segments to help give a sense of the spatial positions of the points
# Alternate the elements of v1 and v2 (v1[1], v2[1], v1[2], v2[2], ...).
# Stacking the vectors as two rows and flattening column by column yields
# the alternating order.
interleave <- function(v1, v2) {
  stacked <- rbind(v1, v2)
  as.vector(stacked)
}
# Plot the points
plot3d(mtcars$wt, mtcars$disp, mtcars$mpg,
xlab="Weight", ylab="Displacement", zlab="MPG",
size=.75, type="s", lit=FALSE)
# Add the segments
# Each segment joins a point to the same (wt, disp) position at the minimum
# mpg, i.e. drops a vertical line from the point to the floor of the plot.
segments3d(interleave(mtcars$wt, mtcars$wt),
           interleave(mtcars$disp, mtcars$disp),
           interleave(mtcars$mpg, min(mtcars$mpg)),
           alpha=0.4, col="blue")
Scatter plot with jitter rugs, spikes and density
# Simulated data: y depends linearly on x, plus noise
x <- rnorm(1000, 50, 30)
y <- 3*x + rnorm(1000, 0, 20)
# library() stops immediately if Hmisc is not installed, whereas require()
# only returns FALSE and lets the script fail later at scat1d()
library(Hmisc)
plot(x,y)
# scat1d adds tick marks (bar codes, rug plot)
# on any of the four sides of an existing plot,
# corresponding with non-missing values of a vector x.
scat1d(x, col = "red") # density bars on top of graph
scat1d(y, 4, col = "blue") # density bars at right
−50
0
50
100
150
−200
0
100
200
300
400
x
y
plot(x,y, pch = 20)
# Spike histograms added to the existing plot for x and, on side 4, for y
histSpike(x, add=TRUE, col = "green4", lwd = 2)
histSpike(y, 4, add=TRUE,col = "blue", lwd = 2 )
histSpike(x, type='density',col = "red", add=TRUE) # smooth density at bottom
−50
0
50
100
150
−200
0
100
200
300
400
x
y
Bar graphs and Histograms
1
2
3
4
5
7
0
5
10
15
# Using the table function
barplot(table(mtcars$cyl))
4
6
8
0
2
4
6
8
10
14
qplot(BOD$Time, BOD$demand, geom="bar", stat="identity") 0 5 10 15 20 2 4 6
BOD$Time
BOD$demand
# Considering factor
0 5 10 15 20 1 2 3 4 5 7
factor(BOD$Time)
BOD$demand
# cyl is continuous here
qplot(mtcars$cyl) 0 5 10 4 5 6 7 8
mtcars$cyl
count
# Treat cyl as discrete
qplot(factor(mtcars$cyl))
0 5 10 4 6 8
factor(mtcars$cyl)
count
# Bar graph of values. This uses the BOD data frame, with the # "Time" column for x values and the "demand" column for y values.
ggplot(BOD, aes(x=Time, y=demand)) + geom_bar(stat="identity")
0 5 10 15 20 2 4 6
Time
demand
ggplot(mtcars, aes(x=factor(cyl))) + geom_bar(fill="white",color="black")
0 5 10 4 6 8
factor(cyl)
count
# Specify approximate number of bins with breaks
ggplot(mtcars, aes(x=mpg)) +
geom_histogram(binwidth=4,fill="white", colour="black")
0 2 4 6 8 10 20 30 40
mpg
count
# Change the x axis origin using origin parameter
ggplot(mtcars, aes(x=mpg)) +
0 2 4 6 20 25 30 35
mpg
count
Histograms of multiple groups of data library(MASS)
ggplot(heightweight, aes(x=heightIn)) +
geom_histogram(fill="white", colour="black") + facet_grid(sex ~ .)
0 5 10 15 20 0 5 10 15 20 f m 50 55 60 65 70
heightIn
count
hw<-heightweight# Using plyr and revalue() to change the names on sex variable
library(plyr)
hw$sex<- revalue(hw$sex,c("f"="Female","m"="Male"))
# Using facetting
ggplot(hw, aes(x=heightIn)) +
geom_histogram(fill="white", colour="black") + facet_grid(sex ~ .)
0 5 10 15 20 0 5 10 15 20 F emale Male 50 55 60 65 70
heightIn
count
ggplot(hw, aes(x=heightIn, y = ..density.. ,fill=sex)) + geom_histogram(position="identity",alpha=0.4)+
theme_bw()+geom_density(alpha=0.3) 0.00 0.05 0.10 0.15 0.20 0.25 50 55 60 65 70
heightIn
density
sex Female Male
Negative and Positive Bar plot
csub <- subset(climate, Source=="Berkeley" & Year >= 1900)
head(csub)
csub$pos <- csub$Anomaly10y >= 0
ggplot(csub, aes(x=Year, y=Anomaly10y, fill=pos)) +
geom_bar(stat="identity", color="black",position="identity")
0.0 0.5 1920 1950 1980
Year
Anomaly10y
pos FALSE TRUE
Error Bar plot in ggplot2
myd <- data.frame (X = c(1:12,1:12),
Y = c(8, 12, 13, 18, 22, 16, 24, 29, 34, 15, 8, 6, 9, 10, 12, 18, 26, 28, 28, 30, 20, 10, 9, 9),
group = rep (c("X-Group", "Y-group"), each = 12),
error = rep (c(2.5, 3.0), each = 12))
# Dodged bar chart of Y by X for the two groups, with an error bar rising
# from the top of each bar (Y to Y + error).
plt <- ggplot(data = myd, aes(x=X, y=Y, fill=group, width=0.8) ) +
  geom_errorbar(aes(ymin=Y, ymax=Y+error, width = 0.2),
                position=position_dodge(width=0.8)) +
  geom_bar(stat="identity", position=position_dodge(width=0.8)) +
  # Second bar layer adds the black outline; show.legend=FALSE keeps it out
  # of the legend
  geom_bar(stat="identity", position=position_dodge(width=0.8),
           colour="black", show.legend=FALSE) +
  scale_fill_manual(values=c("grey70", "white")) +
  scale_x_discrete("X", limits=c(1:12)) +
  scale_y_continuous("Y (units)", expand=c(0,0),
                     limits = c(0, 40), breaks=seq(0, 40, by=5)) +
  ggtitle ("My nice plot") +
  theme_bw() +
  # The element settings below must be wrapped in theme() to form a layer
  theme(axis.title.x = element_text(face="bold", size=12),
        axis.title.y = element_text(face="bold", size=12, angle=90),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.y=element_text(angle=90, hjust=0.5),
        legend.title = element_blank(),
        legend.position = c(0.85,0.85),
        legend.key.size = grid::unit(1.5, "lines"),
        legend.key = element_rect() )
plt
0 5 10 15 20 25 30 35 40 1 2 3 4 5 6 7 8 9 10 11 12
X
Y (units)
X−Group Y−groupMy nice plot
Box plots
# Using the ToothGrowth dataset # Formula syntax
OJ
VC
5
10
15
20
25
30
35
# Put interaction of two variables on x-axis
boxplot(len ~ supp + dose, data = ToothGrowth)
OJ.0.5
OJ.1
OJ.2
5
10
15
20
25
30
35
ggplot(ToothGrowth, aes(x=supp, y=len)) + geom_boxplot() 10 20 30 OJ VC
supp
len
# Adding notches
ggplot(ToothGrowth, aes(x=supp, y=len)) + geom_boxplot(notch=TRUE)
10 20 30 OJ VC
supp
len
# Adding mean
# stat_summary() marks the mean of each group with a white filled point
# (shape 24) on top of the box plot.
# NOTE(review): newer ggplot2 releases name this argument `fun` rather than
# `fun.y` — confirm against the installed version.
ggplot(ToothGrowth, aes(x=supp, y=len)) + geom_boxplot() +
  stat_summary(fun.y="mean", geom="point", shape=24, size=4, fill="white")
10 20 30 OJ VC
supp
len
# Using three separate vectors
ggplot(ToothGrowth, aes(x=interaction(supp, dose), y=len)) + geom_boxplot()
10 20 30
OJ.0.5 VC.0.5 OJ.1 VC.1 OJ.2 VC.2
interaction(supp, dose)
len
Violin plots are a way of comparing multiple data distributions
# Use the heightweight datasets
p <- ggplot(heightweight, aes(x=sex, y=heightIn))
# Violin plot with a narrow box plot inside each violin.
# trim=FALSE keeps the tails of the violins; adjust=2 is passed as the
# smoothing adjustment of the density estimate; outlier.colour=NA hides
# the box plot's outlier points.
p + geom_violin(trim=FALSE,adjust=2)+
  geom_boxplot(width=.1, fill="Grey", outlier.colour=NA)+
  theme_bw()
50 60 70 f m
sex
heightIn
Plotting curves
−4
−2
0
2
4
−40
−20
0
20
40
x
x^3 − 5 * x
# Plot a user-defined function
# Logistic curve centred at xvar = 10: returns 0.5 at 10, approaches 0 for
# much smaller inputs and 1 for much larger ones. Vectorised over xvar.
myfun <- function(xvar) {
  shifted <- 10 - xvar
  1 / (1 + exp(shifted))
}
curve(myfun(x), from=0, to=20)
# Add a line:
0
5
10
15
20
0.0
0.2
0.4
0.6
0.8
1.0
x
m
yfun(x)
# This sets the x range from 0 to 20
ggplot(data.frame(x=c(0, 20)), aes(x=x)) + stat_function(fun=myfun, geom="line")
0.00 0.25 0.50 0.75 1.00 0 5 10 15 20
x
y
Miscellaneous plots
Making Density Plot of Two-Dimensional Data
p <- ggplot(faithful, aes(x=eruptions, y=waiting))
p + geom_point() + stat_density2d() 50 60 70 80 90 2 3 4 5
eruptions
w
aiting
50 60 70 80 90 2 3 4 5
eruptions
w
aiting
0.005 0.010 0.015 0.020 levelp + stat_density2d(aes(fill=..density..), geom="raster", contour=FALSE)
50 60 70 80 90 2 3 4 5
eruptions
w
aiting
0.005 0.010 0.015 0.020 0.025 density# With points, and map density estimate to alpha
p + geom_point() +
stat_density2d(aes(alpha=..density..), geom="tile", contour=FALSE)
50 60 70 80 90 2 3 4 5
eruptions
w
aiting
density 0.005 0.010 0.015 0.020 0.025Plotting Pie Charts library(RColorBrewer)
slices <- c(10, 12,4, 16, 8)
lbls <- c("IN", "AK", "ID", "MA", "MO")
IN
AK
ID
MA
MO
Pie Chart of Countries
Pie Chart with Percentages
slices <- c(10, 12, 4, 16, 8)
lbls <- c("IN", "AK", "ID", "MA", "MO")
# Share of each slice as a whole-number percentage
pct <- round(slices/sum(slices)*100)
# Label format: "<state> <percent>%", e.g. "IN 20%"
lbls <- paste0(lbls, " ", pct, "%")
pie(slices,labels = lbls, col=rainbow(length(lbls)),
    main="Pie Chart of US States")
IN 20%
AK 24%
ID 8%
MA 32%
MO 16%
Pie Chart of US States
3D Pie chart
library(plotrix)
slices <- c(10, 12, 4, 16, 8)
lbls <- c("IN", "AK", "ID", "MA", "MO")
# explode=0.1 is passed to pie3D as the `explode` setting for the slices
pie3D(slices,labels=lbls,explode=0.1,
      main="Pie Chart of Countries")
Pie Chart of Countries
IN
AK
ID
MA
MO
A dendrogram is the fancy word that we use for a tree diagram that displays the groups formed by hierarchical clustering.
# Using Corrgrams package
library(corrgram)
R <- cor(mtcars)
# default corrgram
mpg cyl disp hp drat wt qsec vs am gear carb
# corrgram with pie charts
corrgram(R, order = TRUE, lower.panel = panel.shade, upper.panel = panel.pie,
text.panel = panel.txt, main = "mtcars Data")
gear am drat mpg vs qsec wt disp cyl hp carb
mtcars Data
The package ellipse provides the function plotcorr() that helps us to visualize correlations. plotcorr() uses ellipse-shaped glyphs for each entry of the correlation matrix. Here’s the default plot using our matrix of R:
# default corrgram
library(ellipse)
plotcorr(R)
mpg
cyl
disp
hp
drat
wt
qsec
vs
am
gear
carb
mpg
cyl
disp
hp
dr
at
wt
qsec
vs
am
gear
carb
# colored corrgram
mpg
cyl
disp
hp
drat
wt
qsec
vs
am
gear
carb
mpg
cyl
disp
hp
dr
at
wt
qsec
vs
am
gear
carb
Another colored corrgram
plotcorr(R, col = colorRampPalette(c("#E08214", "white", "#8073AC"))(10), type = "lower")
cyl
disp
hp
drat
wt
qsec
vs
am
gear
carb
Visualizing Dendrograms
# prepare hierarchical cluster
hc = hclust(dist(mtcars))
plot(hc, hang = -1) ## labels at the same level
Maser
ati Bor
a
Chr
ysler Imper
ial
Cadillac Fleetw
ood
Lincoln Continental
F
ord P
anter
a L
Duster 360
Camaro Z28
Hor
net Spor
tabout
P
ontiac Firebird Hor
net 4 Dr
iv
e
V
aliant
Merc 450SLC
Merc 450SE Merc 450SL
Dodge Challenger
AMC J
a
v
elin
Honda Civic
T
o
y
ota Corolla
Fiat 128
Fiat X1−9
F
err
ar
i Dino
Lotus Europa
Merc 230
V
olv
o 142E
Datsun 710
T
o
y
ota Corona
P
orsche 914−2
Merc 240D Mazda RX4
Mazda RX4 W
ag
Merc 280
Merc 280C
0
300
Cluster Dendrogram
hclust (*, "complete")
dist(mtcars)
Height
An alternative way to produce dendrograms is to specifically convert hclust objects into dendrograms objects.
# using dendrogram objects
hcd = as.dendrogram(hc)
# alternative way to get a dendrogram
0
100
200
300
400
Maser
ati Bor
a
Chr
ysler Imper
ial
Cadillac Fleetw
ood
Lincoln Continental
F
ord P
anter
a L
Duster 360
Camaro Z28
Hor
net Spor
tabout
P
ontiac Firebird Hor
net 4 Dr
iv
e
V
aliant
Merc 450SLC
Merc 450SE Merc 450SL
Dodge Challenger
AMC J
a
v
elin
Honda Civic
T
o
y
ota Corolla
Fiat 128
Fiat X1−9
F
err
ar
i Dino
Lotus Europa
Merc 230
V
olv
o 142E
Datsun 710
T
o
y
ota Corona
P
orsche 914−2
Merc 240D Mazda RX4
Mazda RX4 W
ag
Merc 280
Merc 280C
Having an object of class dendrogram, we can also plot the branches in a triangular form.
# using dendrogram objects
0
100
200
300
400
Maser
ati Bor
a
Chr
ysler Imper
ial
Cadillac Fleetw
ood
Lincoln Continental
F
ord P
anter
a L
Duster 360
Camaro Z28
Hor
net Spor
tabout
P
ontiac Firebird Hor
net 4 Dr
iv
e
V
aliant
Merc 450SLC
Merc 450SE Merc 450SL
Dodge Challenger
AMC J
a
v
elin
Honda Civic
T
o
y
ota Corolla
Fiat 128
Fiat X1−9
F
err
ar
i Dino
Lotus Europa
Merc 230
V
olv
o 142E
Datsun 710
T
o
y
ota Corona
P
orsche 914−2
Merc 240D Mazda RX4
Mazda RX4 W
ag
Merc 280
Merc 280C
Phylogenetic trees
library(ape)# plot basic tree
Mazda RX4
Mazda RX4 Wag
Datsun 710
Hornet 4 Drive
Hornet Sportabout
Valiant
Duster 360
Merc 240D
Merc 230
Merc 280
Merc 280C
Merc 450SE
Merc 450SL
Merc 450SLC
Cadillac Fleetwood
Lincoln Continental
Chrysler Imperial
Fiat 128
Honda Civic
Toyota Corolla
Toyota Corona
Dodge Challenger
AMC Javelin
Camaro Z28
Pontiac Firebird
Fiat X1−9
Porsche 914−2
Lotus Europa
Ford Pantera L
Ferrari Dino
Maserati Bora
Volvo 142E
# fanMazda RX4
Mazda RX4 W
ag
Datsun 710
Hor
net 4 Dr
iv
e
Hor
net Spor
tabout
V
aliant
Duster 360
Merc 240D
Merc 230
Merc 280
Merc 280C
Merc 450SE
Merc 450SL
Merc 450SLC
Cadillac Fleetw
ood
Lincoln Continental
Chrysler Imper
ial
Fiat 128
Honda Civic
Toyota Corolla
T
o
y
ota Corona
Dodge Challenger
AMC J
avelin
Camaro Z28
P
ontiac Firebird
Fiat X1−9
P
orsche 914−2
Lotus Europa
Ford P
anter
a L
Ferr
ar
i Dino
Maserati Bora
V
olv
o 142E
# add colors randomly
plot(as.phylo(hc), type = "fan", tip.color = hsv(runif(15, 0.65, 0.95), 1, 1, 0.7),
edge.color = hsv(runif(10, 0.65, 0.75), 1, 1, 0.7),
Mazda RX4
Mazda RX4 W
ag
Datsun 710
Hor
net 4 Dr
iv
e
Hor
net Spor
tabout
V
aliant
Duster 360
Merc 240D
Merc 230
Merc 280
Merc 280C
Merc 450SE
Merc 450SL
Merc 450SLC
Cadillac Fleetw
ood
Lincoln Continental
Chrysler Imper
ial
Fiat 128
Honda Civic
Toyota Corolla
T
o
y
ota Corona
Dodge Challenger
AMC J
avelin
Camaro Z28
P
ontiac Firebird
Fiat X1−9
P
orsche 914−2
Lotus Europa
Ford P
anter
a L
Ferr
ar
i Dino
Maserati Bora
V
olv
o 142E
Triple heat map plot library(reshape2) library (grid) library(ggplot2)
#X axis quantitaive ggplot data
# 20 individuals (ID1..ID20, factor levels reversed) by 4 columns of random
# letters A-G
datfx <- data.frame(indv=factor(paste0("ID", 1:20),
                                levels=rev(paste0("ID", 1:20))),
                    matrix(sample(LETTERS[1:7], 80, replace=TRUE), ncol = 4))
# converting data to long form for ggplot2 use
datf1x <- melt(datfx, id.vars = 'indv')
# Tile plot with individuals on the x axis, one colour per letter
plotx <- ggplot(datf1x, aes(indv, variable)) +
  geom_tile(aes(fill = value),colour = "white") +
  scale_fill_manual(values= terrain.colors(7))+
  scale_x_discrete(expand=c(0,0))
px <- plotx
#Y axis quantitaive ggplot data
# 20 individuals (ID21..ID40, factor levels reversed) by 5 columns of random
# letters G-J
datfy <- data.frame(indv=factor(paste0("ID", 21:40),
                                levels=rev(paste0("ID", 21:40))),
                    matrix(sample(LETTERS[7:10], 100, replace=TRUE), ncol = 5))
# converting data to long form for ggplot2 use
datf1y <- melt(datfy, id.vars = 'indv')
# Tile plot with individuals on the y axis, one colour per letter
ploty <- ggplot(datf1y, aes( variable, indv)) +
  geom_tile(aes(fill = value), colour = "white") +
  scale_fill_manual(values= c("cyan4", "midnightblue", "green2", "lightgreen")) +
  scale_x_discrete(expand=c(0,0))
# Legend on the left, no axis titles
py <- ploty + theme(legend.position="left", axis.title=element_blank())
# plot XY quantative fill
# 20 x 20 grid of normally distributed values (mean 50, sd 10): rows are
# ID1..ID20, columns are renamed ID21..ID40
datfxy <- data.frame(indv=factor(paste0("ID", 1:20),
                                 levels=rev(paste0("ID", 1:20))),
                     matrix(rnorm (400, 50, 10), ncol = 20))
names (datfxy) <- c("indv", paste0("ID", 21:40))
datfxy <- melt(datfxy, id.vars = 'indv')
levels (datfxy$ variable) <- rev(paste0("ID", 21:40))
# Heat map of the values on a red-to-yellow gradient, without axis titles
pxy <- plotxy <- ggplot(datfxy, aes(indv, variable)) +
  geom_tile(aes(fill = value),colour = "white") +
  scale_fill_gradient(low="red", high="yellow") +
  theme(axis.title=element_blank())
# Define layout for the plots (2 rows, 2 columns)
layt<-grid.layout(nrow=2,ncol=2,heights=c(6/8,2/8),widths=c(2/8,6/8),default.units=c('null','null'))
#View the layout of plots
(1, 1) 0.75null 0.25null (1, 2) 0.75null 0.75null (2, 1) 0.25null 0.25null (2, 2) 0.75null 0.25null
#Draw plots one by one in their positions
grid.newpage()
pushViewport(viewport(layout=layt))
# py in the top-left cell, pxy in the top-right cell, px in the bottom-right
# cell of the 2 x 2 layout
print(py,vp=viewport(layout.pos.row=1,layout.pos.col=1))
print(pxy,vp=viewport(layout.pos.row=1,layout.pos.col=2))
print(px,vp=viewport(layout.pos.row=2,layout.pos.col=2))
ID40 ID39 ID38 ID37 ID36 ID35 ID34 ID33 ID32 ID31 ID30 ID29 ID28 ID27 ID26 ID25 ID24 ID23 ID22 ID21 X1 X2 X3 X4 X5 value G H I J ID40 ID39 ID38 ID37 ID36 ID35 ID34 ID33 ID32 ID31 ID30 ID29 ID28 ID27 ID26 ID25 ID24 ID23 ID22 ID21
ID20ID19ID18ID17ID16ID15ID14ID13ID12ID11ID10 ID9 ID8 ID7 ID6 ID5 ID4 ID3 ID2 ID1
30 40 50 60 70 value X1 X2 X3 X4
ID20ID19ID18ID17ID16ID15ID14ID13ID12ID11ID10 ID9 ID8 ID7 ID6 ID5 ID4 ID3 ID2 ID1 indv v ar iab le value A B C D E F G
Mosaic plot for categorical data
myd <- data.frame (fact1 = sample (c("A", "B", "C", "D"), 200, replace = TRUE),
fact2 = sample (c("HL", "PS", "DS"), 200, replace = TRUE),
fact3 = sample (c("Male", "Female"), 200, replace = TRUE))
#plot
# vcd package is for visualization of categorical data
require(vcd)
mytable <- table (myd)
−1.5
0.0
1.9
Pearson
residuals:
p−value =
0.27
fact2
fact1
fact3
D
Male
F
emale
C
Male
F
emale
B
Male
F
emale
A
DS
HL
PS
Male
F
emale
References
1. R Graphics Cookbook
2. ggplot2 book by Hadley Wickham
3. R graphs examples
4. R Graph cookbook