Visualization

(1)

Exploring Data with R

Abhik Seal

May 8, 2014

This is an introductory tutorial to get you started with visualizing and exploring data with R. There are some popular books and many online materials; I will provide links and references at the end of the tutorial.

library(ggplot2) library(gcookbook)

Scatter Plots and line plots

# Base-graphics scatter plot of the built-in `cars` data: distance against speed.
# Each argument sits on its own line so that a trailing comment cannot swallow
# the argument that follows it.
# NOTE(review): the datasets::cars help page gives `dist` as stopping distance
# in feet, so the "(miles)" unit in the y-axis label looks wrong -- confirm.
plot(
  cars$dist ~ cars$speed,                              # formula interface: y ~ x
  main = "Relationship between car distance & speed",  # plot title
  xlab = "Speed (miles per hour)",                     # x axis title
  ylab = "Distance travelled (miles)",                 # y axis title
  xlim = c(0, 30),                                     # x axis limits from 0 to 30
  yaxs = "i",                                          # y axis style "internal"
  col = "red",                                         # plotting symbol colour
  pch = 19                                             # filled dots
)

0

5

10

15

20

25

30

20

40

60

80

120 Relationship between car distance & speed

Speed (miles per hour)

Distance tr

a

v

elled (miles)

(2)

# Scatter plot of mpg against displacement with a bar spanning 95%-105% of
# each mpg value. The two calls must be separate statements.
plot(mpg ~ disp, data = mtcars)
# code = 3 puts a head on both ends and angle = 90 makes the heads flat, so
# every arrow is drawn as a vertical error bar.
arrows(
  x0 = mtcars$disp, y0 = mtcars$mpg * 0.95,
  x1 = mtcars$disp, y1 = mtcars$mpg * 1.05,
  angle = 90, code = 3, length = 0.04, lwd = 0.4
)

100

200

300

400

10

15

20

25

30 disp

mpg

How to draw histograms in the top and right margins of a bivariate scatter plot

# Scatter plot with room for histograms in the top and right margins.
# Layout matrix (by row): panel 2 top-left, empty top-right,
#                         panel 1 bottom-left, panel 3 bottom-right.
layout(
  matrix(c(2, 0, 1, 3), 2, 2, byrow = TRUE),
  widths = c(3, 1), heights = c(1, 3), respect = TRUE
)

# Panel 1: the scatter plot itself
par(mar = c(5.1, 4.1, 0.1, 0))
plot(
  cars$dist ~ cars$speed,               # y ~ x
  xlab = "Speed (miles per hour)",      # x axis title
  ylab = "Distance travelled (miles)",  # y axis title
  xlim = c(0, 30),                      # x axis limits from 0 to 30
  ylim = c(0, 140),                     # y axis limits from 0 to 140
  xaxs = "i",                           # x axis style "internal"
  yaxs = "i",                           # y axis style "internal"
  col = "red",                          # plotting symbol colour
  pch = 19                              # filled dots
)

# Panel 2: histogram of speed above the scatter plot, without axes or labels
par(mar = c(0, 4.1, 3, 0))
hist(cars$speed, ann = FALSE, axes = FALSE, col = "black", border = "white")

# Histogram of distance is only computed here (plot = FALSE), not drawn
yhist <- hist(cars$dist, plot = FALSE)

par(mar=c(5.1,0,0.1,1)) barplot(yhist$density,

(3)

horiz=TRUE,space=0,axes=FALSE,

col="black",border="white")

0 5 10 15 20 25 30 20 40 60 80 100 120

Speed (miles per hour)

Distance tr

a

v

elled (miles)

#Using ggplot library

(4)

10 15 20 25 30 35 2 3 4 5

wt

mpg

# Multiple lines in a plot
# First series: pressure against temperature as a line with point markers
plot(pressure$temperature, pressure$pressure, type = "l")
points(pressure$temperature, pressure$pressure)

# Second series: half the pressure, drawn in red on the same axes
lines(pressure$temperature, pressure$pressure / 2, col = "red")
points(pressure$temperature, pressure$pressure / 2, col = "red")

(5)

0

50

150

250

350

0

200

400

600

800 pressure$temperature

pressure$pressure

ggplot(pressure, aes(x=temperature, y=pressure)) + geom_line()

0 200 400 600 800 0 100 200 300

temperature

pressure

(6)

# Lines and points together

ggplot(pressure, aes(x=temperature, y=pressure)) + geom_line() + geom_point() 0 200 400 600 800 0 100 200 300

temperature

pressure

# Showing Lines Along the Axes

ggplot(pressure, aes(x=temperature, y=pressure)) + geom_line() + geom_point() +

(7)

0 200 400 600 800 0 100 200 300

temperature

pressure

# Logarithmic axis

ggplot(pressure, aes(x=temperature, y=pressure)) + geom_line() + geom_point() +

theme(axis.line = element_line(colour="black")) + scale_x_log10() + scale_y_log10()

(8)

1e−03 1e−01 1e+01 1e+03 100

temperature

pressure

From library(gcookbook) I am using the heightweight dataset to group data points by variables. The grouping variable must be categorical—in other words, a factor or character vector.

# Other shapes and color can be used by scale_shape_manual() scale_colour_manual()

ggplot(heightweight, aes(x=ageYear, y=heightIn, shape=sex, colour=sex)) + geom_point()

(9)

50 55 60 65 70 12 14 16

ageYear

heightIn

sex f m

# Change shape of points

ggplot(heightweight, aes(x=ageYear, y=heightIn)) + geom_point(shape=3) 50 55 60 65 70 12 14 16

ageYear

heightIn

(10)

# Change point size sex is categorical

ggplot(heightweight, aes(x=ageYear, y=heightIn, shape=sex)) + geom_point(size=3) + scale_shape_manual(values=c(1, 4)) 50 55 60 65 70 12 14 16

ageYear

heightIn

sex f m

# Represent a third continuous variable using color or size.

ggplot(heightweight, aes(x=weightLb, y=heightIn, fill=ageYear)) + geom_point(shape=21, size=2.5) +

scale_fill_gradient(low="black", high="white", breaks=12:17,

(11)

50 55 60 65 70 50 75 100 125 150 175

weightLb

heightIn

ageYear 12 13 14 15 16 17

Adding Fitted Regression Model Lines

sp <- ggplot(heightweight, aes(x=ageYear, y=heightIn))

sp + geom_point() + stat_smooth(method=lm)

50 55 60 65 70 12 14 16

ageYear

heightIn

(12)

# Adding annotations to regression plot

model <- lm(heightIn ~ ageYear, heightweight)

summary(model)

# First generate prediction data.
#
# predictvals(): given a model, predict values of `yvar` from `xvar` on an
# evenly spaced grid. This supports one predictor and one predicted variable.
#
# Arguments:
#   model:   fitted model object that predict() accepts.
#   xvar:    name (string) of the predictor column.
#   yvar:    name (string) of the column that receives the predictions.
#   xrange:  If NULL, determine the x range from the model object (supported
#            for "lm", "glm" and "loess" models). If a vector with two
#            numbers, use those as the min and max of the prediction range.
#   samples: Number of samples across the x range.
#   ...:     Further arguments to be passed to predict().
#
# Returns a data frame with `samples` rows and the columns `xvar` and `yvar`.
# Stops with an error when `xrange` is NULL and the model class is unsupported.
predictvals <- function(model, xvar, yvar, xrange = NULL, samples = 100, ...) {
  # If xrange isn't passed in, determine it from the model; where the
  # predictor values are stored depends on the model type.
  if (is.null(xrange)) {
    if (inherits(model, c("lm", "glm"))) {
      xrange <- range(model$model[[xvar]])
    } else if (inherits(model, "loess")) {
      xrange <- range(model$x)
    } else {
      stop(
        "cannot determine 'xrange' for a model of class ",
        paste(class(model), collapse = "/"),
        "; supply 'xrange' explicitly",
        call. = FALSE
      )
    }
  }

  # Build the grid under a placeholder name, then rename it to the predictor
  newdata <- data.frame(x = seq(xrange[1], xrange[2], length.out = samples))
  names(newdata) <- xvar
  newdata[[yvar]] <- predict(model, newdata = newdata, ...)
  newdata
}

pred <- predictvals(model, "ageYear", "heightIn")

sp <- ggplot(heightweight, aes(x=ageYear, y=heightIn)) +

geom_point() + geom_line(data=pred)

(13)

r

2

=

0.42

50 55 60 65 70 12 14 16

ageYear

heightIn

Scatter plot matrix and correlation matrix using mtcars dataset and first five variables library(corrplot) pairs(mtcars[,1:5])

mpg

4 6 8 50 250 10 25 4 6 8

cyl

disp

100 400 50 250

hp

10 25 100 400 3.0 4.5 3.0 4.5

drat

(14)

# Scatter plot with correlations in the upper triangle, smoothing lines in the # lower triangle, and histograms on the diagonal

# Panel function for pairs(): writes the absolute correlation of `x` and `y`
# in the centre of the panel, with the text size scaled by that correlation.
#
# Arguments:
#   x, y:    numeric vectors supplied by pairs() for this panel.
#   digits:  significant digits used when formatting the correlation.
#   prefix:  string prepended to the formatted value.
#   cex.cor: base text size; if missing it is derived from the label width.
#   ...:     accepted so pairs() can pass extra arguments; not used here.
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
  # Switch to unit coordinates so (0.5, 0.5) is the panel centre. par()
  # returns the previous setting as a named list, which is what on.exit()
  # needs in order to restore it.
  old_par <- par(usr = c(0, 1, 0, 1))
  on.exit(par(old_par), add = TRUE)

  r <- abs(cor(x, y, use = "complete.obs"))
  # r is formatted together with a fixed reference number; only the first
  # formatted element is kept
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  if (missing(cex.cor)) {
    cex.cor <- 0.8 / strwidth(txt)
  }
  text(0.5, 0.5, txt, cex = cex.cor * (1 + r) / 2)
}

# Diagonal panel function for pairs(): draws a histogram of `x` whose bars
# are scaled so the tallest bar has height 1.
#
# Arguments:
#   x:   numeric vector supplied by pairs() for this panel.
#   ...: further arguments passed on to rect().
panel.hist <- function(x, ...) {
  usr <- par("usr")
  # Keep the panel's x limits and set the y range to [0, 1.5]. par() returns
  # the previous setting as a named list, which on.exit() restores.
  old_par <- par(usr = c(usr[1:2], 0, 1.5))
  on.exit(par(old_par), add = TRUE)

  h <- hist(x, plot = FALSE)
  breaks <- h$breaks
  nB <- length(breaks)
  y <- h$counts
  y <- y / max(y)
  # One rectangle per bin: left edges are breaks[-nB], right edges breaks[-1]
  rect(breaks[-nB], 0, breaks[-1], y, col = "white", ...)
}

pairs(mtcars[,1:5], upper.panel = panel.cor,

diag.panel = panel.hist, lower.panel = panel.smooth)

mpg

4 6 8

0.85

50 250

0.78

10 25

0.68

4 6 8

_cyl

0.90

0.83

0.70 disp

0.79

100 400

0.71

50 250

hp

_0.45 10 25 100 400 3.0 4.5 3.0 4.5

drat

(15)

mcor <- cor(mtcars) corrplot(mcor) −1 −0.8 −0.6 −0.4 −0.2 0 0.2 0.4 0.6 0.8 1

mpg

cyl

disp

hp

dr

at

wt

qsec

vs

am

gear

carb

mpg

cyl

disp

hp

drat

wt

qsec

vs

am

gear

carb

# Correlation matrix with colored squares and black, rotated labels

(16)

−1 −0.8 −0.6 −0.4 −0.2 0 0.2 0.4 0.6 0.8 1

mpg cyl disp hp dr

at

wt qsec vs

am gear carb

mpg

cyl

disp

hp

drat

wt

qsec

vs

am

gear

carb

# create a three-dimensional (3D) scatter plot.

library(rgl)

plot3d(mtcars$wt, mtcars$disp, mtcars$mpg, type="s", size=0.75, lit=FALSE)

# add vertical segments to help give a sense of the spatial positions of the points

# Interleave two vectors element by element: v1[1], v2[1], v1[2], v2[2], ...
# The vectors are stacked as the two rows of a matrix, which is then read
# column by column; rbind() recycles the shorter argument.
interleave <- function(v1, v2) {
  stacked <- rbind(v1, v2)
  as.vector(stacked)
}

# Plot the points

plot3d(mtcars$wt, mtcars$disp, mtcars$mpg,

xlab="Weight", ylab="Displacement", zlab="MPG",

size=.75, type="s", lit=FALSE)

# Add the segments

segments3d(interleave(mtcars$wt, mtcars$wt), interleave(mtcars$disp, mtcars$disp), interleave(mtcars$mpg, min(mtcars$mpg)),

alpha=0.4, col="blue")

Scatter plot with jitter, rugs, spikes and density

# Simulated data: y is a linear function of x plus normal noise
x <- rnorm(1000, 50, 30)
y <- 3 * x + rnorm(1000, 0, 20)

# library() stops with an error if Hmisc is not installed; require() would
# only return FALSE and let the script fail later at scat1d()
library(Hmisc)

plot(x, y)

# scat1d adds tick marks (bar codes, rug plot)
# on any of the four sides of an existing plot,
# corresponding with non-missing values of a vector x.
scat1d(x, col = "red")      # density bars on top of graph
scat1d(y, 4, col = "blue")  # density bars at right

(17)

−50

0

50

100

150 −200

0

100

200

300

400 x

y

plot(x,y, pch = 20)

histSpike(x, add=TRUE, col = "green4", lwd = 2) histSpike(y, 4, add=TRUE,col = "blue", lwd = 2 )

histSpike(x, type='density',col = "red", add=TRUE) # smooth density at bottom

(18)

−50

0

50

100

150 −200

0

100

200

300

400 x

y

Bar graphs and Histograms

(19)

1

2

3

4

5

7

0

5

10

15

# Using the table function

barplot(table(mtcars$cyl))

4

6

8

0

2

4

6

8

10

14

(20)

qplot(BOD$Time, BOD$demand, geom="bar", stat="identity") 0 5 10 15 20 2 4 6

BOD$Time

BOD$demand

# Considering Time as a factor

(21)

0 5 10 15 20 1 2 3 4 5 7

factor(BOD$Time)

BOD$demand

# cyl is continuous here

qplot(mtcars$cyl) 0 5 10 4 5 6 7 8

mtcars$cyl

count

(22)

# Treat cyl as discrete

qplot(factor(mtcars$cyl))

0 5 10 4 6 8

factor(mtcars$cyl)

count

# Bar graph of values. This uses the BOD data frame, with the # "Time" column for x values and the "demand" column for y values.

ggplot(BOD, aes(x=Time, y=demand)) + geom_bar(stat="identity")

(23)

0 5 10 15 20 2 4 6

Time

demand

ggplot(mtcars, aes(x=factor(cyl))) + geom_bar(fill="white",color="black")

0 5 10 4 6 8

factor(cyl)

count

(24)

# Specify approximate number of bins with breaks

ggplot(mtcars, aes(x=mpg)) +

geom_histogram(binwidth=4,fill="white", colour="black")

0 2 4 6 8 10 20 30 40

mpg

count

# Change the x axis origin using origin parameter

ggplot(mtcars, aes(x=mpg)) +

(25)

0 2 4 6 20 25 30 35

mpg

count

Histograms of multiple groups of data library(MASS)

ggplot(heightweight, aes(x=heightIn)) +

geom_histogram(fill="white", colour="black") + facet_grid(sex ~ .)

(26)

0 5 10 15 20 0 5 10 15 20 f m 50 55 60 65 70

heightIn

count

hw<-heightweight

# Using plyr and revalue() to change the names on sex variable

library(plyr)

hw$sex<- revalue(hw$sex,c("f"="Female","m"="Male"))

# Using facetting

ggplot(hw, aes(x=heightIn)) +

geom_histogram(fill="white", colour="black") + facet_grid(sex ~ .)

(27)

0 5 10 15 20 0 5 10 15 20 F emale Male 50 55 60 65 70

heightIn

count

ggplot(hw, aes(x=heightIn, y = ..density.. ,fill=sex)) + geom_histogram(position="identity",alpha=0.4)+

theme_bw()+geom_density(alpha=0.3) 0.00 0.05 0.10 0.15 0.20 0.25 50 55 60 65 70

heightIn

density

sex Female Male

(28)

Negative and Positive Bar plot

csub <- subset(climate, Source=="Berkeley" & Year >= 1900)

head(csub)

csub$pos <- csub$Anomaly10y >= 0

ggplot(csub, aes(x=Year, y=Anomaly10y, fill=pos)) +

geom_bar(stat="identity", color="black",position="identity")

0.0 0.5 1920 1950 1980

Year

Anomaly10y

pos FALSE TRUE

Error Bar plot in ggplot2

myd <- data.frame (X = c(1:12,1:12),

Y = c(8, 12, 13, 18, 22, 16, 24, 29, 34, 15, 8, 6, 9, 10, 12, 18, 26, 28, 28, 30, 20, 10, 9, 9),

group = rep (c("X-Group", "Y-group"), each = 12),

error = rep (c(2.5, 3.0), each = 12))

plt = ggplot(data = myd, aes(x=X, y=Y, fill=group, width=0.8) ) +

geom_errorbar(aes(ymin=Y, ymax=Y+error, width = 0.2),

position=position_dodge(width=0.8)) +

geom_bar(stat="identity", position=position_dodge(width=0.8)) + geom_bar(stat="identity", position=position_dodge(width=0.8),

colour="black", legend=FALSE) +

scale_fill_manual(values=c("grey70", "white")) + scale_x_discrete("X", limits=c(1:12)) +

scale_y_continuous("Y (units)", expand=c(0,0),

limits = c(0, 40), breaks=seq(0, 40, by=5)) + ggtitle ("My nice plot") +

theme_bw() +

(29)

axis.title.x = element_text(face="bold", size=12),

axis.title.y = element_text(face="bold", size=12, angle=90),

panel.grid.major = element_blank(),

panel.grid.minor = element_blank(),

axis.text.y=element_text(angle=90, hjust=0.5),

legend.title = element_blank(),

legend.position = c(0.85,0.85),

legend.key.size = unit(1.5, "lines"),

legend.key = element_rect() ) plt 0 5 10 15 20 25 30 35 40 1 2 3 4 5 6 7 8 9 10 11 12

X

Y (units)

X−Group Y−group

My nice plot

Box plots

# Using the ToothGrowth dataset # Formula syntax

(30)

OJ

VC

5

10

15

20

25

30

35

# Put interaction of two variables on x-axis

boxplot(len ~ supp + dose, data = ToothGrowth)

OJ.0.5

OJ.1

OJ.2

5

10

15

20

25

30

35

(31)

ggplot(ToothGrowth, aes(x=supp, y=len)) + geom_boxplot() 10 20 30 OJ VC

supp

len

# Adding notches

ggplot(ToothGrowth, aes(x=supp, y=len)) + geom_boxplot(notch=TRUE)

(32)

10 20 30 OJ VC

supp

len

# Adding mean

ggplot(ToothGrowth, aes(x=supp, y=len)) + geom_boxplot() +

stat_summary(fun.y="mean", geom="point", shape=24, size=4, fill="white")

10 20 30 OJ VC

supp

len

(33)

# Using three separate vectors

ggplot(ToothGrowth, aes(x=interaction(supp, dose), y=len)) + geom_boxplot()

10 20 30

OJ.0.5 VC.0.5 OJ.1 VC.1 OJ.2 VC.2

interaction(supp, dose)

len

Violin plots are a way of comparing multiple data distributions

# Use the heightweight datasets

# Violin plot of height by sex with a narrow box plot drawn on top.
p <- ggplot(heightweight, aes(x = sex, y = heightIn))

# `adjust` was misspelled `adjuts`, so the bandwidth adjustment was never
# applied to the density estimate.
# NOTE(review): the original chain ended in a dangling `+` and whatever layer
# followed is missing from this copy of the tutorial; the trailing operator is
# dropped so the statement is complete.
p + geom_violin(trim = FALSE, adjust = 2) +
  geom_boxplot(width = .1, fill = "Grey", outlier.colour = NA) +
  theme_bw()

(34)

50 60 70 f m

sex

heightIn

Plotting curves

(35)

−4

−2

0

2

4 −40

−20

0

20

40 x

x^3 − 5 * x

# Plot a user-defined function

# Logistic curve 1 / (1 + exp(10 - xvar)); it equals 0.5 at xvar = 10.
# Vectorised over `xvar`.
myfun <- function(xvar) {
  decay <- exp(10 - xvar)
  1 / (1 + decay)
}

curve(myfun(x), from=0, to=20)

# Add a line:

(36)

0

5

10

15

20

0.0

0.2

0.4

0.6

0.8

1.0 x

m

yfun(x)

# This sets the x range from 0 to 20

ggplot(data.frame(x=c(0, 20)), aes(x=x)) + stat_function(fun=myfun, geom="line")

0.00 0.25 0.50 0.75 1.00 0 5 10 15 20

x

y

(37)

Miscellaneous plots

Making Density Plot of Two-Dimensional Data

p <- ggplot(faithful, aes(x=eruptions, y=waiting))

p + geom_point() + stat_density2d() 50 60 70 80 90 2 3 4 5

eruptions

w

aiting

(38)

50 60 70 80 90 2 3 4 5

eruptions

w

aiting

0.005 0.010 0.015 0.020 level

p + stat_density2d(aes(fill=..density..), geom="raster", contour=FALSE)

50 60 70 80 90 2 3 4 5

eruptions

w

aiting

0.005 0.010 0.015 0.020 0.025 density

(39)

# With points, and map density estimate to alpha

p + geom_point() +

stat_density2d(aes(alpha=..density..), geom="tile", contour=FALSE)

50 60 70 80 90 2 3 4 5

eruptions

w

aiting

density 0.005 0.010 0.015 0.020 0.025

Plotting Pie Charts library(RColorBrewer)

slices <- c(10, 12,4, 16, 8)

lbls <- c("IN", "AK", "ID", "MA", "MO")

(40)

IN

AK

ID

MA

MO

Pie Chart of Countries

Pie Chart with Percentages

# Slice sizes and their two-letter labels
slices <- c(10, 12, 4, 16, 8)
lbls <- c("IN", "AK", "ID", "MA", "MO")

# Share of each slice as a whole-number percentage of the total
pct <- round(slices / sum(slices) * 100)

# Append the percentage and a % sign to each label, e.g. "IN 20%"
lbls <- paste0(lbls, " ", pct, "%")

pie(slices,labels = lbls, col=rainbow(length(lbls)),

(41)

IN 20%

AK 24%

ID 8%

MA 32%

MO 16%

Pie Chart of US States

3D Pie chart library(plotrix)

slices <- c(10, 12, 4, 16, 8)

lbls <- c("IN", "AK", "ID", "MA", "MO") pie3D(slices,labels=lbls,explode=0.1,

(42)

Pie Chart of Countries

IN

AK

ID

MA

MO

A dendrogram is the fancy name for a tree diagram that displays the groups formed by hierarchical clustering.

# Using Corrgrams package

library(corrgram)

R <- cor(mtcars)

# default corrgram

(43)

mpg cyl disp hp drat wt qsec vs am gear carb

# corrgram with pie charts

corrgram(R, order = TRUE, lower.panel = panel.shade, upper.panel = panel.pie,

text.panel = panel.txt, main = "mtcars Data")

gear am drat mpg vs qsec wt disp cyl hp carb

mtcars Data

(44)

The package ellipse provides the function plotcorr() that helps us to visualize correlations. plotcorr() uses ellipse-shaped glyphs for each entry of the correlation matrix. Here’s the default plot using our matrix of R:

# default corrgram library(ellipse) plotcorr(R)

mpg

cyl

disp

hp

drat

wt

qsec

vs

am

gear

carb

mpg

cyl

disp

hp

dr

at

wt

qsec

vs

am

gear

carb

# colored corrgram

(45)

mpg

cyl

disp

hp

drat

wt

qsec

vs

am

gear

carb

mpg

cyl

disp

hp

dr

at

wt

qsec

vs

am

gear

carb

Another colored corrgram

plotcorr(R, col = colorRampPalette(c("#E08214", "white", "#8073AC"))(10), type = "lower")

cyl

disp

hp

drat

wt

qsec

vs

am

gear

carb

(46)

Visualizing Dendrograms

# Hierarchical clustering of mtcars, computed on the distance matrix that
# dist() returns with its default settings
hc <- hclust(dist(mtcars))

# A negative `hang` draws all labels at the same level
plot(hc, hang = -1)

Maser

ati Bor

a

Chr

ysler Imper

ial

Cadillac Fleetw

ood

Lincoln Continental

F

ord P

anter

a L

Duster 360

Camaro Z28

Hor

net Spor

tabout

P

ontiac Firebird Hor

net 4 Dr

iv

e

V

aliant

Merc 450SLC

Merc 450SE Merc 450SL

Dodge Challenger

AMC J

a

v

elin

Honda Civic

T

o

y

ota Corolla

Fiat 128

Fiat X1−9

F

err

ar

i Dino

Lotus Europa

Merc 230

V

olv

o 142E

Datsun 710

T

o

y

ota Corona

P

orsche 914−2

Merc 240D Mazda RX4

Mazda RX4 W

ag

Merc 280

Merc 280C

0

300 Cluster Dendrogram

hclust (*, "complete")

dist(mtcars)

Height

An alternative way to produce dendrograms is to explicitly convert hclust objects into dendrogram objects.

# using dendrogram objects

hcd = as.dendrogram(hc)

# alternative way to get a dendrogram

(47)

0

100

200

300

400 Maser

ati Bor

a

Chr

ysler Imper

ial

Cadillac Fleetw

ood

Lincoln Continental

F

ord P

anter

a L

Duster 360

Camaro Z28

Hor

net Spor

tabout

P

ontiac Firebird Hor

net 4 Dr

iv

e

V

aliant

Merc 450SLC

Merc 450SE Merc 450SL

Dodge Challenger

AMC J

a

v

elin

Honda Civic

T

o

y

ota Corolla

Fiat 128

Fiat X1−9

F

err

ar

i Dino

Lotus Europa

Merc 230

V

olv

o 142E

Datsun 710

T

o

y

ota Corona

P

orsche 914−2

Merc 240D Mazda RX4

Mazda RX4 W

ag

Merc 280

Merc 280C

Having an object of class dendrogram, we can also plot the branches in a triangular form.

# using dendrogram objects

(48)

0

100

200

300

400 Maser

ati Bor

a

Chr

ysler Imper

ial

Cadillac Fleetw

ood

Lincoln Continental

F

ord P

anter

a L

Duster 360

Camaro Z28

Hor

net Spor

tabout

P

ontiac Firebird Hor

net 4 Dr

iv

e

V

aliant

Merc 450SLC

Merc 450SE Merc 450SL

Dodge Challenger

AMC J

a

v

elin

Honda Civic

T

o

y

ota Corolla

Fiat 128

Fiat X1−9

F

err

ar

i Dino

Lotus Europa

Merc 230

V

olv

o 142E

Datsun 710

T

o

y

ota Corona

P

orsche 914−2

Merc 240D Mazda RX4

Mazda RX4 W

ag

Merc 280

Merc 280C

Phylogenetic trees

library(ape)

# plot basic tree

(49)

Mazda RX4

Mazda RX4 Wag

Datsun 710

Hornet 4 Drive

Hornet Sportabout

Valiant

Duster 360

Merc 240D

Merc 230

Merc 280

Merc 280C

Merc 450SE

Merc 450SL

Merc 450SLC

Cadillac Fleetwood

Lincoln Continental

Chrysler Imperial

Fiat 128

Honda Civic

Toyota Corolla

Toyota Corona

Dodge Challenger

AMC Javelin

Camaro Z28

Pontiac Firebird

Fiat X1−9

Porsche 914−2

Lotus Europa

Ford Pantera L

Ferrari Dino

Maserati Bora

Volvo 142E

# fan

(50)

Mazda RX4

Mazda RX4 W

ag

Datsun 710

Hor

net 4 Dr

iv

e

Hor

net Spor

tabout

V

aliant

Duster 360

Merc 240D

Merc 230

Merc 280

Merc 280C

Merc 450SE

Merc 450SL

Merc 450SLC

Cadillac Fleetw

ood

Lincoln Continental

Chrysler Imper

ial

Fiat 128

Honda Civic

Toyota Corolla

T

_o

y

_{ota Corona}

Dodge Challenger

AMC J

avelin

Camaro Z28

P

ontiac Firebird

Fiat X1−9

P

orsche 914−2

Lotus Europa

Ford P

anter

a L

Ferr

ar

i Dino

Maserati Bora

V

olv

o 142E

# add colors randomly

plot(as.phylo(hc), type = "fan", tip.color = hsv(runif(15, 0.65, 0.95), 1, 1, 0.7),

edge.color = hsv(runif(10, 0.65, 0.75), 1, 1, 0.7),

(51)

Mazda RX4

Mazda RX4 W

ag

Datsun 710

Hor

net 4 Dr

iv

e

Hor

net Spor

tabout

V

aliant

Duster 360

Merc 240D

Merc 230

Merc 280

Merc 280C

Merc 450SE

Merc 450SL

Merc 450SLC

Cadillac Fleetw

ood

Lincoln Continental

Chrysler Imper

ial

Fiat 128

Honda Civic

Toyota Corolla

T

_o

y

_{ota Corona}

Dodge Challenger

AMC J

avelin

Camaro Z28

P

ontiac Firebird

Fiat X1−9

P

orsche 914−2

Lotus Europa

Ford P

anter

a L

Ferr

ar

i Dino

Maserati Bora

V

olv

o 142E

Triple heat map plot library(reshape2) library (grid) library(ggplot2)

# X-axis annotation data for ggplot: one row per individual (ID1..ID20) and
# four columns of categories sampled with replacement from the letters A-G.
# The factor levels are reversed so ID20 is the first level.
datfx <- data.frame(
  indv = factor(paste0("ID", 1:20), levels = rev(paste0("ID", 1:20))),
  # replace = TRUE is spelled out: `T` is an ordinary variable that can be
  # reassigned, and positional matching hid which argument it was
  matrix(sample(LETTERS[1:7], 80, replace = TRUE), ncol = 4)
)

# converting data to long form for ggplot2 use

datf1x <- melt(datfx, id.var = 'indv')

plotx <- ggplot(datf1x, aes(indv, variable)) +

geom_tile(aes(fill = value),colour = "white") + scale_fill_manual(values= terrain.colors(7))+ scale_x_discrete(expand=c(0,0))

px <- plotx

# Y-axis annotation data for ggplot: one row per individual (ID21..ID40) and
# five columns of categories sampled with replacement from the letters G-J.
# The factor levels are reversed so ID40 is the first level.
datfy <- data.frame(
  indv = factor(paste0("ID", 21:40), levels = rev(paste0("ID", 21:40))),
  # replace = TRUE is spelled out: `T` is an ordinary variable that can be
  # reassigned, and positional matching hid which argument it was
  matrix(sample(LETTERS[7:10], 100, replace = TRUE), ncol = 5)
)

# converting data to long form for ggplot2 use

datf1y <- melt(datfy, id.var = 'indv')

ploty <- ggplot(datf1y, aes( variable, indv)) + geom_tile(aes(fill = value),

colour = "white") +

scale_fill_manual(values= c("cyan4", "midnightblue", "green2", "lightgreen")) + scale_x_discrete(expand=c(0,0))

(52)

py <- ploty + theme(legend.position="left", axis.title=element_blank())

# XY heat-map data with a quantitative fill: a 20 x 20 matrix of random
# normal values (mean 50, sd 10). Rows are individuals ID1..ID20; the value
# columns are named ID21..ID40.
datfxy <- data.frame(
  indv = factor(paste0("ID", 1:20), levels = rev(paste0("ID", 1:20))),
  matrix(rnorm(400, 50, 10), ncol = 20)
)
names(datfxy) <- c("indv", paste0("ID", 21:40))

# Long form for ggplot2: one row per (indv, variable) pair
datfxy <- melt(datfxy, id.vars = "indv")

# Reverse the order of the column ids. Assigning to levels() would rename the
# existing levels in place (ID21's values would be labelled ID40, and so on);
# rebuilding the factor changes only the level order and keeps every value
# attached to its own id.
datfxy$variable <- factor(
  as.character(datfxy$variable),
  levels = rev(paste0("ID", 21:40))
)

pxy <- plotxy <- ggplot(datfxy, aes(indv, variable)) +

geom_tile(aes(fill = value),colour = "white") + scale_fill_gradient(low="red", high="yellow") + theme(axis.title=element_blank())

# Define layout for the plots (2 rows, 2 columns)

layt<-grid.layout(nrow=2,ncol=2,heights=c(6/8,2/8),widths=c(2/8,6/8),default.units=c('null','null'))

#View the layout of plots

(53)

(1, 1) 0.75null 0.25null (1, 2) 0.75null 0.75null (2, 1) 0.25null 0.25null (2, 2) 0.75null 0.25null

#Draw plots one by one in their positions

grid.newpage()

pushViewport(viewport(layout=layt))

print(py,vp=viewport(layout.pos.row=1,layout.pos.col=1)) print(pxy,vp=viewport(layout.pos.row=1,layout.pos.col=2)) print(px,vp=viewport(layout.pos.row=2,layout.pos.col=2))

(54)

ID40 ID39 ID38 ID37 ID36 ID35 ID34 ID33 ID32 ID31 ID30 ID29 ID28 ID27 ID26 ID25 ID24 ID23 ID22 ID21 X1 X2 X3 X4 X5 value G H I J ID40 ID39 ID38 ID37 ID36 ID35 ID34 ID33 ID32 ID31 ID30 ID29 ID28 ID27 ID26 ID25 ID24 ID23 ID22 ID21

ID20ID19ID18ID17ID16ID15ID14ID13ID12ID11ID10 ID9 ID8 ID7 ID6 ID5 ID4 ID3 ID2 ID1

30 40 50 60 70 value X1 X2 X3 X4

ID20ID19ID18ID17ID16ID15ID14ID13ID12ID11ID10 ID9 ID8 ID7 ID6 ID5 ID4 ID3 ID2 ID1 indv v ar iab le value A B C D E F G

Mosaic plot for categorical data

myd <- data.frame (fact1 = sample (c("A", "B", "C", "D"), 200, replace = TRUE),