Skip to Navigation Skip to Content Skip to Search Skip to Site Map
Search

R publication quality graph – 31st March 2016

This demonstration covers the kinds of things you might want to adjust in a basic plot to improve the graph for publication.

You can watch the recording (18 minutes 59 seconds):


or download  (right click and choose the save/download link option) the recording to your computer via the link:

PublicationGraphInR (.m4v format, 68.1 MB)

and read the parts of the script below (code available when viewing the full article)

(Note, because there are macrons in the electorate names Windows users may need to be a little careful about what they do with the code if you want to run it. The web browser should be utf-8 text, and R is utf-8 text, so moving directly between the two should be fine. However, if you go through other programs along the way, they might save text in different formats so it gets confused about ā characters and similar).

The script setup:

#the data
myDataAsCSV <- '
"electorate","voteForChange","flagEngagement"
"Auckland Central",43.2,0.423586347935161
"Bay of Plenty",51.5,0.88506684143897
"Botany",48.3,0.661947477705919
"Christchurch Central",43.4,0.703167051224859
"Christchurch East",41.7,0.833151238481109
"Clutha-Southland",50.5,0.950475833747897
"Coromandel",45.3,0.983566573480444
"Dunedin North",35.6,0.551329630228422
"Dunedin South",38.4,0.867277725399521
"East Coast",42.3,0.856585026187755
"East Coast Bays",51.1,0.815888098479214
"Epsom",49.7,0.65748517669255
"Hamilton East",47.9,0.597251860336753
"Hamilton West",44.6,0.686031896369701
"Hauraki-Waikato",25.4,0.411097224074744
"Helensville",43.3,0.727646340573673
"Hunua",46.5,0.756556383535633
"Hutt South",42.8,0.680261979611132
"Ikaroa-Rāwhiti",22.6,0.432602584593948
"Ilam",50.8,0.837238221916498
"Invercargill",39.9,0.934564176189736
"Kaikōura",46.8,1
"Kelston",34.9,0.469257527412655
"Mana",43.1,0.507944135242772
"Māngere",28.9,0.0544140160244452
"Manukau East",32.3,0
"Manurewa",34.3,0.132529882117495
"Maungakiekie",40.8,0.41468947772484
"Mt Albert",38.4,0.432330822805254
"Mt Roskill",41.5,0.486582160264273
"Napier",41.6,0.894848132765746
"Nelson",47.8,0.870043482766168
"New Lynn",39.4,0.502643442210299
"New Plymouth",49.1,0.892690864023117
"North Shore",49.4,0.810894088275051
"Northcote",43.6,0.695741071754572
"Northland",39.1,0.873607691718847
"Ōhāriu",45.9,0.682053942967811
"Ōtaki",42.4,0.871201990768258
"Pakuranga",46.7,0.769551647169245
"Palmerston North",41.9,0.692079207166083
"Papakura",40.9,0.588374174033903
"Port Hills",45.8,0.833572100973093
"Rangitata",48.2,0.992356076228686
"Rangitῑkei",43.9,0.93746099432336
"Rimutaka",40,0.673313028292408
"Rodney",47.4,0.885872529962551
"Rongotai",37.1,0.439776911528572
"Rotorua",43.6,0.778237149054976
"Selwyn",51.6,0.912420601004816
"Tāmaki",51.9,0.73409305218088
"Tāmaki Makaurau",22,0.490079546048277
"Taranaki-King Country",48,0.901063691018269
"Taupō",46.9,0.896130076233718
"Tauranga",49.7,0.855785581595647
"Te Atatū",37.9,0.529643776090699
"Te Tai Hauāuru",25.9,0.487708546729501
"Te Tai Tokerau",21.1,0.403183414236336
"Te Tai Tonga",31.9,0.626235289146382
"Tukituki",43.3,0.824123757661351
"Upper Harbour",44,0.665894713624441
"Waiariki",23.8,0.420042156326721
"Waikato",47.6,0.921147170086325
"Waimakariri",48.9,0.96636391650484
"Wairarapa",43.1,0.911188546803545
"Waitaki",49.4,0.952927596648344
"Wellington Central",40.9,0.355053049929952
"West Coast-Tasman",41.8,0.904091426775813
"Whanganui",40.8,0.944221177494395
"Whangarei",41.3,0.898294406700538
"Wigram",42.9,0.779634155293247
'

#readInTheData
fr <- read.csv(text= myDataAsCSV)

#Identify some electorates of interest
mel <- c("Hauraki-Waikato", "Ikaroa-Rāwhiti", "Tāmaki Makaurau", "Te Tai Hauāuru", "Te Tai Tokerau","Te Tai Tonga", "Waiariki")
sael <- c("Māngere", "Manukau East", "Manurewa")

#create some predefined subsets based on those regions
melc <- fr$electorate %in% mel
saelc <- fr$electorate %in% sael
gelc <- !(melc | saelc)

#create a quadratic (bendy) line of best fit for only the general (non-South Auckland) electorates with a 95% confidence interval for the line location
## create a sequence of points within the data min and max to calculate the line position from
genxpoints <- seq(from=min(fr$flagEngagement[gelc]), to=max(fr$flagEngagement[gelc]), length.out=100)
##generate squares
fr$f2 <- fr$flagEngagement * fr$flagEngagement
##generate cubes
fr$f3 <- fr$flagEngagement * fr$flagEngagement * fr$flagEngagement
## make model
genmod <- lm(voteForChange ~ flagEngagement + f2 + f3, data=fr[gelc,])
summary(genmod)
## with and Rsquared of .22 it is a pretty poor line, but I am assuming that will get described in the text
## make a data.frame based on the sequence, having the same column names as the orginal used to build the model
predictionData <- data.frame(flagEngagement = genxpoints, f2 = genxpoints * genxpoints, f3 = genxpoints * genxpoints * genxpoints)
## generate the y points (column 1 of the result), together with upper and lower 95% CI bounds for the ypoints.
genypoints <- as.data.frame(predict(genmod, newdata=predictionData, interval="confidence"))

#For making graphs I am assuming:
# it needs particular fonts and axis details
# we can use colour to illustrate things
# it needs to be a 300 dots per inch bitmap rather than a vector pdf file, and has required physical dimensions
# the figure has an text caption under it, so we do not need to use plot(main=) to add a title

 

The first graph

#Graph 1
plot(fr$flagEngagement, fr$voteForChange)

 

The second graph, saving as a tiff

#Graph 2
#saves directly as a tiff
tiff(filename = "publication.tiff", width = 12, height = 10, units = "cm", pointsize = 12, res = 300)
plot(fr$flagEngagement, fr$voteForChange)
dev.off()

 

The third version

#Graph 3
#axis size and labelling
tiff(filename = "publication.tiff", width = 12, height = 10, units = "cm", pointsize = 12, res = 300)
plot(fr$flagEngagement, fr$voteForChange, frame.plot = FALSE, xlab="Electorate Flag Engagement", ylab="Support for Change", ylim=c(0,60), xlim=c(0,1))
dev.off()

 

Version four

#Graph 4
#subsets plotted seperately in seethough colours
tiff(filename = "publication.tiff", width = 12, height = 10, units = "cm", pointsize = 12, res = 300)
plot(fr$flagEngagement[gelc], fr$voteForChange[gelc], frame.plot = FALSE, xlab="Electorate Flag Engagement", ylab="Support for Change", ylim=c(0,60), xlim=c(0,1), pch=19, col="#33333366")
points(fr$flagEngagement[melc], fr$voteForChange[melc], pch=18, cex=1, col="#FF333366")
points(fr$flagEngagement[saelc], fr$voteForChange[saelc], pch=17, cex=0.8, col="#AA00AA66")
dev.off()

 

Version five

#Graph 5
#as graph 4 but add line of best fit and an CI zone around it
## polygons trace around the outside of the shape, so need to go there and back again
polX <- c(genxpoints, rev(genxpoints))
polY <- c(genypoints$lwr, rev(genypoints$upr))

tiff(filename = "publication.tiff", width = 12, height = 10, units = "cm", pointsize = 12, res = 300)
plot(fr$flagEngagement[gelc], fr$voteForChange[gelc], frame.plot = FALSE, xlab="Electorate Flag Engagement", ylab="Support for Change", ylim=c(0,60), xlim=c(0,1), pch=19, col="#33333366")
points(fr$flagEngagement[melc], fr$voteForChange[melc], pch=18, cex=1, col="#FF333366")
points(fr$flagEngagement[saelc], fr$voteForChange[saelc], pch=17, cex=0.8, col="#AA00AA66")
polygon(polX,polY, col="#8888FF33", border=NA)
lines(genxpoints, genypoints$fit)
dev.off()

 

The sixth graph, with the legend correction of the added comma from the video

#Graph 6
#as graph 5 but adding a legend
## being slightly sneaking to use the hyphen symbol to indicate line of best fit
polX <- c(genxpoints, rev(genxpoints))
polY <- c(genypoints$lwr, rev(genypoints$upr))

tiff(filename = "publication.tiff", width = 12, height = 10, units = "cm", pointsize = 12, res = 300)
plot(fr$flagEngagement[gelc], fr$voteForChange[gelc], frame.plot = FALSE, xlab="Electorate Flag Engagement", ylab="Support for Change", ylim=c(0,60), xlim=c(0,1), pch=19, col="#33333366")
points(fr$flagEngagement[melc], fr$voteForChange[melc], pch=18, cex=1, col="#FF333366")
points(fr$flagEngagement[saelc], fr$voteForChange[saelc], pch=17, cex=0.8, col="#AA00AA66")
polygon(polX,polY, col="#8888FF33", border=NA)
lines(genxpoints, genypoints$fit)
legend("toopleft", legend= c("Māori Electorates", "South Auckland", "General Electorates", "Line of best fit", "95% CI"), col=c("#FF333366", "#AA00AA66", "#33333366", "#000000", "#22222222"), pch= c(18,17,19,45,15), bty="n", pt.cex=c(1,0.8,0.8,1,0.8), y.intersp=1.5, cex=0.8)
dev.off()

 

The seventh version of the graph

#Graph 7
#graph 6 doesn't really have room for the legend so I extend the graph and split the legend
## being slightly sneaking to use the hyphen symbol to indicate line of best fit
polX <- c(genxpoints, rev(genxpoints))
polY <- c(genypoints$lwr, rev(genypoints$upr))

tiff(filename = "publication.tiff", width = 12, height = 10, units = "cm", pointsize = 12, res = 300)
plot(fr$flagEngagement[gelc], fr$voteForChange[gelc], frame.plot = FALSE, xlab="Electorate Flag Engagement", ylab="Support for Change", ylim=c(0,80), xlim=c(-.2,1), pch=19, col="#33333366")
points(fr$flagEngagement[melc], fr$voteForChange[melc], pch=18, cex=1, col="#FF333366")
points(fr$flagEngagement[saelc], fr$voteForChange[saelc], pch=17, cex=0.8, col="#AA00AA66")
polygon(polX,polY, col="#8888FF33", border=NA)
lines(genxpoints, genypoints$fit)
legend("topleft", legend= c("Māori Electorates", "South Auckland", "General Electorates"), col=c("#FF333366", "#AA00AA66", "#33333366"), pch= c(18,17,19), bty="n", pt.cex=c(1,0.8,0.8), y.intersp=1.5, cex=0.8)
legend("topright", legend= c("Line of best fit", "95% CI"), col=c("#000000", "#22222222"), pch= c(45,15), bty="n", pt.cex=c(1,0.8), y.intersp=1.5, cex=0.8)
dev.off()

 

Version eight

#Graph 8
#graph 7 would look better with the CI error range under everything else, so we set the intial graph to plot no points and use points() for the general electorates
polX <- c(genxpoints, rev(genxpoints))
polY <- c(genypoints$lwr, rev(genypoints$upr))

tiff(filename = "publication.tiff", width = 12, height = 10, units = "cm", pointsize = 12, res = 300)
plot(c(-.2,1), c(0,80), frame.plot = FALSE, xlab="Electorate Flag Engagement", ylab="Support for Change", type="n", pch=19, col="#33333366")
polygon(polX,polY, col="#8888FF33", border=NA)
points(fr$flagEngagement[gelc], fr$voteForChange[gelc], pch=19, col="#33333366")
points(fr$flagEngagement[melc], fr$voteForChange[melc], pch=18, cex=1, col="#FF333366")
points(fr$flagEngagement[saelc], fr$voteForChange[saelc], pch=17, cex=0.8, col="#AA00AA66")
lines(genxpoints, genypoints$fit)
legend("topleft", legend= c("Māori Electorates", "South Auckland", "General Electorates"), col=c("#FF333366", "#AA00AA66", "#33333366"), pch= c(18,17,19), bty="n", pt.cex=c(1,0.8,0.8), y.intersp=1.5, cex=0.8)
legend("topright", legend= c("Line of best fit", "95% CI"), col=c("#000000", "#22222222"), pch= c(45,15), bty="n", pt.cex=c(1,0.8), y.intersp=1.5, cex=0.8)
dev.off()

 

The penultimate version nine

#Graph 9
#I would prefer custom axes, so I switch them off in plot() and add them manually
polX <- c(genxpoints, rev(genxpoints))
polY <- c(genypoints$lwr, rev(genypoints$upr))

tiff(filename = "publication.tiff", width = 12, height = 10, units = "cm", pointsize = 12, res = 300)
plot(c(-.2,1), c(0,80), frame.plot = FALSE, xlab="Electorate Flag Engagement", ylab="Support for Change", type="n", pch=19, col="#33333366", yaxt="n",xaxt="n")
polygon(polX,polY, col="#8888FF33", border=NA)
points(fr$flagEngagement[gelc], fr$voteForChange[gelc], pch=19, col="#33333366")
points(fr$flagEngagement[melc], fr$voteForChange[melc], pch=18, cex=1, col="#FF333366")
points(fr$flagEngagement[saelc], fr$voteForChange[saelc], pch=17, cex=0.8, col="#AA00AA66")
lines(genxpoints, genypoints$fit)
legend("topleft", legend= c("Māori Electorates", "South Auckland", "General Electorates"), col=c("#FF333366", "#AA00AA66", "#33333366"), pch= c(18,17,19), bty="n", pt.cex=c(1,0.8,0.8), y.intersp=1.5, cex=0.8)
legend("topright", legend= c("Line of best fit", "95% CI"), col=c("#000000", "#22222222"), pch= c(45,15), bty="n", pt.cex=c(1,0.8), y.intersp=1.5, cex=0.8)
axis(1, at=c(0,0.5,1), lwd=0, lwd.ticks=1, cex.axis=0.8)
axis(2, at=c(20,30,40,50), lwd=0, lwd.ticks=1, cex.axis=0.8, las=1)
dev.off()

 

The Papyrus ridden version 10, for notes about Windows and the extrafont package see

http://blog.revolutionanalytics.com/2012/09/how-to-use-your-favorite-fonts-in-r-charts.html

#Graph 10

#And then I think the font should be Papyrus. Customising fonts can be tricky (more so for Windows users may need the extrafonts package to easily go beyond the basic R options), so I suggest reading http://blog.revolutionanalytics.com/2012/09/how-to-use-your-favorite-fonts-in-r-charts.html
polX <- c(genxpoints, rev(genxpoints))
polY <- c(genypoints$lwr, rev(genypoints$upr))

tiff(filename = "publication.tiff", width = 12, height = 10, units = "cm", pointsize = 12, res = 300)
plot(c(-.2,1), c(0,80), frame.plot = FALSE, xlab="Electorate Flag Engagement", ylab="Support for Change", type="n", pch=19, col="#33333366", yaxt="n",xaxt="n", family="Papyrus")
polygon(polX,polY, col="#8888FF33", border=NA)
points(fr$flagEngagement[gelc], fr$voteForChange[gelc], pch=19, col="#33333366")
points(fr$flagEngagement[melc], fr$voteForChange[melc], pch=18, cex=1, col="#FF333366")
points(fr$flagEngagement[saelc], fr$voteForChange[saelc], pch=17, cex=0.8, col="#AA00AA66")
lines(genxpoints, genypoints$fit)
legend("topleft", legend= c("Māori Electorates", "South Auckland", "General Electorates"), col=c("#FF333366", "#AA00AA66", "#33333366"), pch= c(18,17,19), bty="n", pt.cex=c(1,0.8,0.8), y.intersp=1.5, cex=0.8)
legend("topright", legend= c("Line of best fit", "95% CI"), col=c("#000000", "#22222222"), pch= c(45,15), bty="n", pt.cex=c(1,0.8), y.intersp=1.5, cex=0.8)
axis(1, at=c(0,0.5,1), lwd=0, lwd.ticks=1, cex.axis=0.8)
axis(2, at=c(20,30,40,50), lwd=0, lwd.ticks=1, cex.axis=0.8, las=1)
dev.off()

Leave a comment