MVKSA | World Bank development indicators

World Bank development indicators for 2011.

library(ks)
library(colorspace)
library(RColorBrewer)

## colour functions
pt.col <- function(pos=1, alpha=1, ...) {return(paste0(brewer.pal(12, "Paired"), format(as.hexmode(round(alpha*255,0)), width=2))[pos])}
seq.col <- function(n, alpha=1, ...){c("transparent", rev(sequential_hcl(n=n-1, h=290, power=1.1, alpha=alpha, ...)))}

## plot parameters 
par(oma=c(0,0,0,0)+0.1, mgp=c(1.8,0.5,0), mar=c(2.9,2.9,0,0)+0.1, cex.axis=1.2, cex.lab=1.2)

## load data
data(worldbank)

## Fig 1.2
## scatter plot matrix
wb <- worldbank[,-1]
wb[,2] <- wb[,2]/1000
var.lab <- c(expression(CO[2]*"/capita"), "GDP/capita", "GDP growth", "Inflation", "Internet", "Agriculture")

pairs(wb, upper.panel=NULL, pch=16, col=pt.col(4,alpha=0.4), labels=var.lab, gap=0, oma=c(1.5,1.5,1.5,1.5)+0.1, mgp=c(0.5,0.5,0), mar=c(0,0,0,0))

## Fig 4.1a
## GDP-inflation scatter plot
xlab <- "GDP growth (%)"
ylab <- "Inflation (%)"
xlim <- c(-70 ,25)
ylim <- c(-25, 70)
zlim <- c(0, 0.022)
wb1 <- as.matrix(na.omit(worldbank[,4:5]))

## kernel density estimate
fhat.wb1 <- kde(x=wb1)

plot(fhat.wb1, cont=0, asp=1, xlim=xlim, ylim=ylim, xaxs="i", yaxs="i", xlab=xlab, ylab=ylab)
points(wb1, cex=1.4, pch=16, col=pt.col(4,alpha=0.5))
plotmixt(mus=c(-60,-10), Sigmas=fhat.wb1$H, props=1, drawlabels=FALSE, add=TRUE)

## Fig 4.1b
## fixed bandwidth kernel density estimate
plot(fhat.wb1, display="persp", thin=5, zlim=zlim, xlab=xlab, ylab=ylab, zlab="", box=TRUE, phi=20, col.fun=function(n){seq.col(n, alpha=1)}, border=grey(0, alpha=0.3))

## Fig 4.1c
## bandwidth varies with estimation point
fhat.wb1.ball <- kde.balloon(x=wb1, verbose=TRUE)
ep.ind1 <- c(11,31,51,71,91,111,131)
ep.ind2 <- c(11,31,51,71,91,111,131)
ep.sub <- as.matrix(expand.grid( fhat.wb1.ball$eval.points[[1]][ep.ind1], fhat.wb1.ball$eval.points[[2]][ep.ind2]))
H.pi.sub <- fhat.wb1.ball$H [as.matrix(expand.grid(ep.ind1, ep.ind2))]

plot(fhat.wb1, cont=0, asp=1, xaxs="i", yaxs="i", xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab)
for (i in 1:length(H.pi.sub))
{
    if (!is.na(H.pi.sub[i]) & H.pi.sub[i]>0)
    {
        plotmixt(mus=ep.sub[i,], Sigmas=H.pi.sub[i]*diag(2), props=1, drawlabels=FALSE, add=TRUE)
         points(ep.sub[i,1], ep.sub[i,2], cex=1, pch=21, col=grey(0,alpha=0.5), bg=pt.col(4))
    }
}

## Fig 4.1d
## balloon kernel density estimate
plot(fhat.wb1.ball, display="persp", thin=5, zlim=zlim, xlab=xlab, ylab=ylab, zlab="", box=TRUE, phi=20, col.fun=function(n){seq.col(n, alpha=1)}, border=grey(0, alpha=0.3))

## Fig 4.1e
## bandwidth varies with data point
fhat.wb1.sp <- kde.sp(x=wb1)
wb1.ind <- c(15, 17, 53, 62, 65 ,92 ,96, 107, 121, 129, 175)
H.ab.sub <- fhat.wb1.sp$H[wb1.ind]

plot(fhat.wb1, cont=0, asp=1, xaxs="i", yaxs="i", xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab)
for (i in 1:length(wb1.ind))
{
    if (H.ab.sub[i]>40) plotmixt(mus=wb1[wb1.ind[i],], Sigmas=H.ab.sub[i]*diag(2), props=1, drawlabels=FALSE, add=TRUE, xlim=c(-40,30), ylim=c(-20,80))
    else plotmixt(mus=wb1[wb1.ind[i],], Sigmas=H.ab.sub[i]*diag(2), props=1, drawlabels=FALSE, add=TRUE)
}
points(wb1[wb1.ind,], cex=1, pch=21, col=grey(0,alpha=0.5), bg=pt.col(4))

## Fig 4.1f
## sample point kernel density estimate
par(mar=c(0,0,0,0))
plot(fhat.wb1.sp, display="persp", thin=5, zlim=zlim, xlab=xlab, ylab=ylab, zlab="", box=TRUE, phi=20, col.fun=function(n){seq.col(n, alpha=1)}, border=grey(0, alpha=0.3))

## Fig 4.2a
## CO2-GDP scatter plot 
xlab <- expression(CO[2]*" emissions per capita ('000 Kg)")
ylab <- "GDP per capita (current '000 USD)"
xlim <- c(-5,55)
ylim <- c(-20,165)
wb2 <- as.matrix(na.omit(worldbank[,2:3]))
wb2[,2] <- wb2[,2]/1000
fhat.wb2 <- kde(x=wb2)

plot(fhat.wb2, cont=0, xaxs="i", yaxs="i", xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab)
points(wb2, cex=1.4, pch=16, col=pt.col(4,alpha=0.5))
rect(0, 0, 200, 200, border=grey(0.5, alpha=0.5), lty=2)

## Fig 4.2b
## fixed kernel density estimate
plot(fhat.wb2, drawlabels=FALSE, display="filled.contour", lwd=1, cont=seq(10,90,by=10), col.fun=function(n, alpha=1, ...){c("transparent", rev(sequential_hcl(n=n-1, h=290, power=0.6, alpha=alpha, ...)))}, xaxs="i", yaxs="i", xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab)
rect(0, 0, 200, 200, border=grey(0.5, alpha=0.5), lty=2)

## Fig 4.2c
## transformation kernel density estimate
fhat.wb2.trans <- kde(x=wb2, adj.positive=c(0,0), positive=TRUE)

plot(fhat.wb2.trans, drawlabels=FALSE, display="filled.contour", lwd=1, cont=seq(10,90,by=10), col.fun=function(n, alpha=1, ...){c("transparent", rev(sequential_hcl(n=n-1, h=290, power=0.6, alpha=alpha, ...)))}, xaxs="i", yaxs="i", xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab)
rect(0, 0, 200, 200, border=grey(0.5, alpha=0.5), lty=2)

## Fig 4.3a 
## Internet-agriculture scatter plot
xlab <- "Internet users (per 100)"
ylab <- "Agriculture value added (% of GDP)"
xlim <- c(-10, 110)
ylim <- c(-10, 110)
wb3 <- as.matrix(na.omit(worldbank[,6:7]))
fhat.wb3 <- kde(x=wb3)

plot(fhat.wb3, cont=0, asp=1, xaxs="i", yaxs="i", xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab)
points(wb3, cex=1.4, pch=16, col=pt.col(4,alpha=0.5))
rect(0, 0, 100, 100, border=grey(0.5, alpha=0.5), lty=2)
plotmixt(mus=c(80, 80), Sigmas=fhat.wb3$H, props=1, xlim=c(0,100), ylim=c(0,100), drawlabels=FALSE, add=TRUE)

## Fig 4.3b
## fixed kernel density estimate
plot(fhat.wb3, display="filled.contour", col.fun=function(n){seq.col(n, alpha=1)}, border=1, lwd=1, drawlabels=FALSE, xaxs="i", yaxs="i", cont=seq(10,90,by=20), asp=1, xaxs="i", yaxs="i", xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab)
rect(0, 0, 100, 100, border=grey(0.5, alpha=0.5), lty=2)

## Fig 4.3c
## beta boundary kernels 
fhat.wb3.beta <- kde.boundary(x=wb3, xmin=c(0,0), xmax=c(100,100), boundary.kernel="beta")

bb.sub <- c(1:5,35,150)
for (i in bb.sub)
{
    bb1 <- ks:::dmvbeta.prod.kernel2.2d(x=wb3[i,], xmax=c(100,100), hs=2*sqrt(diag(fhat.wb3.beta$H)))
    plot(bb1, display="slice", lwd=1, drawlabels=FALSE, add=i>1, asp=1, xaxs="i", yaxs="i", xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab)
}
points(wb3[bb.sub,], cex=1, pch=21, col=grey(0,alpha=0.5), bg=pt.col(4))
rect(0, 0, 100, 100, border=grey(0.5, alpha=0.5), lty=2)

## Fig 4.3d
## beta boundary kernel density estimate
plot(fhat.wb3.beta, display="filled.contour", col.fun=function(n){seq.col(n, alpha=1)}, border=1, lwd=1, drawlabels=FALSE, xaxs="i", yaxs="i", cont=seq(10,90,by=20), asp=1, xaxs="i", yaxs="i", xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab)
rect(0, 0, 100, 100, border=grey(0.5, alpha=0.5), lty=2)

## Fig 4.3e
## linear boundary kernels
fhat.wb3.LB <- kde.boundary(x=wb3, xmin=c(0,0), xmax=c(100,100), boundary.kernel="linear")

bb.sub <- c(1:5,35,150)
for (i in bb.sub)
{
    bb1 <- ks:::dmvnorm.LB.kernel.2d(x=wb3[i,], xmax=c(100,100), H=fhat.wb3.LB$H)
    plot(bb1, display="slice", lwd=1, drawlabels=FALSE, add=i>1, asp=1, xaxs="i", yaxs="i", xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab)
}
points(wb3[bb.sub,], cex=1, pch=21, col=grey(0,alpha=0.5), bg=pt.col(4))
rect(0, 0, 100, 100, border=grey(0.5, alpha=0.5), lty=2)

## Fig 4.3f
## linear boundary kernel density estimate
plot(fhat.wb3.LB, display="filled.contour", col.fun=function(n){seq.col(n, alpha=1)}, border=1, lwd=1, drawlabels=FALSE, cont=seq(10,90,by=20), asp=1, xaxs="i", yaxs="i", xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab)
rect(0, 0, 100, 100, border=grey(0.5, alpha=0.5), lty=2)