Skip to content

Commit

Permalink
Renaming Lei (2018) examples
Browse files Browse the repository at this point in the history
  • Loading branch information
ryantibs committed Apr 18, 2019
1 parent 77bfa6d commit ca7de81
Show file tree
Hide file tree
Showing 74 changed files with 873 additions and 0 deletions.
Binary file added lei2018/.DS_Store
Binary file not shown.
101 changes: 101 additions & 0 deletions lei2018/fig.local.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Plot an example comparison of usual to locally-weighted conformal, on
# heteroskedastic data
library(conformalInference)

# Generate some example training data, clearly heteroskedastic
set.seed(33)
n = 1000
x = runif(n,0,2*pi)
y = sin(x) + x*pi/30*rnorm(n)

# Generate some example test data
n0 = 5000
x0 = seq(0,2*pi,length=n0)
y0 = sin(x0) + x0*pi/30*rnorm(n0)
o = order(x0)

# Smoothing spline training and prediction functions, where the smoothing
# parameter is chosen via cross-validation
funs = smooth.spline.funs(cv=TRUE)

# Split conformal, using smoothing splines
out.split = conformal.pred.split(x, y, x0, alpha=0.1, seed=10,
train.fun=funs$train, predict.fun=funs$predict)
out.split.cov = out.split$lo <= y0 & y0 <= out.split$up
out.split.len = out.split$up - out.split$lo

pcol = "gray50"
mar = c(4.25,4.25,3.25,1)
w = 5
h = 5.5

pdf(file="fig/sin.split.pdf",w=w,h=h)
par(mar=mar)
plot(c(), c(), xlab="X", ylab="Y", xlim=range(x0),
ylim=range(c(y0,out.split$lo,out.split$up)), col="white",
main=paste0("Split Conformal Prediction Intervals\n",
sprintf("Coverage: %0.3f, Average Length: %0.3f",
mean(out.split.cov),mean(out.split.len))))
polygon(c(x0[o],rev(x0[o])), c(out.split$lo[o],rev(out.split$up[o])),
col="pink", border=NA)
lines(x0[o], out.split$pred[o], lwd=2, col="red")
points(x, y, col=pcol)
graphics.off()
cat("fig/sin.split.pdf\n")

# Split conformal, using smooth splines for both mean and residual
# estimation
out.split.local = conformal.pred.split(x, y, x0, alpha=0.1, seed=10,
train.fun=funs$train, predict.fun=funs$predict,
mad.train.fun=funs$train, mad.predict.fun=funs$predict)
out.split.local.cov = out.split.local$lo <= y0 & y0 <= out.split.local$up
out.split.local.len = out.split.local$up - out.split.local$lo

pdf(file="fig/sin.split.local.pdf",w=w,h=h)
par(mar=mar)
plot(c(), c(), xlab="X", ylab="Y", xlim=range(x0),
ylim=range(c(y0,out.split.local$lo,out.split.local$up)), col="white",
main=paste0("Locally-Weighted Split Conformal\n",
sprintf("Coverage: %0.3f, Average Length: %0.3f",
mean(out.split.local.cov),mean(out.split.local.len))))
polygon(c(x0[o],rev(x0[o])),
c(out.split.local$lo[o],rev(out.split.local$up[o])),
col="lightblue", border=NA)
lines(x0[o], out.split.local$pred[o], lwd=2, col="blue")
points(x, y, col=pcol)
graphics.off()
cat("fig/sin.split.local.pdf\n")

# Plot local coverage and length

# Smoothing spline
s.split.cov = smooth.spline(x0,as.numeric(out.split.cov),cv=TRUE)$y
s.split.local.cov = smooth.spline(x0,as.numeric(out.split.local.cov),cv=TRUE)$y
s.split.len = smooth.spline(x0,as.numeric(out.split.len),cv=TRUE)$y
s.split.local.len = smooth.spline(x0,as.numeric(out.split.local.len),cv=TRUE)$y

pdf(file="fig/sin.coverages.pdf",w=w,h=h)
par(mar=mar)
plot(c(), c(), xlab="X", ylab="Local coverage", xlim=range(x0),
ylim=c(0,1), main="Local Coverage of Prediction Intervals")
lines(x0, s.split.cov, col="red")
lines(x0, s.split.local.cov, col="blue")
legend("bottomright", c("red","blue"), lty=c(1,1), col=c("red","blue"),
legend=c("Usual","Locally-weighted"))
graphics.off()
cat("fig/sin.coverages.pdf\n")

pdf(file="fig/sin.lengths.pdf",w=w,h=h)
par(mar=mar)
plot(c(), c(), xlab="X", ylab="Local length", xlim=range(x0),
ylim=range(c(s.split.len,s.split.local.len)),
main="Local Length of Prediction Intervals")
lines(x0, s.split.len, col="red")
lines(x0, s.split.local.len, col="blue")
legend("bottomright", c("red","blue"), lty=c(1,1), col=c("red","blue"),
legend=c("Usual","Locally-weighted"))
graphics.off()
cat("fig/sin.lengths.pdf\n")



142 changes: 142 additions & 0 deletions lei2018/fig.loco.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# Plot a few examples of LOCO analyses
library(conformalInference)

##### First the local type #####

# Generate some training data
set.seed(33)
n = 1000; p = 6
x = matrix(runif(n*p,-1,1),n,p)
f1 = function(u){sin((u+1)*pi)*(u<0)}
f2 = function(u){sin(u*pi)}
f3 = function(u){sin((u+1)*pi)*(u>0)}

mu = rowSums(cbind(f1(x[,1]),f2(x[,2]),f3(x[,3])))
y = mu + rnorm(n,0,0.1)

# Form training and prediction functions for a low-dimensional additive
# model, using the gam package
library(gam)
train.fun = function(x,y) {
p = ncol(x)
datafr = data.frame(x)
str = paste("y ~",paste(paste0("s(X",1:p,",df=",5,")"),collapse="+"))
return(gam(formula(str),data=datafr))
}
predict.fun = function(out,x0) {
return(predict(out,data.frame(x0)))
}

# Use LOCO, with ROO conformal to form the prediction intervals for
# excess test error
out.loco = loco.roo(x, y, alpha=0.1, vars=0,
train.fun=train.fun, predict.fun=predict.fun, verb=TRUE)

# Compute "c-values", the proportion of intervals that have a left endpoint
# less than or equal to 0
cvals = colMeans(out.loco$lo[,,1] <= 0)
cvals

mar = c(4.25,4.25,3.25,1)
w = 4
h = 4.5
xlab = "Location"
ylab = "Interval"

# For each variable in consideration, plot the intervals
for (j in 1:p) {
pdf(file=sprintf("fig/loco.am.comp%i.pdf",j),w=w,h=h)
par(mar=mar)
plot(x[,j],x[,j],ylim=range(c(out.loco$lo[,j,1],out.loco$up[,j,1])),
xlab=xlab,ylab=ylab,main=paste("Component",j),col=NA)
cols = ifelse(out.loco$lo[,j,1] <= 0, 1, 3)
segments(x[,j],out.loco$lo[,j,1],x[,j],out.loco$up[,j,1],col=cols)
abline(h=0, lty=2, lwd=2, col=2)
graphics.off()
cat(sprintf("fig/loco.am.comp%i.pdf\n",j))
}
cat("\n")

##### Next the global type #####

# Generate some example training data
set.seed(33)
n = 200; p = 500; s = 5
x = matrix(rnorm(n*p),n,p)
beta = 2*c(rnorm(s),rep(0,p-s))
y = x %*% beta + rnorm(n)

# Lasso training and prediction functions, using cross-validation over 100
# values of lambda, with the 1se rule
my.lasso.funs = lasso.funs(nlambda=100,cv=TRUE,cv.rule="1se")

# Split-sample LOCO analysis
out.lasso = loco(x, y, alpha=0.1, train.fun=my.lasso.funs$train,
predict.fun=my.lasso.funs$predict, active.fun=my.lasso.funs$active.fun,
verbose=TRUE)

# Print out Wilcoxon results
print(out.lasso, test="wilcox")

# Plot the Wilcoxon intervals
mar = c(4.25,4.25,3.25,1)
w = 5
h = 5.5

pdf(file="fig/loco.lasso.pdf",w=w,h=h)
par(mar=mar)
ylim = range(out.lasso$inf.wilcox[[1]][,2:3])
J = length(out.lasso$active[[1]])
plot(c(), c(), xlim=c(1,J), ylim=ylim, xaxt="n",
main="LOCO Analysis from Lasso + CV Model",
xlab="Variable", ylab="Confidence interval")
axis(side=1, at=1:J, labels=FALSE)
for (j in 1:J) {
axis(side=1, at=j, labels=out.lasso$active[[1]][j], cex.axis=0.75,
line=0.5*j%%2)
}
abline(h=0)
segments(1:J, out.lasso$inf.wilcox[[1]][,2],
1:J, out.lasso$inf.wilcox[[1]][,3],
col="red", lwd=2)
graphics.off()
cat("\nfig/loco.lasso.pdf\n\n")

# Now repeat the analysis with forward stepwise, using cross-validation over
# 50 steps, with the 1se rule

# Forward stepwise training and prediction functions
my.step.funs = lars.funs(type="step",max.steps=50,cv=TRUE,cv.rule="1se")

# Split-sample LOCO analysis
out.step = loco(x, y, alpha=0.1, train.fun=my.step.funs$train,
predict.fun=my.step.funs$predict, active.fun=my.step.funs$active.fun,
verbose=TRUE)

# Print out Wilcoxon results
print(out.step, test="wilcox")

# Plot the Wilcoxon intervals
mar = c(4.25,4.25,3.25,1)
w = 5
h = 5.5

pdf(file="fig/loco.step.pdf",w=w,h=h)
par(mar=mar)
ylim = range(out.step$inf.wilcox[[1]][,2:3])
J = length(out.step$active[[1]])
plot(c(), c(), xlim=c(1,J), ylim=ylim, xaxt="n",
main="LOCO Analysis from Stepwise + CV Model",
xlab="Variable", ylab="Confidence interval")
axis(side=1, at=1:J, labels=FALSE)
for (j in 1:J) {
axis(side=1, at=j, labels=out.step$active[[1]][j], cex.axis=0.75,
line=0.5*j%%2)
}
abline(h=0)
segments(1:J, out.step$inf.wilcox[[1]][,2],
1:J, out.step$inf.wilcox[[1]][,3],
col="red", lwd=2)
graphics.off()
cat("\nfig/loco.step.pdf\n\n")

27 changes: 27 additions & 0 deletions lei2018/fig.many.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Plot all comparisons of conformal methods, across settings A through E,
# low- and high-dimensional
library(conformalInference)

dims = c("lo","hi")
method.nums = c(1,2,5,3,4)
method.names = c("Lasso","Elastic net","Stepwise","Spam","Random forest")
cols = c(1,2,"purple",3,4)
lets = c("A","B","C","D")
w = 5
h = 5.5

for (dim in dims) {
for (j in 1:length(lets)) {
name = paste0("many.",dim,".sim",lets[j])
x = suppressWarnings(tryCatch({readRDS(paste0("rds/",name,".rds"))},
error=function(e) TRUE ))
if (isTRUE(x)) next

main = paste0(c("Coverage","Length","Test Error"),", Setting ",lets[j])
plot(x, se=T, log="", method.nums=method.nums, method.names=method.names,
main=main, cols=cols, make.pdf=T, fig.dir="fig/", file.prefix=name,
cex.main=1.5, w=w, h=h)

cat(paste0("fig/",name,".pdf\n"))
}}

Binary file added lei2018/fig/.DS_Store
Binary file not shown.
Binary file added lei2018/fig/loco.am.comp1.pdf
Binary file not shown.
Binary file added lei2018/fig/loco.am.comp2.pdf
Binary file not shown.
Binary file added lei2018/fig/loco.am.comp3.pdf
Binary file not shown.
Binary file added lei2018/fig/loco.am.comp4.pdf
Binary file not shown.
Binary file added lei2018/fig/loco.am.comp5.pdf
Binary file not shown.
Binary file added lei2018/fig/loco.am.comp6.pdf
Binary file not shown.
Binary file added lei2018/fig/loco.lasso.pdf
Binary file not shown.
Binary file added lei2018/fig/loco.step.pdf
Binary file not shown.
Binary file added lei2018/fig/many.hi.simA.cov.pdf
Binary file not shown.
Binary file added lei2018/fig/many.hi.simA.err.pdf
Binary file not shown.
Binary file added lei2018/fig/many.hi.simA.len.pdf
Binary file not shown.
Binary file added lei2018/fig/many.hi.simB.cov.pdf
Binary file not shown.
Binary file added lei2018/fig/many.hi.simB.err.pdf
Binary file not shown.
Binary file added lei2018/fig/many.hi.simB.len.pdf
Binary file not shown.
Binary file added lei2018/fig/many.hi.simC.cov.pdf
Binary file not shown.
Binary file added lei2018/fig/many.hi.simC.err.pdf
Binary file not shown.
Binary file added lei2018/fig/many.hi.simC.len.pdf
Binary file not shown.
Binary file added lei2018/fig/many.hi.simD.cov.pdf
Binary file not shown.
Binary file added lei2018/fig/many.hi.simD.err.pdf
Binary file not shown.
Binary file added lei2018/fig/many.hi.simD.len.pdf
Binary file not shown.
Binary file added lei2018/fig/many.lo.simA.cov.pdf
Binary file not shown.
Binary file added lei2018/fig/many.lo.simA.err.pdf
Binary file not shown.
Binary file added lei2018/fig/many.lo.simA.len.pdf
Binary file not shown.
Binary file added lei2018/fig/many.lo.simB.cov.pdf
Binary file not shown.
Binary file added lei2018/fig/many.lo.simB.err.pdf
Binary file not shown.
Binary file added lei2018/fig/many.lo.simB.len.pdf
Binary file not shown.
Binary file added lei2018/fig/many.lo.simC.cov.pdf
Binary file not shown.
Binary file added lei2018/fig/many.lo.simC.err.pdf
Binary file not shown.
Binary file added lei2018/fig/many.lo.simC.len.pdf
Binary file not shown.
Binary file added lei2018/fig/sin.coverages.pdf
Binary file not shown.
Binary file added lei2018/fig/sin.lengths.pdf
Binary file not shown.
Binary file added lei2018/fig/sin.split.local.pdf
Binary file not shown.
Binary file added lei2018/fig/sin.split.pdf
Binary file not shown.
Binary file added lei2018/rds/.DS_Store
Binary file not shown.
Binary file added lei2018/rds/lm.hi.simA.rds
Binary file not shown.
Binary file added lei2018/rds/lm.hi.simB.rds
Binary file not shown.
Binary file added lei2018/rds/lm.hi.simC.rds
Binary file not shown.
Binary file added lei2018/rds/lm.lo.simA.rds
Binary file not shown.
Binary file added lei2018/rds/lm.lo.simB.rds
Binary file not shown.
Binary file added lei2018/rds/lm.lo.simC.rds
Binary file not shown.
Binary file added lei2018/rds/many.hi.simA.rds
Binary file not shown.
Binary file added lei2018/rds/many.hi.simB.rds
Binary file not shown.
Binary file added lei2018/rds/many.hi.simC.rds
Binary file not shown.
Binary file added lei2018/rds/many.hi.simD.rds
Binary file not shown.
Binary file added lei2018/rds/many.lo.simA.rds
Binary file not shown.
Binary file added lei2018/rds/many.lo.simB.rds
Binary file not shown.
Binary file added lei2018/rds/many.lo.simC.rds
Binary file not shown.
106 changes: 106 additions & 0 deletions lei2018/sim.lm.hi.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
## Linear regression: test out conformal intervals and parametric intervals
## across a variety of high-dimensional settings
library(conformalInference)

# Set some overall simulation parameters
n = 500; p = 490 # Numbers of observations and features
s = 10 # Number of truly relevant features
n0 = 100 # Number of points at which to make predictions
nrep = 50 # Number of repetitions for a given setting
sigma = 1 # Marginal error standard deviation
bval = 1 # Magnitude of nonzero coefficients
lambda = c(0,1,10,50) # Lambda values to try in ridge regression
alpha = 0.1 # Miscoverage level

# Define conformal inference functions: these are basically just wrappers
# around a particular instatiation of conformal.pred, conformal.pred.jack, or
# conformal.pred.split
my.lm.funs = lm.funs(lambda=lambda)
my.conf.fun = function(x, y, x0) {
conformal.pred(x,y,x0,alpha=alpha,verb="\t\t",
train.fun=my.lm.funs$train,
predict.fun=my.lm.funs$predict)
}
my.jack.fun = function(x, y, x0) {
conformal.pred.jack(x,y,x0,alpha=alpha,verb="\t\t",
train.fun=my.lm.funs$train,
predict.fun=my.lm.funs$predict,
special.fun=my.lm.funs$special)
}
my.lm.funs.2 = lm.funs(lambda=c(1e-8,lambda[-1]))
my.split.fun = function(x, y, x0) {
conformal.pred.split(x,y,x0,alpha=alpha,
train.fun=my.lm.funs.2$train,
predict.fun=my.lm.funs.2$predict)
}

# Hack together our own "conformal" inference function, really, just one that
# returns the parametric intervals
my.param.fun = function(x, y, x0) {
n = nrow(x); n0 = nrow(x0)
out = my.lm.funs$train(x,y)
fit = matrix(my.lm.funs$predict(out,x),nrow=n)
pred = matrix(my.lm.funs$predict(out,x0),nrow=n0)
m = ncol(pred)

x1 = cbind(rep(1,n0),x0)
q = qt(1-alpha/2, n-p-1)
lo = up = matrix(0,n0,m)

for (j in 1:m) {
sig.hat = sqrt(sum((y - fit[,j])^2)/(n-ncol(x1)))
g = diag(x1 %*% chol.solve(out$chol.R[[j]], t(x1)))
lo[,j] = pred[,j] - sqrt(1+g)*sig.hat*q
up[,j] = pred[,j] + sqrt(1+g)*sig.hat*q
}

# Return proper outputs in proper formatting
return(list(pred=pred,lo=lo,up=up,fit=fit))
}

# Lastly, define a split conformal function that uses cross-validation
my.ridge.funs = ridge.funs(cv=TRUE)
my.split.cv.fun = function(x, y, x0) {
return(conformal.pred.split(x,y,x0,alpha=alpha,
train.fun=my.ridge.funs$train,
predict.fun=my.ridge.funs$predict))
}

# Now put together a list with all of our conformal inference functions
conformal.pred.funs = list(my.conf.fun, my.jack.fun, my.split.fun, my.param.fun,
my.split.cv.fun)
names(conformal.pred.funs) = c("Conformal","Jackknife","Split conformal",
"Parametric","Split + CV")

path = "rds/lm.hi2."
#source("sim.setting.a.R")
#source("sim.setting.b.R")
source("sim.setting.c.R")

## # What values of lambda did split+CV choose?
## lambda.a = lambda.b = lambda.c = c()
## for (r in 1:nrep) {
## xy.a = sim.xy(n, p, x.dist="normal", cor="none",
## mean.fun="linear", s=s, error.dist="normal", sigma=sigma,
## bval=bval, sigma.type="const")
## i = sample(n,floor(n/2))
## lambda.a = c(lambda.a, n/2*cv.glmnet(xy.a$x[i,],xy.a$y[i])$lambda.min)

## xy.b = sim.xy(n, p, x.dist="normal", cor="none",
## mean.fun="additive", m=4, s=s, error.dist="t", df=2,
## sigma=sigma, bval=bval, sigma.type="const")
## i = sample(n,floor(n/2))
## lambda.b = c(lambda.b, n/2*cv.glmnet(xy.b$x[i,],xy.b$y[i])$lambda.min)

## xy.c = sim.xy(n, p, x.dist="mix", cor="auto", k=5,
## mean.fun="linear", error.dist="t", df=2, sigma=sigma, bval=bval,
## sigma.type="var")
## i = sample(n,floor(n/2))
## lambda.c = c(lambda.c, n/2*cv.glmnet(xy.c$x[i,],xy.c$y[i])$lambda.min)

## cat(paste0(r,"."))
## }
## cat("\n")
## cat(mean(lambda.a),"\n") # 42.72443
## cat(mean(lambda.b),"\n") # 100.7886
## cat(mean(lambda.c),"\n") # 618.3248
Loading

0 comments on commit ca7de81

Please sign in to comment.