-
Notifications
You must be signed in to change notification settings - Fork 52
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
74 changed files
with
873 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
# Plot an example comparison of usual to locally-weighted conformal, on | ||
# heteroskedastic data | ||
library(conformalInference) | ||
|
||
# Generate some example training data, clearly heteroskedastic | ||
set.seed(33) | ||
n = 1000 | ||
x = runif(n,0,2*pi) | ||
y = sin(x) + x*pi/30*rnorm(n) | ||
|
||
# Generate some example test data | ||
n0 = 5000 | ||
x0 = seq(0,2*pi,length=n0) | ||
y0 = sin(x0) + x0*pi/30*rnorm(n0) | ||
o = order(x0) | ||
|
||
# Smoothing spline training and prediction functions, where the smoothing | ||
# parameter is chosen via cross-validation | ||
funs = smooth.spline.funs(cv=TRUE) | ||
|
||
# Split conformal, using smoothing splines | ||
out.split = conformal.pred.split(x, y, x0, alpha=0.1, seed=10, | ||
train.fun=funs$train, predict.fun=funs$predict) | ||
out.split.cov = out.split$lo <= y0 & y0 <= out.split$up | ||
out.split.len = out.split$up - out.split$lo | ||
|
||
pcol = "gray50" | ||
mar = c(4.25,4.25,3.25,1) | ||
w = 5 | ||
h = 5.5 | ||
|
||
pdf(file="fig/sin.split.pdf",w=w,h=h) | ||
par(mar=mar) | ||
plot(c(), c(), xlab="X", ylab="Y", xlim=range(x0), | ||
ylim=range(c(y0,out.split$lo,out.split$up)), col="white", | ||
main=paste0("Split Conformal Prediction Intervals\n", | ||
sprintf("Coverage: %0.3f, Average Length: %0.3f", | ||
mean(out.split.cov),mean(out.split.len)))) | ||
polygon(c(x0[o],rev(x0[o])), c(out.split$lo[o],rev(out.split$up[o])), | ||
col="pink", border=NA) | ||
lines(x0[o], out.split$pred[o], lwd=2, col="red") | ||
points(x, y, col=pcol) | ||
graphics.off() | ||
cat("fig/sin.split.pdf\n") | ||
|
||
# Split conformal, using smooth splines for both mean and residual | ||
# estimation | ||
out.split.local = conformal.pred.split(x, y, x0, alpha=0.1, seed=10, | ||
train.fun=funs$train, predict.fun=funs$predict, | ||
mad.train.fun=funs$train, mad.predict.fun=funs$predict) | ||
out.split.local.cov = out.split.local$lo <= y0 & y0 <= out.split.local$up | ||
out.split.local.len = out.split.local$up - out.split.local$lo | ||
|
||
pdf(file="fig/sin.split.local.pdf",w=w,h=h) | ||
par(mar=mar) | ||
plot(c(), c(), xlab="X", ylab="Y", xlim=range(x0), | ||
ylim=range(c(y0,out.split.local$lo,out.split.local$up)), col="white", | ||
main=paste0("Locally-Weighted Split Conformal\n", | ||
sprintf("Coverage: %0.3f, Average Length: %0.3f", | ||
mean(out.split.local.cov),mean(out.split.local.len)))) | ||
polygon(c(x0[o],rev(x0[o])), | ||
c(out.split.local$lo[o],rev(out.split.local$up[o])), | ||
col="lightblue", border=NA) | ||
lines(x0[o], out.split.local$pred[o], lwd=2, col="blue") | ||
points(x, y, col=pcol) | ||
graphics.off() | ||
cat("fig/sin.split.local.pdf\n") | ||
|
||
# Plot local coverage and length | ||
|
||
# Smoothing spline | ||
s.split.cov = smooth.spline(x0,as.numeric(out.split.cov),cv=TRUE)$y | ||
s.split.local.cov = smooth.spline(x0,as.numeric(out.split.local.cov),cv=TRUE)$y | ||
s.split.len = smooth.spline(x0,as.numeric(out.split.len),cv=TRUE)$y | ||
s.split.local.len = smooth.spline(x0,as.numeric(out.split.local.len),cv=TRUE)$y | ||
|
||
pdf(file="fig/sin.coverages.pdf",w=w,h=h) | ||
par(mar=mar) | ||
plot(c(), c(), xlab="X", ylab="Local coverage", xlim=range(x0), | ||
ylim=c(0,1), main="Local Coverage of Prediction Intervals") | ||
lines(x0, s.split.cov, col="red") | ||
lines(x0, s.split.local.cov, col="blue") | ||
legend("bottomright", c("red","blue"), lty=c(1,1), col=c("red","blue"), | ||
legend=c("Usual","Locally-weighted")) | ||
graphics.off() | ||
cat("fig/sin.coverages.pdf\n") | ||
|
||
pdf(file="fig/sin.lengths.pdf",w=w,h=h) | ||
par(mar=mar) | ||
plot(c(), c(), xlab="X", ylab="Local length", xlim=range(x0), | ||
ylim=range(c(s.split.len,s.split.local.len)), | ||
main="Local Length of Prediction Intervals") | ||
lines(x0, s.split.len, col="red") | ||
lines(x0, s.split.local.len, col="blue") | ||
legend("bottomright", c("red","blue"), lty=c(1,1), col=c("red","blue"), | ||
legend=c("Usual","Locally-weighted")) | ||
graphics.off() | ||
cat("fig/sin.lengths.pdf\n") | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
# Plot a few examples of LOCO analyses | ||
library(conformalInference) | ||
|
||
##### First the local type ##### | ||
|
||
# Generate some training data | ||
set.seed(33) | ||
n = 1000; p = 6 | ||
x = matrix(runif(n*p,-1,1),n,p) | ||
f1 = function(u){sin((u+1)*pi)*(u<0)} | ||
f2 = function(u){sin(u*pi)} | ||
f3 = function(u){sin((u+1)*pi)*(u>0)} | ||
|
||
mu = rowSums(cbind(f1(x[,1]),f2(x[,2]),f3(x[,3]))) | ||
y = mu + rnorm(n,0,0.1) | ||
|
||
# Form training and prediction functions for a low-dimensional additive | ||
# model, using the gam package | ||
library(gam) | ||
train.fun = function(x,y) { | ||
p = ncol(x) | ||
datafr = data.frame(x) | ||
str = paste("y ~",paste(paste0("s(X",1:p,",df=",5,")"),collapse="+")) | ||
return(gam(formula(str),data=datafr)) | ||
} | ||
predict.fun = function(out,x0) { | ||
return(predict(out,data.frame(x0))) | ||
} | ||
|
||
# Use LOCO, with ROO conformal to form the prediction intervals for | ||
# excess test error | ||
out.loco = loco.roo(x, y, alpha=0.1, vars=0, | ||
train.fun=train.fun, predict.fun=predict.fun, verb=TRUE) | ||
|
||
# Compute "c-values", the proportion of intervals that have a left endpoint | ||
# less than or equal to 0 | ||
cvals = colMeans(out.loco$lo[,,1] <= 0) | ||
cvals | ||
|
||
mar = c(4.25,4.25,3.25,1) | ||
w = 4 | ||
h = 4.5 | ||
xlab = "Location" | ||
ylab = "Interval" | ||
|
||
# For each variable in consideration, plot the intervals | ||
for (j in 1:p) { | ||
pdf(file=sprintf("fig/loco.am.comp%i.pdf",j),w=w,h=h) | ||
par(mar=mar) | ||
plot(x[,j],x[,j],ylim=range(c(out.loco$lo[,j,1],out.loco$up[,j,1])), | ||
xlab=xlab,ylab=ylab,main=paste("Component",j),col=NA) | ||
cols = ifelse(out.loco$lo[,j,1] <= 0, 1, 3) | ||
segments(x[,j],out.loco$lo[,j,1],x[,j],out.loco$up[,j,1],col=cols) | ||
abline(h=0, lty=2, lwd=2, col=2) | ||
graphics.off() | ||
cat(sprintf("fig/loco.am.comp%i.pdf\n",j)) | ||
} | ||
cat("\n") | ||
|
||
##### Next the global type ##### | ||
|
||
# Generate some example training data | ||
set.seed(33) | ||
n = 200; p = 500; s = 5 | ||
x = matrix(rnorm(n*p),n,p) | ||
beta = 2*c(rnorm(s),rep(0,p-s)) | ||
y = x %*% beta + rnorm(n) | ||
|
||
# Lasso training and prediction functions, using cross-validation over 100 | ||
# values of lambda, with the 1se rule | ||
my.lasso.funs = lasso.funs(nlambda=100,cv=TRUE,cv.rule="1se") | ||
|
||
# Split-sample LOCO analysis | ||
out.lasso = loco(x, y, alpha=0.1, train.fun=my.lasso.funs$train, | ||
predict.fun=my.lasso.funs$predict, active.fun=my.lasso.funs$active.fun, | ||
verbose=TRUE) | ||
|
||
# Print out Wilcoxon results | ||
print(out.lasso, test="wilcox") | ||
|
||
# Plot the Wilcoxon intervals | ||
mar = c(4.25,4.25,3.25,1) | ||
w = 5 | ||
h = 5.5 | ||
|
||
pdf(file="fig/loco.lasso.pdf",w=w,h=h) | ||
par(mar=mar) | ||
ylim = range(out.lasso$inf.wilcox[[1]][,2:3]) | ||
J = length(out.lasso$active[[1]]) | ||
plot(c(), c(), xlim=c(1,J), ylim=ylim, xaxt="n", | ||
main="LOCO Analysis from Lasso + CV Model", | ||
xlab="Variable", ylab="Confidence interval") | ||
axis(side=1, at=1:J, labels=FALSE) | ||
for (j in 1:J) { | ||
axis(side=1, at=j, labels=out.lasso$active[[1]][j], cex.axis=0.75, | ||
line=0.5*j%%2) | ||
} | ||
abline(h=0) | ||
segments(1:J, out.lasso$inf.wilcox[[1]][,2], | ||
1:J, out.lasso$inf.wilcox[[1]][,3], | ||
col="red", lwd=2) | ||
graphics.off() | ||
cat("\nfig/loco.lasso.pdf\n\n") | ||
|
||
# Now repeat the analysis with forward stepwise, using cross-validation over | ||
# 50 steps, with the 1se rule | ||
|
||
# Forward stepwise training and prediction functions | ||
my.step.funs = lars.funs(type="step",max.steps=50,cv=TRUE,cv.rule="1se") | ||
|
||
# Split-sample LOCO analysis | ||
out.step = loco(x, y, alpha=0.1, train.fun=my.step.funs$train, | ||
predict.fun=my.step.funs$predict, active.fun=my.step.funs$active.fun, | ||
verbose=TRUE) | ||
|
||
# Print out Wilcoxon results | ||
print(out.step, test="wilcox") | ||
|
||
# Plot the Wilcoxon intervals | ||
mar = c(4.25,4.25,3.25,1) | ||
w = 5 | ||
h = 5.5 | ||
|
||
pdf(file="fig/loco.step.pdf",w=w,h=h) | ||
par(mar=mar) | ||
ylim = range(out.step$inf.wilcox[[1]][,2:3]) | ||
J = length(out.step$active[[1]]) | ||
plot(c(), c(), xlim=c(1,J), ylim=ylim, xaxt="n", | ||
main="LOCO Analysis from Stepwise + CV Model", | ||
xlab="Variable", ylab="Confidence interval") | ||
axis(side=1, at=1:J, labels=FALSE) | ||
for (j in 1:J) { | ||
axis(side=1, at=j, labels=out.step$active[[1]][j], cex.axis=0.75, | ||
line=0.5*j%%2) | ||
} | ||
abline(h=0) | ||
segments(1:J, out.step$inf.wilcox[[1]][,2], | ||
1:J, out.step$inf.wilcox[[1]][,3], | ||
col="red", lwd=2) | ||
graphics.off() | ||
cat("\nfig/loco.step.pdf\n\n") | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Plot all comparisons of conformal methods, across settings A through E, | ||
# low- and high-dimensional | ||
library(conformalInference) | ||
|
||
dims = c("lo","hi") | ||
method.nums = c(1,2,5,3,4) | ||
method.names = c("Lasso","Elastic net","Stepwise","Spam","Random forest") | ||
cols = c(1,2,"purple",3,4) | ||
lets = c("A","B","C","D") | ||
w = 5 | ||
h = 5.5 | ||
|
||
for (dim in dims) { | ||
for (j in 1:length(lets)) { | ||
name = paste0("many.",dim,".sim",lets[j]) | ||
x = suppressWarnings(tryCatch({readRDS(paste0("rds/",name,".rds"))}, | ||
error=function(e) TRUE )) | ||
if (isTRUE(x)) next | ||
|
||
main = paste0(c("Coverage","Length","Test Error"),", Setting ",lets[j]) | ||
plot(x, se=T, log="", method.nums=method.nums, method.names=method.names, | ||
main=main, cols=cols, make.pdf=T, fig.dir="fig/", file.prefix=name, | ||
cex.main=1.5, w=w, h=h) | ||
|
||
cat(paste0("fig/",name,".pdf\n")) | ||
}} | ||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
## Linear regression: test out conformal intervals and parametric intervals | ||
## across a variety of high-dimensional settings | ||
library(conformalInference) | ||
|
||
# Set some overall simulation parameters | ||
n = 500; p = 490 # Numbers of observations and features | ||
s = 10 # Number of truly relevant features | ||
n0 = 100 # Number of points at which to make predictions | ||
nrep = 50 # Number of repetitions for a given setting | ||
sigma = 1 # Marginal error standard deviation | ||
bval = 1 # Magnitude of nonzero coefficients | ||
lambda = c(0,1,10,50) # Lambda values to try in ridge regression | ||
alpha = 0.1 # Miscoverage level | ||
|
||
# Define conformal inference functions: these are basically just wrappers | ||
# around a particular instatiation of conformal.pred, conformal.pred.jack, or | ||
# conformal.pred.split | ||
my.lm.funs = lm.funs(lambda=lambda) | ||
my.conf.fun = function(x, y, x0) { | ||
conformal.pred(x,y,x0,alpha=alpha,verb="\t\t", | ||
train.fun=my.lm.funs$train, | ||
predict.fun=my.lm.funs$predict) | ||
} | ||
my.jack.fun = function(x, y, x0) { | ||
conformal.pred.jack(x,y,x0,alpha=alpha,verb="\t\t", | ||
train.fun=my.lm.funs$train, | ||
predict.fun=my.lm.funs$predict, | ||
special.fun=my.lm.funs$special) | ||
} | ||
my.lm.funs.2 = lm.funs(lambda=c(1e-8,lambda[-1])) | ||
my.split.fun = function(x, y, x0) { | ||
conformal.pred.split(x,y,x0,alpha=alpha, | ||
train.fun=my.lm.funs.2$train, | ||
predict.fun=my.lm.funs.2$predict) | ||
} | ||
|
||
# Hack together our own "conformal" inference function, really, just one that | ||
# returns the parametric intervals | ||
my.param.fun = function(x, y, x0) { | ||
n = nrow(x); n0 = nrow(x0) | ||
out = my.lm.funs$train(x,y) | ||
fit = matrix(my.lm.funs$predict(out,x),nrow=n) | ||
pred = matrix(my.lm.funs$predict(out,x0),nrow=n0) | ||
m = ncol(pred) | ||
|
||
x1 = cbind(rep(1,n0),x0) | ||
q = qt(1-alpha/2, n-p-1) | ||
lo = up = matrix(0,n0,m) | ||
|
||
for (j in 1:m) { | ||
sig.hat = sqrt(sum((y - fit[,j])^2)/(n-ncol(x1))) | ||
g = diag(x1 %*% chol.solve(out$chol.R[[j]], t(x1))) | ||
lo[,j] = pred[,j] - sqrt(1+g)*sig.hat*q | ||
up[,j] = pred[,j] + sqrt(1+g)*sig.hat*q | ||
} | ||
|
||
# Return proper outputs in proper formatting | ||
return(list(pred=pred,lo=lo,up=up,fit=fit)) | ||
} | ||
|
||
# Lastly, define a split conformal function that uses cross-validation | ||
my.ridge.funs = ridge.funs(cv=TRUE) | ||
my.split.cv.fun = function(x, y, x0) { | ||
return(conformal.pred.split(x,y,x0,alpha=alpha, | ||
train.fun=my.ridge.funs$train, | ||
predict.fun=my.ridge.funs$predict)) | ||
} | ||
|
||
# Now put together a list with all of our conformal inference functions | ||
conformal.pred.funs = list(my.conf.fun, my.jack.fun, my.split.fun, my.param.fun, | ||
my.split.cv.fun) | ||
names(conformal.pred.funs) = c("Conformal","Jackknife","Split conformal", | ||
"Parametric","Split + CV") | ||
|
||
path = "rds/lm.hi2." | ||
#source("sim.setting.a.R") | ||
#source("sim.setting.b.R") | ||
source("sim.setting.c.R") | ||
|
||
## # What values of lambda did split+CV choose? | ||
## lambda.a = lambda.b = lambda.c = c() | ||
## for (r in 1:nrep) { | ||
## xy.a = sim.xy(n, p, x.dist="normal", cor="none", | ||
## mean.fun="linear", s=s, error.dist="normal", sigma=sigma, | ||
## bval=bval, sigma.type="const") | ||
## i = sample(n,floor(n/2)) | ||
## lambda.a = c(lambda.a, n/2*cv.glmnet(xy.a$x[i,],xy.a$y[i])$lambda.min) | ||
|
||
## xy.b = sim.xy(n, p, x.dist="normal", cor="none", | ||
## mean.fun="additive", m=4, s=s, error.dist="t", df=2, | ||
## sigma=sigma, bval=bval, sigma.type="const") | ||
## i = sample(n,floor(n/2)) | ||
## lambda.b = c(lambda.b, n/2*cv.glmnet(xy.b$x[i,],xy.b$y[i])$lambda.min) | ||
|
||
## xy.c = sim.xy(n, p, x.dist="mix", cor="auto", k=5, | ||
## mean.fun="linear", error.dist="t", df=2, sigma=sigma, bval=bval, | ||
## sigma.type="var") | ||
## i = sample(n,floor(n/2)) | ||
## lambda.c = c(lambda.c, n/2*cv.glmnet(xy.c$x[i,],xy.c$y[i])$lambda.min) | ||
|
||
## cat(paste0(r,".")) | ||
## } | ||
## cat("\n") | ||
## cat(mean(lambda.a),"\n") # 42.72443 | ||
## cat(mean(lambda.b),"\n") # 100.7886 | ||
## cat(mean(lambda.c),"\n") # 618.3248 |
Oops, something went wrong.