# Cyrus Samii # RD with missing data, with IPW sensivity analysis rm(list = ls()) # For ignorable missingness # Fake data set.seed(1234) n <- 100 x <- runif(n)*20 E.y <- .1*(x-max(x))^2 + 2 + x y.all.sc <- E.y + rnorm(n,0,sd=2) y.all <- sd(x)*(y.all.sc/sd(y.all.sc)) index <- 1:n # Missingness invlogit <- function(x){(1+exp(-x))^(-1)} p.r <- 1-invlogit(-2+.25*x) r <- rbinom(n,1,p.r) y <- y.all y[r==0] <- NA # Suppose the goal is to use a local linear approximation # to estimate the value at the cutpoint (=0). # Go through each of the below step by step and look at what # happens on the graph. # This gives the linear approximation if we had no missingness: par(mfrow=c(1,2)) plot(x,y.all, pch=19, col="gray", xlim=c(-2,max(x)), main="Ignorable case") abline(v=0, lty="dashed") abline(lm(y.all~x), col="gray") text(0, coef(lm(y.all~x))[1], labels="all data", cex=.75) # This is the linear approx with the complete cases: points(x,y, col="red", pch=19) abline(lm(y~x), col="red") text(0, coef(lm(y~x))[1], labels="compl. cases", col="red", cex=.75) # These are worst case bounds: y.max <- y y.max[r==0] <- max(y, na.rm=T) points(x, y.max, col="blue", cex=.5) abline(lm(y.max~x), col="blue") text(0, coef(lm(y.max~x))[1], labels="imputation lower bound", col="blue", cex=.75) y.min <- y y.min[r==0] <- min(y, na.rm=T) points(x, y.min, col="green", cex=.5) abline(lm(y.min~x), col="green") text(0, coef(lm(y.min~x))[1], labels="imputation upper bound", col="green", cex=.75) # This is the IPW adjusted linear approximation: logitfit <- glm(r~x, family="binomial") p.hat <- predict(logitfit, type="response") pweight <- 1/p.hat points(x,y, col="purple", cex=sqrt(pweight)) abline(lm(y~x, w=pweight), col="purple") text(0, coef(lm(y~x, w=pweight))[1], labels="IPW", col="purple", cex=.75) # IPW sensitivity analysis: # Get residualized y: olsfit <- lm(y~x) resids <- y-predict(olsfit, newdata=data.frame(x)) resids.xscaled <- sd(x)*resids/sd(resids, na.rm=T) # This is IPW adjusted linear approximation but assuming # that ignorability is violated to an extent that residualized # y has as much explanatory power as x in the positive direction: p.hat.hi <- invlogit(predict(logitfit, type="link")+.25*resids.xscaled) pweight.hi <- 1/p.hat.hi points(x,y, col="purple", cex=sqrt(pweight.hi)) abline(lm(y~x, w=pweight.hi), col="orange") text(0, coef(lm(y~x, w=pweight.hi))[1], labels="IPW low", col="orange", cex=.75) # This is IPW adjusted linear approximation but assuming # that ignorability is violated to an extent that residualized # y has as much explanatory power as x in the negative direction: p.hat.lo <- invlogit(predict(logitfit, type="link")-.25*resids.xscaled) pweight.lo <- 1/p.hat.lo points(x,y, col="purple", cex=sqrt(pweight.lo)) abline(lm(y~x, w=pweight.lo), col="pink") text(0, coef(lm(y~x, w=pweight.lo))[1], labels="IPW high", col="pink", cex=.75) # Now non-ignorable missingness # Missingness invlogit <- function(x){(1+exp(-x))^(-1)} p.r <- 1-invlogit(-2+.25*(y.all-mean(y.all))) r <- rbinom(n,1,p.r) y <- y.all y[r==0] <- NA # Suppose the goal is to use a local linear approximation # to estimate the value at the cutpoint (=0). # Go through each of the below step by step and look at what # happens on the graph. # This gives the linear approximation if we had no missingness: plot(x,y.all, pch=19, col="gray", xlim=c(-2,max(x)), main="Non-ignorable case") abline(v=0, lty="dashed") abline(lm(y.all~x), col="gray") text(0, coef(lm(y.all~x))[1], labels="all data", col="gray", cex=.75) # This is the linear approx with the complete cases: points(x,y, col="red", pch=19) abline(lm(y~x), col="red") text(0, coef(lm(y~x))[1], labels="compl. cases", col="red", cex=.75) # These are worst case bounds: y.max <- y y.max[r==0] <- max(y, na.rm=T) points(x, y.max, col="blue", cex=.5) abline(lm(y.max~x), col="blue") text(0, coef(lm(y.max~x))[1], labels="imputation lower bound", col="blue", cex=.75) y.min <- y y.min[r==0] <- min(y, na.rm=T) points(x, y.min, col="green", cex=.5) abline(lm(y.min~x), col="green") text(0, coef(lm(y.min~x))[1], labels="imputation upper bound", col="green", cex=.75) # This is the IPW adjusted linear approximation: logitfit <- glm(r~x, family="binomial") p.hat <- predict(logitfit, type="response") pweight <- 1/p.hat points(x,y, col="purple", cex=sqrt(pweight)) abline(lm(y~x, w=pweight), col="purple") text(0, coef(lm(y~x, w=pweight))[1], labels="IPW", col="purple", cex=.75) # IPW sensitivity analysis: # Get residualized y: olsfit <- lm(y~x) resids <- y-predict(olsfit, newdata=data.frame(x)) resids.xscaled <- sd(x)*resids/sd(resids, na.rm=T) # This is IPW adjusted linear approximation but assuming # that ignorability is violated to an extent that residualized # y has as much explanatory power as x in the positive direction: p.hat.hi <- invlogit(predict(logitfit, type="link")+.25*resids.xscaled) pweight.hi <- 1/p.hat.hi points(x,y, col="purple", cex=sqrt(pweight.hi)) abline(lm(y~x, w=pweight.hi), col="orange") text(0, coef(lm(y~x, w=pweight.hi))[1], labels="IPW low", col="orange", cex=.75) # This is IPW adjusted linear approximation but assuming # that ignorability is violated to an extent that residualized # y has as much explanatory power as x in the negative direction: p.hat.lo <- invlogit(predict(logitfit, type="link")-.25*resids.xscaled) pweight.lo <- 1/p.hat.lo points(x,y, col="purple", cex=sqrt(pweight.lo)) abline(lm(y~x, w=pweight.lo), col="pink") text(0, coef(lm(y~x, w=pweight.lo))[1], labels="IPW high", col="pink", cex=.75)