# Cyrus Samii
# RD with missing data, with IPW sensivity analysis

rm(list = ls())

# For ignorable missingness

# Fake data
set.seed(1234)
n <- 100
x <- runif(n)*20
E.y <- .1*(x-max(x))^2 + 2 + x
y.all.sc <- E.y + rnorm(n,0,sd=2)
y.all <- sd(x)*(y.all.sc/sd(y.all.sc))
index <- 1:n 

# Missingness
invlogit <- function(x){(1+exp(-x))^(-1)}
p.r <- 1-invlogit(-2+.25*x)
r <- rbinom(n,1,p.r)
y <- y.all
y[r==0] <- NA

# Suppose the goal is to use a local linear approximation
# to estimate the value at the cutpoint (=0).

# Go through each of the below step by step and look at what
# happens on the graph.

# This gives the linear approximation if we had no missingness:
par(mfrow=c(1,2))
plot(x,y.all, pch=19, col="gray", xlim=c(-2,max(x)), main="Ignorable case")
abline(v=0, lty="dashed")
abline(lm(y.all~x), col="gray")
text(0, coef(lm(y.all~x))[1], labels="all data", cex=.75)

# This is the linear approx with the complete cases:
points(x,y, col="red", pch=19)
abline(lm(y~x), col="red")
text(0, coef(lm(y~x))[1], labels="compl. cases", col="red", cex=.75)

# These are worst case bounds:
y.max <- y
y.max[r==0] <- max(y, na.rm=T)
points(x, y.max, col="blue", cex=.5)
abline(lm(y.max~x), col="blue")
text(0, coef(lm(y.max~x))[1], labels="imputation lower bound", col="blue", cex=.75)

y.min <- y
y.min[r==0] <- min(y, na.rm=T)
points(x, y.min, col="green", cex=.5)
abline(lm(y.min~x), col="green")
text(0, coef(lm(y.min~x))[1], labels="imputation upper bound", col="green", cex=.75)

# This is the IPW adjusted linear approximation:
logitfit <- glm(r~x, family="binomial")
p.hat <- predict(logitfit, type="response")
pweight <- 1/p.hat
points(x,y, col="purple", cex=sqrt(pweight))
abline(lm(y~x, w=pweight), col="purple")
text(0, coef(lm(y~x, w=pweight))[1], labels="IPW", col="purple", cex=.75)

# IPW sensitivity analysis:
# Get residualized y:

olsfit <- lm(y~x)
resids <- y-predict(olsfit, newdata=data.frame(x))
resids.xscaled <- sd(x)*resids/sd(resids, na.rm=T)

# This is IPW adjusted linear approximation but assuming
# that ignorability is violated to an extent that residualized
# y has as much explanatory power as x in the positive direction:

p.hat.hi <- invlogit(predict(logitfit, type="link")+.25*resids.xscaled)
pweight.hi <- 1/p.hat.hi
points(x,y, col="purple", cex=sqrt(pweight.hi))
abline(lm(y~x, w=pweight.hi), col="orange")
text(0, coef(lm(y~x, w=pweight.hi))[1], labels="IPW low", col="orange", cex=.75)


# This is IPW adjusted linear approximation but assuming
# that ignorability is violated to an extent that residualized
# y has as much explanatory power as x in the negative direction:

p.hat.lo <- invlogit(predict(logitfit, type="link")-.25*resids.xscaled)
pweight.lo <- 1/p.hat.lo
points(x,y, col="purple", cex=sqrt(pweight.lo))
abline(lm(y~x, w=pweight.lo), col="pink")
text(0, coef(lm(y~x, w=pweight.lo))[1], labels="IPW high", col="pink", cex=.75)


# Now non-ignorable missingness

# Missingness
invlogit <- function(x){(1+exp(-x))^(-1)}
p.r <- 1-invlogit(-2+.25*(y.all-mean(y.all)))
r <- rbinom(n,1,p.r)
y <- y.all
y[r==0] <- NA

# Suppose the goal is to use a local linear approximation
# to estimate the value at the cutpoint (=0).

# Go through each of the below step by step and look at what
# happens on the graph.

# This gives the linear approximation if we had no missingness:

plot(x,y.all, pch=19, col="gray", xlim=c(-2,max(x)), main="Non-ignorable case")
abline(v=0, lty="dashed")
abline(lm(y.all~x), col="gray")
text(0, coef(lm(y.all~x))[1], labels="all data", col="gray", cex=.75)

# This is the linear approx with the complete cases:
points(x,y, col="red", pch=19)
abline(lm(y~x), col="red")
text(0, coef(lm(y~x))[1], labels="compl. cases", col="red", cex=.75)

# These are worst case bounds:
y.max <- y
y.max[r==0] <- max(y, na.rm=T)
points(x, y.max, col="blue", cex=.5)
abline(lm(y.max~x), col="blue")
text(0, coef(lm(y.max~x))[1], labels="imputation lower bound", col="blue", cex=.75)

y.min <- y
y.min[r==0] <- min(y, na.rm=T)
points(x, y.min, col="green", cex=.5)
abline(lm(y.min~x), col="green")
text(0, coef(lm(y.min~x))[1], labels="imputation upper bound", col="green", cex=.75)

# This is the IPW adjusted linear approximation:
logitfit <- glm(r~x, family="binomial")
p.hat <- predict(logitfit, type="response")
pweight <- 1/p.hat
points(x,y, col="purple", cex=sqrt(pweight))
abline(lm(y~x, w=pweight), col="purple")
text(0, coef(lm(y~x, w=pweight))[1], labels="IPW", col="purple", cex=.75)

# IPW sensitivity analysis:
# Get residualized y:

olsfit <- lm(y~x)
resids <- y-predict(olsfit, newdata=data.frame(x))
resids.xscaled <- sd(x)*resids/sd(resids, na.rm=T)

# This is IPW adjusted linear approximation but assuming
# that ignorability is violated to an extent that residualized
# y has as much explanatory power as x in the positive direction:

p.hat.hi <- invlogit(predict(logitfit, type="link")+.25*resids.xscaled)
pweight.hi <- 1/p.hat.hi
points(x,y, col="purple", cex=sqrt(pweight.hi))
abline(lm(y~x, w=pweight.hi), col="orange")
text(0, coef(lm(y~x, w=pweight.hi))[1], labels="IPW low", col="orange", cex=.75)


# This is IPW adjusted linear approximation but assuming
# that ignorability is violated to an extent that residualized
# y has as much explanatory power as x in the negative direction:

p.hat.lo <- invlogit(predict(logitfit, type="link")-.25*resids.xscaled)
pweight.lo <- 1/p.hat.lo
points(x,y, col="purple", cex=sqrt(pweight.lo))
abline(lm(y~x, w=pweight.lo), col="pink")
text(0, coef(lm(y~x, w=pweight.lo))[1], labels="IPW high", col="pink", cex=.75)