# Clear your workspace: rm(list=ls()) # Sample size N <- 10 index <- 1:N # Treated proportion and number treated pM <- 1/2 M <- floor(N*pM) # Arbitrary covariates (in this case there are two) X1 <- as.matrix(rnorm(N)) X2 <- as.matrix(rnorm(N)) X <- cbind(X1,X2) # Let's randomly assign the treatment: Z <- as.matrix(rep(0,N)) Z[sample(index, M)] <- 1 # First, let's reproduce the RITools chi-square # approximation: d.Z.X <- ((t(X)%*%Z)/M)-(t(X)%*%(1-Z)/(N-M)) Cov.X <- (N/(M^2))*cov(cbind(X1,X2)) chi.sq <- t(d.Z.X)%*%solve(Cov.X)%*%d.Z.X rank.Cov.X <- ifelse(det(Cov.X)!=0,ncol(Cov.X),-Inf) p.approx <- pchisq(chi.sq,rank.Cov.X,lower.tail=F) out <- matrix(c(chi.sq,rank.Cov.X,p.approx), nrow=1) colnames(out) <- c("chi.sq","df","p.val") mydata <- data.frame(Z=Z,X1=X1,X2=X2) xbal <- xBalance(Z~X1+X2, data=mydata, report="all") print(out) print(xbal[[2]]) # Now let's perform the exact test. First construct # the full set of assignment possibilities: K <- choose(N,M) Omega <- matrix(0,nrow=N, ncol=K) Z.poss <- combn(index, M) for(i in 1:K){ Omega[Z.poss[,i],i] <- 1 } # Now compute the X^2 statistic for each of these # possibilities: chi.sq.i <- rep(NA,K) for(i in 1:K){ Z.i <- as.matrix(Omega[,i]) d.Z.X.i <- ((t(X)%*%Z.i)/M)-(t(X)%*%(1-Z.i)/(N-M)) chi.sq.i[i] <- t(d.Z.X.i)%*%solve(Cov.X)%*%d.Z.X.i } # Obtain the exact p-value: p.exact <- sum(chi.sq.i>=as.numeric(chi.sq))/K # Compare the approximation to the exact: out2 <- cbind(p.approx,p.exact) colnames(out2) <- c("p.approx","p.exact") print(out2) n.sims <- 500 # Normal covariates: p.approx.sim <- p.exact.sim <- chi.sq.sim <- rep(NA,n.sims) for(j in 1:n.sims){ N <- 10 index <- 1:N pM <- 1/2 M <- floor(N*pM) X1 <- as.matrix(rnorm(N)) X2 <- as.matrix(rnorm(N)) X <- cbind(X1,X2) Z <- as.matrix(rep(0,N)) Z[sample(index, M)] <- 1 d.Z.X <- ((t(X)%*%Z)/M)-(t(X)%*%(1-Z)/(N-M)) Cov.X <- (N/(M^2))*cov(cbind(X1,X2)) chi.sq <- t(d.Z.X)%*%solve(Cov.X)%*%d.Z.X rank.Cov.X <- ifelse(det(Cov.X)!=0,ncol(Cov.X),-Inf) p.approx.sim[j] <- pchisq(chi.sq,rank.Cov.X,lower.tail=F) K <- choose(N,M) Omega <- matrix(0,nrow=N, ncol=K) Z.poss <- combn(index, M) for(i in 1:K){ Omega[Z.poss[,i],i] <- 1 } chi.sq.i <- rep(NA,K) for(i in 1:K){ Z.i <- as.matrix(Omega[,i]) d.Z.X.i <- ((t(X)%*%Z.i)/M)-(t(X)%*%(1-Z.i)/(N-M)) chi.sq.i[i] <- t(d.Z.X.i)%*%solve(Cov.X)%*%d.Z.X.i } p.exact.sim[j] <- sum(chi.sq.i>=as.numeric(chi.sq))/K chi.sq.sim[j] <- chi.sq } # Skew covariates: p.approx.sim2 <- p.exact.sim2 <- chi.sq.sim2 <- rep(NA,n.sims) for(j in 1:n.sims){ N <- 10 index <- 1:N pM <- 1/2 M <- floor(N*pM) X1 <- as.matrix(rgamma(N,2,2)) X2 <- as.matrix(exp(rnorm(N))) X <- cbind(X1,X2) Z <- as.matrix(rep(0,N)) Z[sample(index, M)] <- 1 d.Z.X <- ((t(X)%*%Z)/M)-(t(X)%*%(1-Z)/(N-M)) Cov.X <- (N/(M^2))*cov(cbind(X1,X2)) chi.sq <- t(d.Z.X)%*%solve(Cov.X)%*%d.Z.X rank.Cov.X <- ifelse(det(Cov.X)!=0,ncol(Cov.X),-Inf) p.approx.sim2[j] <- pchisq(chi.sq,rank.Cov.X,lower.tail=F) K <- choose(N,M) Omega <- matrix(0,nrow=N, ncol=K) Z.poss <- combn(index, M) for(i in 1:K){ Omega[Z.poss[,i],i] <- 1 } chi.sq.i <- rep(NA,K) for(i in 1:K){ Z.i <- as.matrix(Omega[,i]) d.Z.X.i <- ((t(X)%*%Z.i)/M)-(t(X)%*%(1-Z.i)/(N-M)) chi.sq.i[i] <- t(d.Z.X.i)%*%solve(Cov.X)%*%d.Z.X.i } p.exact.sim2[j] <- sum(chi.sq.i>=as.numeric(chi.sq))/K chi.sq.sim2[j] <- chi.sq } # Binary covariates p.approx.sim3 <- p.exact.sim3 <- chi.sq.sim3 <- rep(NA,n.sims) for(j in 1:n.sims){ N <- 10 index <- 1:N pM <- 1/2 M <- floor(N*pM) X1 <- as.matrix(ifelse(index%in%sample(index, 3), 1,0)) X2 <- as.matrix(ifelse(index%in%sample(index, 5), 1,0)) X <- cbind(X1,X2) Z <- as.matrix(rep(0,N)) Z[sample(index, M)] <- 1 d.Z.X <- ((t(X)%*%Z)/M)-(t(X)%*%(1-Z)/(N-M)) Cov.X <- (N/(M^2))*cov(cbind(X1,X2)) chi.sq <- t(d.Z.X)%*%solve(Cov.X)%*%d.Z.X rank.Cov.X <- ifelse(det(Cov.X)!=0,ncol(Cov.X),-Inf) p.approx.sim3[j] <- pchisq(chi.sq,rank.Cov.X,lower.tail=F) K <- choose(N,M) Omega <- matrix(0,nrow=N, ncol=K) Z.poss <- combn(index, M) for(i in 1:K){ Omega[Z.poss[,i],i] <- 1 } chi.sq.i <- rep(NA,K) for(i in 1:K){ Z.i <- as.matrix(Omega[,i]) d.Z.X.i <- ((t(X)%*%Z.i)/M)-(t(X)%*%(1-Z.i)/(N-M)) chi.sq.i[i] <- t(d.Z.X.i)%*%solve(Cov.X)%*%d.Z.X.i } p.exact.sim3[j] <- sum(chi.sq.i>=as.numeric(chi.sq))/K chi.sq.sim3[j] <- chi.sq } par(pty="s", mfrow=c(3,3)) hist(p.approx.sim, col="gray", border=F, main="1. Normal covariates", xlab="approx gray, exact black", xlim=c(0,1)) hist(p.exact.sim, add=T, col=F) plot(p.exact.sim,p.approx.sim, main="Exact vs. Approx.", col="gray", xlim=c(0,1), ylim=c(0,1)) abline(a=0, b=1) plot(density(rchisq(n.sims, rank.Cov.X)), type="l",lty="dashed", main="chi.sq against asymptotic dist.") points(density(chi.sq.sim),type="l") hist(p.approx.sim2, col="gray", border=F, main="2. Gamma and log-norm. covs.", xlab="approx gray, exact black", xlim=c(0,1)) hist(p.exact.sim2, add=T, col=F) plot(p.exact.sim2,p.approx.sim2, main="Exact vs. Approx.", col="gray", xlim=c(0,1), ylim=c(0,1)) abline(a=0, b=1) plot(density(rchisq(n.sims, rank.Cov.X)), type="l",lty="dashed", main="chi.sq against asymptotic dist.") points(density(chi.sq.sim2),type="l") hist(p.approx.sim3, col="gray", border=F, main="3. Binary covs.", xlab="approx gray, exact black", xlim=c(0,1)) hist(p.exact.sim3, add=T, col=F) plot(jitter(p.exact.sim3, factor=5),jitter(p.approx.sim3, factor=5), main="Exact vs. Approx.", col="gray", xlim=c(0,1), ylim=c(0,1)) abline(a=0, b=1) plot(density(rchisq(n.sims, rank.Cov.X)), type="l",lty="dashed", main="chi.sq against asymptotic dist.") points(density(chi.sq.sim3),type="l")