Code to create Figures 5 and 6 (and Appendix Figures 12, 13,
14)
library(MASS)
library(mixtools)
library(igraph)
library(iGraphMatch)
library(class)
library(factoextra)
library(tree)
library(randomForest)
library(caret)
library(e1071)
library(mclust)
library(irlba)
library(ggplot2)
library(tidyverse)
library(socviz)
library(Matrix)
library(flexclust)
derangement <- function( n ){
ch <- 0
while( ch == 0 ){
x <- sample(n)
if( sum( x == 1:n) == 0){
ch <- 1 }
}
return(x)
}
ase <- function(A, dim) {
require(irlba)
#diag(A) <- rowSums(A) / (nrow(A)-1) # diagaug line
if(nrow(A) >= 1000){
A.svd <- irlba(A, nu = dim, nv = dim)
A.svd.values <- A.svd$d[1:dim]
A.svd.vectors <- A.svd$v[,1:dim,drop=F]
if(dim == 1)
A.coords <- sqrt(A.svd.values) * A.svd.vectors
else
A.coords <- A.svd.vectors %*% diag(sqrt(A.svd.values))
} else{
A.svd <- svd(A)
A.svd.values <- A.svd$d[1:dim]
if(dim == 1)
A.coords <- matrix(A.svd$v[,1,drop=F] * sqrt(A.svd$d[1]),ncol=dim)
else
A.coords <- A.svd$v[,1:dim] %*% diag(sqrt(A.svd$d[1:dim]))
}
return(list(X=A.coords,D=A.svd.values))
}
make_Omni<- function(A,w)
{
n<-dim(A[[1]])[1]
m<-length(A)
ind<-list()
for(i in 1:m){
ind[[i]]<-c( (n*(i-1)+1): (n*i))
}
O<-matrix(0,n*m,n*m)
for(i in 1:m){
for(j in 1:m){
O[ ind[[i]] , ind[[j]] ]<-as.matrix((w[i]*A[[i]]+w[j]*A[[j]])/(w[i]+w[j]))
}}
for(i in 1:m){O[ ind[[i]] , ind[[i]] ]<-as.matrix(A[[i]])}
return(O)
}
procrustes<-function(X,Y, type = "I"){
if(type == "C"){
X <- X/norm(X, type = "F")*sqrt(nrow(X))
Y <- Y/norm(Y, type = "F")*sqrt(nrow(Y))
}
if(type == "D"){
tX <- rowSums(X^2)
tX[tX <= 1e-15] <- 1
tY <- rowSums(Y^2)
tY[tY <= 1e-15] <- 1
X <- X/sqrt(tX)
Y <- Y/sqrt(tY)
}
tmp <- t(X) %*% Y
tmp.svd <- svd(tmp)
W <- tmp.svd$u %*% t(tmp.svd$v)
return(list(error = norm(X%*%W - Y, type = "F"), W = W))
}
buildOmni <- function(Alist)
{
# elements of A are assumed symmetric!
require(Matrix)
m <- length(Alist)
nvec <- sapply(Alist, nrow)
nsum <- c(0, cumsum(nvec))
omni <- as.matrix(bdiag(Alist))
for (i in 1:(m-1)) {
irng <- (nsum[i]+1):nsum[i+1]
for (j in (i+1):m) {
jrng <- (nsum[j]+1):nsum[j+1]
omni[irng,jrng] <- (as.matrix(Alist[[i]]) + as.matrix(Alist[[j]]))
}
}
omni <- (omni + t(omni)) / 2
return(omni)
}
getElbows <- function(dat, n = 3, threshold = FALSE, plot = F) {
if (is.matrix(dat))
d <- sort(apply(dat,2,sd), decreasing=T)
else
d <- sort(dat,decreasing=TRUE)
if (!is.logical(threshold))
d <- d[d > threshold]
p <- length(d)
if (p == 0)
stop(paste("d must have elements that are larger than the threshold ",
threshold), "!", sep="")
lq <- rep(0.0, p) # log likelihood, function of q
for (q in 1:p) {
mu1 <- mean(d[1:q])
mu2 <- mean(d[-(1:q)]) # = NaN when q = p
sigma2 <- (sum((d[1:q] - mu1)^2) + sum((d[-(1:q)] - mu2)^2)) / (p - 1 - (q < p))
lq[q] <- sum( dnorm( d[1:q ], mu1, sqrt(sigma2), log=TRUE) ) +
sum( dnorm(d[-(1:q)], mu2, sqrt(sigma2), log=TRUE) )
}
q <- which.max(lq)
if (n > 1 && q < p) {
q <- c(q, q + getElbows(d[(q+1):p], n=n-1, plot=FALSE))
}
if (plot==TRUE) {
if (is.matrix(dat)) {
sdv <- d # apply(dat,2,sd)
plot(sdv,type="b",xlab="dim",ylab="stdev")
points(q,sdv[q],col=2,pch=19)
} else {
plot(dat, type="b")
points(q,dat[q],col=2,pch=19)
}
}
return(q)
}
shuff_ord <- function(x,vec1, vec2)
#function that swaps * number of vertices from block 1 and 2
{
temp <- vec2
x[vec2] <- vec1
x[vec1] <- temp
return(x)
}
TS_O <- function(A,B,d){
O <- buildOmni(list(A,B))
dd <- dim(A)[1]
X <- ase(O, d)$X
return(norm(X[1:dd,]-X[(dd+1):(2*dd),], "F")^2)
}
TS_P<-function(A,B,d){
x <- ase(A,d)$X
y <- ase(B,d)$X
return(norm(x%*%t(x)-y%*%t(y), "F")^2)
}
TS_A<-function(A,B){
return(norm(A-B, "F")^2)
}
TS_S<-function(A,B,d){
x <- ase(A,d)$X
y <- ase(B,d)$X
w <- procrustes(x,y)
return(w$error^2)
}
# similar(ish) experimental setup as Levin et al. figure 11
n <- 100 #total number of vertices in each graph
nf <- 5
kshuff <- c(10, 20, 30, 50, 75, 100) #number shuffled in null
lshuff <- c(5, 10, 15, 20, 25, 30,
40, 50, 75, 100) #number shuffled in alternative
nMC<-100
nMC2<-50
set.seed(1234)
P_Adj <- list()
P_Phat <- list()
P_Semipar <- list()
P_Omni <- list()
kk <- length(kshuff)
ll <- length(lshuff)
for(i in 1:5){
P_Adj[[i]] <- matrix(0,kk,ll)
P_Phat[[i]] <- matrix(0,kk,ll)
P_Semipar[[i]] <- matrix(0,kk,ll)
P_Omni[[i]] <- matrix(0,kk,ll)
}
for(yyy in 1:nMC2){
X <- sample_dirichlet(n,c(1,1,1))
X[,1:nf] <- c(0.8,0.1,0.1)
Y <- X
oo <- sample(n)
ShuffL<-list()
for(i in 1:ll){
ShuffL[[i]]<-derangement(lshuff[i])
}
ShuffK<-list()
ShuffK[[1]]<-ShuffL[[2]]
ShuffK[[2]]<-ShuffL[[4]]
ShuffK[[3]]<-ShuffL[[6]]
ShuffK[[4]]<-ShuffL[[8]]
ShuffK[[5]]<-ShuffL[[9]]
ShuffK[[6]]<-ShuffL[[10]]
#### get the critical values
null_Adj <- matrix(NA, nMC, length(kshuff))
null_Phat <- matrix(NA, nMC, length(kshuff))
null_Semipar <- matrix(NA, nMC, length(kshuff))
null_Omni <- matrix(NA, nMC, length(kshuff))
for(i in 1:nMC){
# make the graphs
g1 <- sample_dot_product(X)
g2 <- sample_dot_product(X)
A <- g1[]
B <- g2[]
for(ii in 1:kk)
{
ok <- ShuffK[[ii]]
ooo <- oo
ek<-length(ok)
ooo[1:ek] <- ooo[ok]
AA <- A[oo,oo]
BB <- B[ooo,ooo]
null_Adj[i,ii] <- TS_A(AA,BB)
null_Phat[i,ii] <- TS_P(AA,BB,3)
null_Semipar[i,ii] <- TS_S(AA,BB,3)
null_Omni[i,ii] <- TS_O(AA,BB,3)
}
}
c95<-function(x) quantile(x,0.95,na.rm=T)
crit_Adj<-apply(null_Adj,2 , c95)
crit_Phat<-apply(null_Phat,2 , c95)
crit_Semipar<-apply(null_Semipar,2 , c95)
crit_Omni<-apply(null_Omni,2 , c95)
drft <- c(0,0.25,0.5,0.75,1)
# alternative sampling
alt_Adj <- list()
alt_Phat <- list()
alt_Semipar <- list()
alt_Omni <- list()
for(i in 1:5){
alt_Adj[[i]] <- matrix(NA, nMC, length(lshuff))
alt_Phat[[i]] <- matrix(NA, nMC, length(lshuff))
alt_Semipar[[i]] <- matrix(NA, nMC, length(lshuff))
alt_Omni[[i]] <- matrix(NA, nMC, length(lshuff))
}
for(i in 1:5){
eps<-drft[i]
Y[,1:nf] <- (1-eps)*c(0.8,0.1,0.1)+eps*c(0.1,0.1,0.8)
for(j in 1:nMC){
# make the graphs
A<-sample_dot_product(X)
B<-sample_dot_product(Y)
for(jj in 1:ll){
ol <- ShuffL[[jj]]
el<-length(ol)
ooo <- oo
ooo[1:el] <- ooo[ol]
AA <- A[oo,oo]
BB <- B[ooo,ooo]
alt_Adj[[i]][j,jj] <- TS_A(AA,BB)
alt_Phat[[i]][j,jj] <- TS_P(AA,BB,3)
alt_Semipar[[i]][j,jj] <- TS_S(AA,BB,3)
alt_Omni[[i]][j,jj] <- TS_O(AA,BB,3)
}
}
}
#######################
### Computing power ###
#######################
Pow_Adj <- list()
Pow_Phat <- list()
Pow_Semipar <- list()
Pow_Omni <- list()
for(i in 1:5){
Pow_Adj[[i]] <- matrix(NA,kk,ll)
Pow_Phat[[i]] <- matrix(NA,kk,ll)
Pow_Semipar[[i]] <- matrix(NA,kk,ll)
Pow_Omni[[i]] <- matrix(NA,kk,ll)
}
for(i in 1:5){
for(j in 1:kk){
kl <- kshuff[j]
lll<-sum(lshuff<=kl)
for(jj in 1:lll){
Pow_Adj[[i]][j,jj] <- sum(alt_Adj[[i]][,jj]
>crit_Adj[j])/nMC
Pow_Phat[[i]][j,jj] <- sum(alt_Phat[[i]][,jj]
>crit_Phat[j])/nMC
Pow_Semipar[[i]][j,jj] <- sum(alt_Semipar[[i]][,jj]
>crit_Semipar[j])/nMC
Pow_Omni[[i]][j,jj] <- sum(alt_Omni[[i]][,jj]
>crit_Omni[j])/nMC
}
}
}
for(i in 1:5){
P_Adj[[i]] <- P_Adj[[i]] +Pow_Adj[[i]]
P_Phat[[i]] <- P_Phat[[i]]+Pow_Phat[[i]]
P_Semipar[[i]] <- P_Semipar[[i]]+Pow_Semipar[[i]]
P_Omni[[i]] <- P_Omni[[i]]+Pow_Omni[[i]]
}
}
for(i in 1:5){
P_Adj[[i]] <- P_Adj[[i]]/nMC2
P_Phat[[i]] <- P_Phat[[i]]/nMC2
P_Semipar[[i]] <- P_Semipar[[i]]/nMC2
P_Omni[[i]] <- P_Omni[[i]]/nMC2
}
#=================================================================================================================================
#------------------------Plotting Power-----------------------------------
#================================================================================================================================
library(ggplot2)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(viridis)
## Loading required package: viridisLite
library(colorspace)
library(extrafont)
## Registering fonts with R
this <- rev(qualitative_hcl(5, "Dark 3"))
Plot_Adj <- list()
Plot_Phat <- list()
Plot_Semipar <- list()
Plot_Omni <- list()
MM<-rep(0,length(drft))
for(j in 1:5){
MM[j]<-max(
P_Adj[[j]],
P_Phat[[j]],
P_Semipar[[j]],
P_Omni[[j]],
na.rm=T
)
}
MM<-MM+2*sqrt(MM*(1-MM)/nMC2)+0.05
colors <- (qualitative_hcl(6, "Dark 3"))
for(j in 1:5){
dat <- as.data.frame(t(rbind(lshuff,P_Adj[[j]])))
colnames(dat) <- c("lshuff", kshuff)
dat <- melt(dat, id.vars = "lshuff")
dat$Null_Shuff <- factor(dat$variable, levels = unique(dat$variable))
Plot_Adj[[j]] <- ggplot(dat, aes(x=lshuff, y=value, color=variable)) +
geom_line(aes(linetype=variable),size=1.05) +
geom_errorbar(aes(ymin=value-2*sqrt(value*(1-value)/nMC2), ymax=value+2*sqrt(value*(1-value)/nMC2)),width=10)+
geom_point(aes(shape=variable),size=4)+ ylim(-0.025,MM[j]) +
labs(family="serif", x="Number of Vertices Shuffled in H1", y="Power",
color='Vert. shuff\n in H0', shape='Vert. shuff\n in H0',linetype='Vert. shuff\n in H0')+
ggtitle(paste("Power loss in Adjacency test\n n=",n," lambda=", drft[j]))+
theme_classic() +
theme(plot.title = element_text(family="serif",
face = "bold", size = 15,
hjust=0.5),
panel.background = element_rect(fill = "#FFFFFF"),
legend.title = element_text(family="serif",
face="bold", color = "#000000",
size = 12),
legend.background = element_rect(fill = "#FFFFFF"),
axis.title = element_text(family = "serif", size = 15,
colour = "#000000"),
plot.background = element_rect(fill = "#FFFFFF"),
axis.line = element_line(colour = "#000000", size = 1),
legend.text = element_text( family="serif",
color = "#000000", size=12))+ scale_colour_manual(values=colors)
}
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
for(j in 1:5){
dat <- as.data.frame(t(rbind(lshuff,P_Phat[[j]])))
colnames(dat) <- c("lshuff", kshuff)
dat <- melt(dat, id.vars = "lshuff")
dat$Null_Shuff <- factor(dat$variable, levels = unique(dat$variable))
Plot_Phat[[j]] <- ggplot(dat, aes(x=lshuff, y=value, color=variable)) +
geom_line(aes(linetype=variable),size=1.05) +
geom_errorbar(aes(ymin=value-2*sqrt(value*(1-value)/nMC2), ymax=value+2*sqrt(value*(1-value)/nMC2)),width=10)+
geom_point(aes(shape=variable),size=4)+ ylim(-0.025,MM[j]) +
labs(family="serif", x="Number of Vertices Shuffled in H1", y="Power",
color='Vert. shuff\n in H0', shape='Vert. shuff\n in H0',linetype='Vert. shuff\n in H0')+
ggtitle(paste("Power loss in Phat test\n n=",n," lambda=", drft[j]))+
theme_classic() +
theme(plot.title = element_text(family="serif",
face = "bold", size = 15,
hjust=0.5),
panel.background = element_rect(fill = "#FFFFFF"),
legend.title = element_text(family="serif",
face="bold", color = "#000000",
size = 12),
legend.background = element_rect(fill = "#FFFFFF"),
axis.title = element_text(family = "serif", size = 15,
colour = "#000000"),
plot.background = element_rect(fill = "#FFFFFF"),
axis.line = element_line(colour = "#000000", size = 1),
legend.text = element_text( family="serif",
color = "#000000", size=12)) + scale_colour_manual(values=colors)
}
for(j in 1:5){
dat <- as.data.frame(t(rbind(lshuff,P_Semipar[[j]])))
colnames(dat) <- c("lshuff", kshuff)
dat <- melt(dat, id.vars = "lshuff")
dat$Null_Shuff <- factor(dat$variable, levels = unique(dat$variable))
Plot_Semipar[[j]] <- ggplot(dat, aes(x=lshuff, y=value, color=variable)) +
geom_line(aes(linetype=variable),size=1.05) +
geom_point(aes(shape=variable),size=4)+ ylim(-0.025,MM[j]) +
geom_errorbar(aes(ymin=value-2*sqrt(value*(1-value)/nMC2), ymax=value+2*sqrt(value*(1-value)/nMC2)),width=10)+
labs(family="serif", x="Number of Vertices Shuffled in H1", y="Power",
color='Vert. shuff\n in H0', shape='Vert. shuff\n in H0',linetype='Vert. shuff\n in H0')+
ggtitle(paste("Power loss in Semipar. test\n n=",n," lambda=", drft[j]))+
theme_classic() +
theme(plot.title = element_text(family="serif",
face = "bold", size = 15,
hjust=0.5),
panel.background = element_rect(fill = "#FFFFFF"),
legend.title = element_text(family="serif",
face="bold", color = "#000000",
size = 12),
legend.background = element_rect(fill = "#FFFFFF"),
axis.title = element_text(family = "serif", size = 15,
colour = "#000000"),
plot.background = element_rect(fill = "#FFFFFF"),
axis.line = element_line(colour = "#000000", size = 1),
legend.text = element_text( family="serif",
color = "#000000", size=12)) + scale_colour_manual(values=colors)
}
for(j in 1:5){
dat <- as.data.frame(t(rbind(lshuff,P_Omni[[j]])))
colnames(dat) <- c("lshuff", kshuff)
dat <- melt(dat, id.vars = "lshuff")
dat$Null_Shuff <- factor(dat$variable, levels = unique(dat$variable))
Plot_Omni[[j]] <- ggplot(dat, aes(x=lshuff, y=value, color=variable)) +
geom_line(aes(linetype=variable),size=1.05) +
geom_errorbar(aes(ymin=value-2*sqrt(value*(1-value)/nMC2), ymax=value+2*sqrt(value*(1-value)/nMC2)),width=10)+
geom_point(aes(shape=variable),size=4)+ ylim(-0.025,MM[j]) +
labs(family="serif", x="Number of Vertices Shuffled in H1", y="Power",
color='Vert. shuff\n in H0', shape='Vert. shuff\n in H0',linetype='Vert. shuff\n in H0')+
ggtitle(paste("Power loss in Omni test\n n=",n," lambda=", drft[j]))+
theme_classic() +
theme(plot.title = element_text(family="serif",
face = "bold", size = 15,
hjust=0.5),
panel.background = element_rect(fill = "#FFFFFF"),
legend.title = element_text(family="serif",
face="bold", color = "#000000",
size = 12),
legend.background = element_rect(fill = "#FFFFFF"),
axis.title = element_text(family = "serif", size = 15,
colour = "#000000"),
plot.background = element_rect(fill = "#FFFFFF"),
axis.line = element_line(colour = "#000000", size = 1),
legend.text = element_text( family="serif",
color = "#000000", size=12))+ scale_colour_manual(values=colors)
}
i=1
library(patchwork)
##
## Attaching package: 'patchwork'
## The following object is masked from 'package:MASS':
##
## area
(Plot_Adj[[i]] + Plot_Phat[[i]] + Plot_Semipar[[i]] +theme(legend.position = "none")) + Plot_Omni[[i]] + plot_layout(ncol = 2, guides ="collect")
## Warning: Removed 21 rows containing missing values (`geom_line()`).
## Warning: Removed 21 rows containing missing values (`geom_point()`).
## Warning: Removed 21 rows containing missing values (`geom_line()`).
## Warning: Removed 21 rows containing missing values (`geom_point()`).
## Warning: Removed 21 rows containing missing values (`geom_line()`).
## Warning: Removed 21 rows containing missing values (`geom_point()`).
## Warning: Removed 21 rows containing missing values (`geom_line()`).
## Warning: Removed 21 rows containing missing values (`geom_point()`).
i=2
library(patchwork)
(Plot_Adj[[i]] + Plot_Phat[[i]] + Plot_Semipar[[i]] +theme(legend.position = "none")) + Plot_Omni[[i]] + plot_layout(ncol = 2, guides ="collect")
## Warning: Removed 21 rows containing missing values (`geom_line()`).
## Removed 21 rows containing missing values (`geom_point()`).
## Warning: Removed 21 rows containing missing values (`geom_line()`).
## Warning: Removed 21 rows containing missing values (`geom_point()`).
## Warning: Removed 21 rows containing missing values (`geom_line()`).
## Warning: Removed 21 rows containing missing values (`geom_point()`).
## Warning: Removed 21 rows containing missing values (`geom_line()`).
## Warning: Removed 21 rows containing missing values (`geom_point()`).