Social Media Example
library(MASS)
library(mixtools)
library(igraph)
library(iGraphMatch)
library(class)
library(factoextra)
library(tree)
library(randomForest)
library(caret)
library(e1071)
library(mclust)
library(irlba)
library(ggplot2)
library(tidyverse)
library(socviz)
library(Matrix)
library(flexclust)
ase <- function(A, dim) {
require(irlba)
#diag(A) <- rowSums(A) / (nrow(A)-1) # diagaug line
if(nrow(A) >= 400){
A.svd <- irlba(A, nu = dim, nv = dim)
A.svd.values <- A.svd$d[1:dim]
A.svd.vectors <- A.svd$v[,1:dim,drop=F]
if(dim == 1)
A.coords <- sqrt(A.svd.values) * A.svd.vectors
else
A.coords <- A.svd.vectors %*% diag(sqrt(A.svd.values))
} else{
A.svd <- svd(A)
A.svd.values <- A.svd$d[1:dim]
if(dim == 1)
A.coords <- matrix(A.svd$v[,1,drop=F] * sqrt(A.svd$d[1]),ncol=dim)
else
A.coords <- A.svd$v[,1:dim] %*% diag(sqrt(A.svd$d[1:dim]))
}
return(list(X=A.coords,D=A.svd.values))
}
make_Omni<- function(A,w)
{
n<-dim(A[[1]])[1]
m<-length(A)
ind<-list()
for(i in 1:m){
ind[[i]]<-c( (n*(i-1)+1): (n*i))
}
O<-matrix(0,n*m,n*m)
for(i in 1:m){
for(j in 1:m){
O[ ind[[i]] , ind[[j]] ]<-as.matrix((w[i]*A[[i]]+w[j]*A[[j]])/(w[i]+w[j]))
}}
for(i in 1:m){O[ ind[[i]] , ind[[i]] ]<-as.matrix(A[[i]])}
return(O)
}
procrustes<-function(X,Y, type = "I"){
if(type == "C"){
X <- X/norm(X, type = "F")*sqrt(nrow(X))
Y <- Y/norm(Y, type = "F")*sqrt(nrow(Y))
}
if(type == "D"){
tX <- rowSums(X^2)
tX[tX <= 1e-15] <- 1
tY <- rowSums(Y^2)
tY[tY <= 1e-15] <- 1
X <- X/sqrt(tX)
Y <- Y/sqrt(tY)
}
tmp <- t(X) %*% Y
tmp.svd <- svd(tmp)
W <- tmp.svd$u %*% t(tmp.svd$v)
return(list(error = norm(X%*%W - Y, type = "F"), W = W))
}
buildOmni <- function(Alist)
{
# elements of A are assumed symmetric!
require(Matrix)
m <- length(Alist)
nvec <- sapply(Alist, nrow)
nsum <- c(0, cumsum(nvec))
omni <- as.matrix(bdiag(Alist))
for (i in 1:(m-1)) {
irng <- (nsum[i]+1):nsum[i+1]
for (j in (i+1):m) {
jrng <- (nsum[j]+1):nsum[j+1]
omni[irng,jrng] <- (as.matrix(Alist[[i]]) + as.matrix(Alist[[j]]))
}
}
omni <- (omni + t(omni)) / 2
return(omni)
}
getElbows <- function(dat, n = 3, threshold = FALSE, plot = F) {
if (is.matrix(dat))
d <- sort(apply(dat,2,sd), decreasing=T)
else
d <- sort(dat,decreasing=TRUE)
if (!is.logical(threshold))
d <- d[d > threshold]
p <- length(d)
if (p == 0)
stop(paste("d must have elements that are larger than the threshold ",
threshold), "!", sep="")
lq <- rep(0.0, p) # log likelihood, function of q
for (q in 1:p) {
mu1 <- mean(d[1:q])
mu2 <- mean(d[-(1:q)]) # = NaN when q = p
sigma2 <- (sum((d[1:q] - mu1)^2) + sum((d[-(1:q)] - mu2)^2)) / (p - 1 - (q < p))
lq[q] <- sum( dnorm( d[1:q ], mu1, sqrt(sigma2), log=TRUE) ) +
sum( dnorm(d[-(1:q)], mu2, sqrt(sigma2), log=TRUE) )
}
q <- which.max(lq)
if (n > 1 && q < p) {
q <- c(q, q + getElbows(d[(q+1):p], n=n-1, plot=FALSE))
}
if (plot==TRUE) {
if (is.matrix(dat)) {
sdv <- d # apply(dat,2,sd)
plot(sdv,type="b",xlab="dim",ylab="stdev")
points(q,sdv[q],col=2,pch=19)
} else {
plot(dat, type="b")
points(q,dat[q],col=2,pch=19)
}
}
return(q)
}
set.seed(123)
TS2 <- function(A,B){
return(norm(A-B, "F")^2)
}
derange<-function(n){
# returns a derangement of 1:n
c=0
while( c==0 ){d<-sample(n);
if(sum(d==1:n)==0){c=1}}
return(d)
}
social <- read.csv("~/Documents/social.csv", header=FALSE)
ind_tw <- which(social[,3]=="tw")
ind_ff <- which(social[,3]=="ff")
ind_yt <- which(social[,3]=="yt")
social_tw <- social[ind_tw, c(1:2) ]
g_tw <- graph_from_edgelist(cbind(social_tw$V1,social_tw$V2) , directed=FALSE)
v_tw <- names(V(g_tw))
social_ff <- social[ind_ff, c(1:2) ]
g_ff <- graph_from_edgelist(cbind(social_ff$V1,social_ff$V2), directed=FALSE)
v_ff <- names(V(g_ff))
social_yt <- social[ind_yt, c(1:2) ]
g_yt <- graph_from_edgelist(cbind(social_yt$V1,social_yt$V2), directed=FALSE)
v_yt <- names(V(g_yt))
v_co <- intersect(intersect(v_tw, v_ff), v_yt)
n <- length(v_co)
ind1<-rep(0,n)
ind2<-rep(0,n)
ind3<-rep(0,n)
for(i in 1:n){
ind1[i] <- which(v_yt == v_co[i])
ind2[i] <- which(v_ff == v_co[i])
ind3[i] <- which(v_tw == v_co[i])
}
A <- g_yt[]
A <- A[ind1,ind1]
B <- g_ff[]
B <- B[ind2,ind2]
C <- g_tw[]
C <- C[ind3,ind3]
diag(A) <- 0
diag(B) <- 0
diag(C) <- 0
a<-1
b<-1
c<-1
while((a+b+c) > 0){
zA <- which( as.numeric(rowSums(A)) == 0)
zB <- which( as.numeric(rowSums(B)) == 0)
zC <- which( as.numeric(rowSums(C)) == 0)
zt <- union(union(zA, zB), zC)
a<- sum(rowSums(A[-zt,-zt])==0)
b<- sum(rowSums(B[-zt,-zt])==0)
c<- sum(rowSums(C[-zt,-zt])==0)
A <- A[-zt,-zt]
B <- B[-zt,-zt]
C <- C[-zt,-zt]}
ee<-irlba(A,30)
da1<-getElbows(ee$d)[2]
ee<-irlba(B,30)
da2<-getElbows(ee$d)[2]
ee<-irlba(C,30)
db1<-getElbows(ee$d)[2]
d <- max(da1, da2, db1)
X1 <- ase(B,d)$X
X2 <- ase(A,d)$X
Y1 <- ase(C,d)$X
nn<-dim(A)[1]
PPP2C <- matrix(0,6,6)
#PPP2a_un <- matrix(0,6,6)
#PPP2n_un <- matrix(0,6,6)
for(eee in 1:50){
print(eee)
perm<-sample(nn)
X1<-X1[perm,]
X2<-X2[perm,]
Y1<-Y1[perm,]
k <- c(25, 50, 100, 150, 200, 250)
PP <- list()
for(ii in 1:6){
if(k[ii] != 0){
pp <- derange(k[ii])
pp <- c(pp,(k[ii]+1):nn)
PP[[ii]] <- pp}else{PP[[ii]]<-c(1:nn)}
}
crit_ts2 <- rep(0,6)
for(iii in 1:6){
t2 <- rep(0,200)
for(j in 1:200){
# supress warnings of <0 and >1 entries in X1%*%t(X1)
suppressWarnings({ AA1 <- sample_dot_product(t(X1),directed=FALSE)[]})
XX1<-ase(AA1,d)$X
Phat1<-XX1%*%t(XX1)
# supress warnings of <0 and >1 entries in X1%*%t(X1)
suppressWarnings({AA2 <- sample_dot_product(t(X1),directed=FALSE)[]})
XX2<-ase(AA2,d)$X
Phat2<-XX2%*%t(XX2)
Phat2shuff<-Phat2[PP[[iii]], PP[[iii]]]
t2[j]<-TS2(Phat1,Phat2shuff)
#Mk<-c((k[iii]+1):nn)
#if(k[iii]!=0){
#unshuffperm <- gm(Phat1, Phat2shuff, seeds = Mk)
#t2unshuff[j]<-norm(as.matrix(Phat1 - Phat2shuff[unshuffperm$corr_B,unshuffperm$corr_B]), "F")^2}else{
# t2unshuff[j]<-t2[j]}
}
crit_ts2[iii]<-quantile(t2,0.95)
#crit_ts2unshuff[iii]<-quantile(t2unshuff,0.95)
}
alt_ts2C <- matrix(0,6,200)
for(iii in 1:6){
for(j in 1:200){
# supress warnings of <0 and >1 entries in X1%*%t(X1)
suppressWarnings({AA1 <- sample_dot_product(t(X1),directed=FALSE)[]})
XX1<-ase(AA1,d)$X
Phat1<-XX1%*%t(XX1)
# supress warnings of <0 and >1 entries in Y1%*%t(Y1)
suppressWarnings({BB1 <- sample_dot_product(t(Y1),directed=FALSE)[]})
YY1<-ase(BB1,d)$X
Phat3<-YY1%*%t(YY1)
Phat3shuff<-Phat3[PP[[iii]], PP[[iii]]]
alt_ts2C[iii,j]<-TS2(Phat1,Phat3shuff)
#if(k[iii]!=0){
#Mk<-c((k[iii]+1):nn)
#unshuffperm <- gm(Phat1, Phat2shuff, seeds = Mk)
#alt_ts2n_unshuff[iii,j]<-TS2(Phat1,Phat2shuff[unshuffperm$corr_B,unshuffperm$corr_B])
#unshuffperm <- gm(Phat1, Phat3shuff, seeds = Mk)
#alt_ts2a_unshuff[iii,j]<-TS2(Phat1,Phat3shuff[unshuffperm$corr_B,unshuffperm$corr_B])}else{
#alt_ts2n_unshuff[iii,j]<-alt_ts2n[iii,j]
#alt_ts2a_unshuff[iii,j]<-alt_ts2a[iii,j]}
}
}
Pow2C <- matrix(NA,6,6 )
#Pow2a_unshuff <- matrix(NA,6,6 )
#Pow2n_unshuff <- matrix(NA,6,6 )
for(i in 1:6){
for(j in 1:i){
Pow2C[i,j] <- sum( alt_ts2C[j,] > crit_ts2[i])/200
#Pow2a_unshuff[i,j] <- sum( alt_ts2a_unshuff[j,] > crit_ts2unshuff[i])/200
#Pow2n_unshuff[i,j] <- sum( alt_ts2n_unshuff[j,] > crit_ts2unshuff[i])/200
}
}
PPP2C <-PPP2C+Pow2C
#PPP2a_un <-PPP2a_un+Pow2a_unshuff
#PPP2n_un <-PPP2n_un+Pow2n_unshuff
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27
## [1] 28
## [1] 29
## [1] 30
## [1] 31
## [1] 32
## [1] 33
## [1] 34
## [1] 35
## [1] 36
## [1] 37
## [1] 38
## [1] 39
## [1] 40
## [1] 41
## [1] 42
## [1] 43
## [1] 44
## [1] 45
## [1] 46
## [1] 47
## [1] 48
## [1] 49
## [1] 50
PPP2B <- matrix(0,6,6)
#PPP2a_un <- matrix(0,6,6)
#PPP2n_un <- matrix(0,6,6)
for(eee in 1:50){
print(eee)
perm<-sample(nn)
X1<-X1[perm,]
X2<-X2[perm,]
Y1<-Y1[perm,]
k <- c(5, 10, 15, 20, 25, 50)
PP <- list()
for(ii in 1:6){
if(k[ii] != 0){
pp <- derange(k[ii])
pp <- c(pp,(k[ii]+1):nn)
PP[[ii]] <- pp}else{PP[[ii]]<-c(1:nn)}
}
crit_ts2 <- rep(0,6)
for(iii in 1:6){
t2 <- rep(0,200)
for(j in 1:200){
# supress warnings of <0 and >1 entries in X1%*%t(X1)
suppressWarnings({AA1 <- sample_dot_product(t(X1),directed=FALSE)[]})
XX1<-ase(AA1,d)$X
Phat1<-XX1%*%t(XX1)
# supress warnings of <0 and >1 entries in X1%*%t(X1)
suppressWarnings({AA2 <- sample_dot_product(t(X1),directed=FALSE)[]})
XX2<-ase(AA2,d)$X
Phat2<-XX2%*%t(XX2)
Phat2shuff<-Phat2[PP[[iii]], PP[[iii]]]
t2[j]<-TS2(Phat1,Phat2shuff)
#Mk<-c((k[iii]+1):nn)
#if(k[iii]!=0){
#unshuffperm <- gm(Phat1, Phat2shuff, seeds = Mk)
#t2unshuff[j]<-norm(as.matrix(Phat1 - Phat2shuff[unshuffperm$corr_B,unshuffperm$corr_B]), "F")^2}else{
# t2unshuff[j]<-t2[j]}
}
crit_ts2[iii]<-quantile(t2,0.95)
#crit_ts2unshuff[iii]<-quantile(t2unshuff,0.95)
}
alt_ts2B <- matrix(0,6,200)
for(iii in 1:6){
for(j in 1:200){
# supress warnings of <0 and >1 entries in X1%*%t(X1)
suppressWarnings({AA1 <- sample_dot_product(t(X1),directed=FALSE)[]})
XX1<-ase(AA1,d)$X
Phat1<-XX1%*%t(XX1)
# supress warnings of <0 and >1 entries in X2%*%t(X2)
suppressWarnings({AA2 <- sample_dot_product(t(X2),directed=FALSE)[]})
XX2<-ase(AA2,d)$X
Phat2<-XX2%*%t(XX2)
Phat2shuff<-Phat2[PP[[iii]], PP[[iii]]]
alt_ts2B[iii,j]<-TS2(Phat1,Phat2shuff)
#if(k[iii]!=0){
#Mk<-c((k[iii]+1):nn)
#unshuffperm <- gm(Phat1, Phat2shuff, seeds = Mk)
#alt_ts2n_unshuff[iii,j]<-TS2(Phat1,Phat2shuff[unshuffperm$corr_B,unshuffperm$corr_B])
#unshuffperm <- gm(Phat1, Phat3shuff, seeds = Mk)
#alt_ts2a_unshuff[iii,j]<-TS2(Phat1,Phat3shuff[unshuffperm$corr_B,unshuffperm$corr_B])}else{
#alt_ts2n_unshuff[iii,j]<-alt_ts2n[iii,j]
#alt_ts2a_unshuff[iii,j]<-alt_ts2a[iii,j]}
}
}
Pow2B <- matrix(NA,6,6 )
#Pow2a_unshuff <- matrix(NA,6,6 )
#Pow2n_unshuff <- matrix(NA,6,6 )
for(i in 1:6){
for(j in 1:i){
Pow2B[i,j] <- sum( alt_ts2B[j,] > crit_ts2[i])/200
#Pow2a_unshuff[i,j] <- sum( alt_ts2a_unshuff[j,] > crit_ts2unshuff[i])/200
#Pow2n_unshuff[i,j] <- sum( alt_ts2n_unshuff[j,] > crit_ts2unshuff[i])/200
}
}
PPP2B <-PPP2B+Pow2B
#PPP2a_un <-PPP2a_un+Pow2a_unshuff
#PPP2n_un <-PPP2n_un+Pow2n_unshuff
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27
## [1] 28
## [1] 29
## [1] 30
## [1] 31
## [1] 32
## [1] 33
## [1] 34
## [1] 35
## [1] 36
## [1] 37
## [1] 38
## [1] 39
## [1] 40
## [1] 41
## [1] 42
## [1] 43
## [1] 44
## [1] 45
## [1] 46
## [1] 47
## [1] 48
## [1] 49
## [1] 50
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
PPP2B<-PPP2B/50
PPP2C<-PPP2C/50
##############################################################################
#------------------------ GRAPHING ------------------------------------------
###########################################################################
library(ggplot2)
library(reshape2)
library(viridis)
## Loading required package: viridisLite
library(colorspace)
library(extrafont)
## Registering fonts with R
colors <- (qualitative_hcl(6, "Dark 3"))
points <- c(19, 17, 15, 8, 15, 18)
pow_C <- na.omit(melt(PPP2C))$value
nshuffH1<-c(rep(25,6),rep(50,5), rep(100,4), rep(150,3), rep(200,2), rep(250,1))
X<-cbind(pow_C,nshuffH1)
nshuffH0<-c(25, 50, 100, 150, 200, 250, 50, 100, 150, 200, 250,100, 150, 200, 250,150, 200, 250,200, 250,250)
X<-cbind(X,nshuffH0)
X<-data.frame(X)
X$nshuffH0<-as.factor(X$nshuffH0)
gpa<-ggplot(X, aes(x=nshuffH1, y=pow_C, color=nshuffH0))+theme_classic()
gpa <- gpa + geom_line(aes(linetype=nshuffH0),size=1.05) +
geom_point(aes(shape=nshuffH0),size=4) +
geom_errorbar(aes(ymin=pow_C-2*sqrt(pow_C*(1-pow_C)/50),ymax=pow_C+2*sqrt(pow_C*(1-pow_C)/50)),
width=20)+
labs(family="serif", title="Power Testing: Twitter vs. Friendfeed", x="Number of Vertices Shuffled in H1", y="Power", color='Vert. shuff\n in H0', shape='Vert. shuff\n in H0',linetype='Vert. shuff\n in H0') +
theme(plot.title = element_text(family="serif", face = "bold", size = 17, hjust=0.5),
panel.background = element_rect(fill = "#FFFFFF"),
legend.title = element_text(family="serif", face="bold", color = "#000000", size = 12),
legend.background = element_rect(fill = "#FFFFFF"),
text = element_text(size=20, family = "serif"),
axis.title = element_text(family = "serif", size = 15, colour = "#000000"),
plot.background = element_rect(fill = "#FFFFFF"),
axis.line = element_line(colour = "#000000", size = 1),
legend.text = element_text( family="serif", color = "#000000", size=15)) + scale_colour_manual(values=colors) + scale_shape_manual(values=points)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
pow_B <- na.omit(melt(PPP2B))$value
nshuffH1<-c(rep(5,6),rep(10,5), rep(15,4), rep(20,3), rep(25,2), rep(50,1))
Y<-cbind(pow_B,nshuffH1)
nshuffH0<-c(5, 10, 15, 20, 25, 50, 10, 15, 20, 25, 50,15, 20, 25, 50, 20, 25, 50,25, 50,50)
Y<-cbind(Y,nshuffH0)
Y<-data.frame(Y)
Y$nshuffH0<-as.factor(Y$nshuffH0)
gpa2<-ggplot(Y, aes(x=nshuffH1, y=pow_B, color=nshuffH0)) + theme_classic()
gpa2<- gpa2 + geom_line(aes(linetype=nshuffH0),size=1.05) +
geom_errorbar(aes(ymin=pow_B-2*sqrt(pow_B*(1-pow_B)/50),ymax=pow_B+2*sqrt(pow_B*(1-pow_B)/50)),
width=5)+
geom_point(aes(shape=nshuffH0),size=4) +
labs(family="serif", title="Power Testing: Youtube vs. Friendfeed", x="Number of Vertices Shuffled in H1", y="Power", color='Vert. shuff\n in H0', shape='Vert. shuff\n in H0',linetype='Vert. shuff\n in H0')+
theme(plot.title = element_text(family="serif", face = "bold", size = 17, hjust=0.5),
panel.background = element_rect(fill = "#FFFFFF"),
legend.title = element_text(family="serif", face="bold", color = "#000000", size = 12),
legend.background = element_rect(fill = "#FFFFFF"),
text = element_text(size=20, family = "serif"),
axis.title = element_text(family = "serif", size = 15, colour = "#000000"),
plot.background = element_rect(fill = "#FFFFFF"),
axis.line = element_line(colour = "#000000", size = 1),
legend.text = element_text( family="serif", color = "#000000", size=15)) + scale_colour_manual(values=colors) + scale_shape_manual(values=points)
library(patchwork)
##
## Attaching package: 'patchwork'
## The following object is masked from 'package:MASS':
##
## area
(gpa+theme(legend.position = "bottom")) + (gpa2 +theme(legend.position = "bottom"))
Social Media Example