Class Log for Dec. 11 on MCMC in Contingency-Table Example =========================================================== set.seed(33333) Xarr = array(sample(0:3, 35, rep=T), c(5,7)) > Xarr [,1] [,2] [,3] [,4] [,5] [,6] [,7] [1,] 3 0 2 0 1 2 0 [2,] 1 2 2 0 3 2 1 [3,] 1 1 3 3 0 0 2 [4,] 3 1 2 1 1 2 2 [5,] 0 1 0 2 0 2 0 ### We want to sample randomly, equiprobably, from all Tables ### with the same marginals. > marg1 = apply(Xarr,1,sum) marg1 [1] 8 11 10 12 5 > marg2 = apply(Xarr,2,sum) marg2 [1] 8 5 9 6 5 8 5 ### We define Markov chain to select two positions (i,j) and (ip,jp) ### where Xarr[i,j] > 0 and Xarr[ip,jp] > 0 (i and ip different, ### and j and jp different) and decrease each of these table entries ### by 1 and add one to Xarr[i,jp] and to Xarr[ip,j] which gives ### another table with the same marginals. MatPrs = function(xarr) { ### function to calculate and index all (i,j) and (ip,jp) such ## that xarr has positive entry at both and i!=ip & j!=jp tmp1 = which( xarr > 0 , arr.ind=T) indmat = outer(1:nrow(tmp1),1:nrow(tmp1), function(x,y) tmp1[x,1]!=tmp1[y,1] & tmp1[x,2]!=tmp1[y,2]) pairs = which(indmat, arr.ind=T) list(ijinds=tmp1, pairinds=pairs) } ## in the output of MatPrs, the possible (i,j) and (ip,jp) pairs are ## ijinds[pairinds[k,1],] and ijinds[pairinds[k,2],] ## and nrow(pairinds) gives total number of (i,j,ip,jp) comb's we select among > tmp = MatPrs(Xarr) > names(tmp) [1] "ijinds" "pairinds" > nrow(tmp[[1]]) [1] 25 > dim(tmp[[2]]) [1] 424 2 qsamp = function(xarr) { tmp = MatPrs(xarr) ncomb = nrow(tmp[[2]]) new = sample(ncomb,1) ij = tmp$ijinds[tmp$pairinds[new,1],] ipjp = tmp$ijinds[tmp$pairinds[new,2],] yarr = xarr yarr[ij[1],ij[2]] = yarr[ij[1],ij[2]]-1 yarr[ipjp[1],ipjp[2]] = yarr[ipjp[1],ipjp[2]]-1 yarr[ij[1],ipjp[2]] = yarr[ij[1],ipjp[2]]+1 yarr[ipjp[1],ij[2]] = yarr[ipjp[1],ij[2]]+1 list(yarr=yarr, ncomb=ncomb) } ### Then the q kernel value is 1/ncomb at xarr,yarr for a feasible yarr ## Now code the Metropolis-Hastings step: MHstep = function(xarr) { out1 = qsamp(xarr) tmp2 = MatPrs(out1$yarr) ### we know that xarr and yarr are mutually feasible rho = out1$ncomb/nrow(tmp2$pairinds) xout = if(runif(1) tmpunif = array(runif(35),c(5,7)) length(unique( apply(TabArr,3, function(mat) sum(c(mat*tmpunif))) )) ### 9571 distinct ones. ### Now let's try to estimate the total number of unique tables: do 1e6+1e4 > Hashvec = apply(TabArr,3, function(mat) sum(c(mat*tmpunif))) for(j in 1:1e2) { for(i in 1:1e4) { xtmp = MHstep(xtmp) TabArr[,,i] = xtmp } Hashvec = c(Hashvec, apply(TabArr,3, function(mat) sum(c(mat*tmpunif)))) } ### 1e6 tables take several minutes to generate ### Now length 1,010,000 but # distinct = 966462 system.time({ for(j in 1:1e2) { for(i in 1:1e4) { xtmp = MHstep(xtmp) TabArr[,,i] = xtmp } Hashvec = c(Hashvec, apply(TabArr,3, function(mat) sum(c(mat*tmpunif)))) } }) user system elapsed 155.19 0.29 156.30 ### 3 minutes per million > length(Hashvec) [1] 2010000 > length(unique(Hashvec)) [1] 1923308 ### Still very few repeats !! > marg1 [1] 8 11 10 12 5 > marg2 [1] 8 5 9 6 5 8 5 > apply(xtmp,1,sum) [1] 8 11 10 12 5 > apply(xtmp,2,sum) [1] 8 5 9 6 5 8 5 > table(apply(TabArr,3, function(mat) max(c(mat)))) 3 4 5 6 7 8 9 110 2344 4025 2357 813 324 27 > table(apply(TabArr,3, function(mat) sum(c(mat)==0))) 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 1 3 4 50 151 391 798 1223 1683 1865 1621 1111 681 276 110 19 20 21 22 19 8 2 3 #### There is a wide variety of tables to sample ! Metropolis-Hastings ### gets us to new ones effectively, but there is no sense of ### converging to equilibrium until we have sampled all ...