Class Log for Dec. 11 on MCMC  in Contingency-Table Example
===========================================================

set.seed(33333)
Xarr = array(sample(0:3, 35, rep=T), c(5,7))

> Xarr
     [,1] [,2] [,3] [,4] [,5] [,6] [,7]
[1,]    3    0    2    0    1    2    0
[2,]    1    2    2    0    3    2    1
[3,]    1    1    3    3    0    0    2
[4,]    3    1    2    1    1    2    2
[5,]    0    1    0    2    0    2    0

### We want to sample randomly, equiprobably, from all Tables 
###    with the same marginals.

> marg1 = apply(Xarr,1,sum)
  marg1
[1]  8 11 10 12  5
> marg2 = apply(Xarr,2,sum)
  marg2
[1] 8 5 9 6 5 8 5

### We define Markov chain to select two positions (i,j) and  (ip,jp)
###     where Xarr[i,j] > 0   and  Xarr[ip,jp] > 0  (i and ip different, 
###     and j and jp different) and decrease each of these table entries
###     by 1 and add one to Xarr[i,jp]  and to Xarr[ip,j]  which gives 
###     another table with the same marginals.

MatPrs = function(xarr) {
         ### function to calculate and index all (i,j) and (ip,jp) such
       ## that xarr has positive entry at both and i!=ip & j!=jp
         tmp1 = which( xarr > 0 , arr.ind=T)   
         indmat = outer(1:nrow(tmp1),1:nrow(tmp1), function(x,y) 
                    tmp1[x,1]!=tmp1[y,1] & tmp1[x,2]!=tmp1[y,2])
         pairs = which(indmat, arr.ind=T)
         list(ijinds=tmp1, pairinds=pairs) }

## in the output of MatPrs, the possible (i,j) and (ip,jp) pairs are
##    ijinds[pairinds[k,1],]  and  ijinds[pairinds[k,2],]
##    and  nrow(pairinds) gives total number of (i,j,ip,jp) comb's we select among

> tmp = MatPrs(Xarr)
> names(tmp)
[1] "ijinds"   "pairinds"
> nrow(tmp[[1]])
[1] 25
> dim(tmp[[2]])
[1] 424   2

qsamp = function(xarr) {
         tmp = MatPrs(xarr)
         ncomb = nrow(tmp[[2]])
         new = sample(ncomb,1)
         ij = tmp$ijinds[tmp$pairinds[new,1],]
         ipjp = tmp$ijinds[tmp$pairinds[new,2],]
         yarr = xarr
         yarr[ij[1],ij[2]] = yarr[ij[1],ij[2]]-1
         yarr[ipjp[1],ipjp[2]] = yarr[ipjp[1],ipjp[2]]-1
         yarr[ij[1],ipjp[2]] = yarr[ij[1],ipjp[2]]+1
         yarr[ipjp[1],ij[2]] = yarr[ipjp[1],ij[2]]+1
         list(yarr=yarr, ncomb=ncomb)  }

### Then the q kernel value is 1/ncomb at xarr,yarr for a feasible yarr

## Now code the Metropolis-Hastings step:

MHstep = function(xarr) {
         out1 = qsamp(xarr)
         tmp2 = MatPrs(out1$yarr)
      ### we know that xarr and yarr are mutually feasible
         rho = out1$ncomb/nrow(tmp2$pairinds)
         xout = if(runif(1)<rho) out1$yarr else xarr }

TabArr = array(0, c(dim(Xarr),1e4))

xtmp = Xarr
for(i in 1:1e4) {
         xtmp = MHstep(xtmp)
         TabArr[,,i] = xtmp }       ### very quick !!
      
### Check number of distinct Tables created:
> tmpunif = array(runif(35),c(5,7))
  length(unique( apply(TabArr,3, function(mat) sum(c(mat*tmpunif))) ))
### 9571 distinct ones.

### Now let's try to estimate the total number of unique tables: do 1e6+1e4
> Hashvec = apply(TabArr,3, function(mat) sum(c(mat*tmpunif)))
  for(j in 1:1e2) {
       for(i in 1:1e4) {
            xtmp = MHstep(xtmp)
            TabArr[,,i] = xtmp } 
       Hashvec = c(Hashvec, apply(TabArr,3, function(mat) sum(c(mat*tmpunif))))
  }

### 1e6 tables take several minutes to generate
### Now length  1,010,000 but # distinct = 966462

system.time({  for(j in 1:1e2) {
       for(i in 1:1e4) {
            xtmp = MHstep(xtmp)
            TabArr[,,i] = xtmp } 
       Hashvec = c(Hashvec, apply(TabArr,3, function(mat) sum(c(mat*tmpunif))))
  }  })
   user  system elapsed 
 155.19    0.29  156.30       ### 3 minutes per million

> length(Hashvec)
[1] 2010000
> length(unique(Hashvec))
[1] 1923308                 ### Still very few repeats !!

> marg1
[1]  8 11 10 12  5
> marg2
[1] 8 5 9 6 5 8 5
> apply(xtmp,1,sum)
[1]  8 11 10 12  5
> apply(xtmp,2,sum)
[1] 8 5 9 6 5 8 5

> table(apply(TabArr,3, function(mat) max(c(mat))))

   3    4    5    6    7    8    9 
 110 2344 4025 2357  813  324   27 
> table(apply(TabArr,3, function(mat) sum(c(mat)==0)))

   4    5    6    7    8    9   10   11   12   13   14   15   16   17   18 
   1    3    4   50  151  391  798 1223 1683 1865 1621 1111  681  276  110 
  19   20   21   22 
  19    8    2    3 

#### There is a wide variety of tables to sample ! Metropolis-Hastings
###    gets us to new ones effectively, but there is no sense of 
###    converging to equilibrium until we have sampled all ...