Solution of Homework Problem 4, STAT 705,   Fall 2017.
=====================================================

## (a) You should assume that all of the Mxy[,"x"]  and altx elements 
##  are distinct, or else that duplicated x values always have the 
##  same y values. 

### First, here is some code using for-loops to do the assigned linear interpolation task.
## 
## We begin with preliminary steps to make sure that we remove duplicate rows of Mxy and
##  sort the rows in order of increasing x = Mxy[,"x"], and also sort the  xalt  values 
##  within the interval from min(x) to max(x).
##
## Then the main idea of the loops is to do separately, in a single pass, an updating over
## the combined and sorted  x   and  xnew  vectors to find in two columns the maximum  x-value 
## less than or equal to each x or xnew value (and also th associated y-value) and also in 
## another two columns column the minimum x-value greater than or equal to each x or xnew value, 
## and its associated y-value. We keep the associated x-columns and y-columns together.
##
## Then in a final for-loop, we use the constructed columns of x-values most closely surrounding
## each xnew value to do the linear interpolation between their associate y-values.

   LinInterpA = function(Mxy, xalt) {
        newmat = Mxy[!duplicated(Mxy[,"x"]),]
        oldx = newmat[,"x"]
        oldy = newmat[,"y"] 
        rng = range(oldx)
        xnew = sort(xalt[xalt >= rng[1] & xalt <= rng[2] ])
        xcomb = c(oldx, xnew)
        ycomb = c(Mxy[,"y"], rep(NA,length(xnew)))
        oldind = c(rep(1,nrow(Mxy)), rep(0,length(xnew)))
        indord = order(xcomb)
        xcomb = xcomb[indord]
        ycomb = ycomb[indord]
        oldind = oldind[indord]
        new.T = (oldind==0)
        N = length(xcomb)
        lowx = hix = lowy = hiy = numeric(N)
        xlow = xcomb[1]
        ylow = oldy[which.min(oldx)]        
        for(i in 1:N)  {
             if(oldind[i] & xcomb[i] > xlow) {
                   xlow = xcomb[i]
                   ylow = ycomb[i] }
             lowx[i] = xlow
             lowy[i] = ylow
        }
        xhi = xcomb[N]
        yhi = oldy[which.max(oldx)]        
        for(i in N:1)  {
             if(oldind[i] & xcomb[i] < xhi) {
                   xhi = xcomb[i]
                   yhi = ycomb[i] }
             hix[i] = xhi
             hiy[i] = yhi
        }
        ynew = numeric(length(xnew))
        auxind = which(new.T)
        for(j in 1:length(xnew)) {
             i = auxind[j]
             frac = if(lowx[i]==hix[i]) 0 else
                      (xcomb[i]-lowx[i])/(hix[i]-lowx[i])
             ynew[j] = lowy[i] + frac*(hiy[i]-lowy[i])
        }
        cbind(newx = xnew, newy = ynew)
        }

> x0 = runif(15)
  M0 = cbind(x=xseq, y=log(1+5*xseq))
  plot(M0, type="l", col="blue")
  points(LinInterpA(M0,x0))

### This shows that the interpolated points fall very nicely on the curve
##--------------------------------------------------------------------------

### Now repeat the coding using cummax, cummin, and match to vectorize all 
###   operations, avoiding the need for for-loops

## in this code, the updating of largest x less than or equal to the current
##   x or xnew value is done "vectorially" via cummax, and a similar idea is
##    used in reverse order to find smallest x larger than or equal to each 
##   x or xnew, using cummin.
##
## But in this way of doing the calculation, we could NOT keep columns of 
## y-values associated with the bracketing x-values. To fix this, we needed to use
## "match" to find the index of x-value associated with each lower and upper 
## bracketing x-value surrounding each  xnew value.


   LinInterpB = function(Mxy, xalt) {
        newmat = Mxy[!duplicated(Mxy[,"x"]),]
        oldx = newmat[,"x"]
        oldy = newmat[,"y"] 
        rng = range(oldx)
        xnew = sort(xalt[xalt >= rng[1] & xalt <= rng[2] ])
        xcomb = c(oldx, xnew)
        ycomb = c(Mxy[,"y"], rep(NA,length(xnew)))
        oldind = c(rep(1,nrow(Mxy)), rep(0,length(xnew)))
        indord = order(xcomb)
        xcomb = xcomb[indord]
        ycomb = ycomb[indord]
        oldind = oldind[indord]
        new.T = (oldind==0)
     ### up to here, just as in LinInterpA
        lowx = cummax(c(rng[1],ifelse(new.T,-Inf,xcomb)))[-1]
        hix = rev(cummin(c(rng[2],rev(ifelse(oldind==1,xcomb,Inf))))[-1])
   #### now these are the oldx-values respectively bracketing all of the
   #### new x values below and above
        lowind = match(lowx[new.T], oldx)
        upind = match(hix[new.T], oldx)
        fracx = ifelse(upind==lowind,0, (xcomb[new.T]-oldx[lowind])/
                 (oldx[upind]-oldx[lowind]))
        cbind(newx = xcomb[new.T], newy = oldy[lowind]+
                 fracx*(oldy[upind]-oldy[lowind]))
        }

> plot(M0, type="l", col="red")
  points(LinInterpB(M0,x0))           ### RESULT as before


> xalt = runif(1000, .05, .95)
> M1 = cbind(x=xv, y=sin(3*xv)^2+rnorm(1e5)*0.1)
> system.time({ out1 = LinInterpA(M1,xalt) } )
   user  system elapsed 
   1.54    0.00    1.55 
> system.time({ out2 = LinInterpB(M1,xalt) } )
   user  system elapsed 
   0.14    0.08    0.21 
 
> sum(abs(c(out1-out2)))
[1] 0

### Try again with larger  xalt

> xalt2 = 0.5 + rbeta(3e4, .5, .5)

>  system.time({ out1 = LinInterpA(M1,xalt2) } )
   user  system elapsed 
   1.75    0.00    1.76 
>  system.time({ out2 = LinInterpB(M1,xalt2) } )
   user  system elapsed 
   0.17    0.00    0.17 

> sum(abs(c(out1-out2)))
[1] 0
> dim(out1)
[1] 14934     2
> dim(out2)
[1] 14934     2


> xalt3 = 0.05 + rbeta(3e4, 1.25, .25)

>  system.time({ out1 = LinInterpA(M1,xalt3) } )
   user  system elapsed 
   1.69    0.00    1.70 
>  system.time({ out2 = LinInterpB(M1,xalt3) } )
   user  system elapsed 
    0.2     0.0     0.2 


### Further testing with the built-in function "approx" shows that 
###  it is roughly about equal in speed to   LinInterpB.

### NOTE: you could see from the statement of the problem that I would strongly
prefer that, even if you can find a library function in R that does essentially 
the same task that I am assigning in a Homework problem, you should try to code 
it yourself using more basic R commands. If you know a library function and 
cannot code the task yourself, you may use the library function if you provide 
careful checks that it does what you think it does. But you will get less credit 
for that than if you had done the coding yourself as requested.