## $Id: ppw.R,v 1.47 2008/12/12 14:52:17 jracine Exp jracine $

## Original code in Matlab by A. Patton, R translation and
## modifications by C. Parmeter and J. Racine.
##
## We are grateful to Andrew Patton and Dimitris Politis for their
## assistance and feedback. Kindly report features, deficiencies, and
## improvements to racinej@mcmaster.ca.
##
## The citation is A. Patton, D.N. Politis, and H. White (2008,
## forthcoming), "CORRECTION TO `Automatic Block-Length Selection for
## the Dependent Bootstrap' by D.N. Politis and H. White". This is
## based on the article by Politis, D.N., and H. White (2004),
## "Automatic block-length selection for the dependent bootstrap."
## Econometric Reviews, vol. 23.
##
## INPUTS:  data, an n x k matrix.
##
## OUTPUTS: b.star, a 2 x k vector of optimal bootstrap block lengths
## for the stationary bootstrap and circular bootstrap (BstarSB,
## BstarCB).

## The function lam() is used to construct a "flat-top" lag window for
## spectral estimation based on Politis, D.N. and J.P. Romano (1995),
## "Bias-Corrected Nonparametric Spectral Estimation", Journal of Time
## Series Analysis, vol. 16, No. 1.

lam <- function(s){
  return((abs(s)>=0)*(abs(s)<0.5)+2*(1-abs(s))*(abs(s)>=0.5)*(abs(s)<=1))
}

## The function b.star() returns the optimal bootstrap block
## lengths. Note that an example for usage appears at the bottom of
## this file. If you use this function as input into a routine such as
## tsboot() in the boot library (Angelo Canty and Brian Ripley
## (2008). boot: Bootstrap R (S-Plus) Functions. R package version
## 1.2-34.) you ought to use the option round=TRUE.

b.star <- function(data,
                   Kn = NULL,
                   mmax= NULL,
                   Bmax = NULL,
                   c = NULL,
                   round = FALSE){

  ## Convert the data object to a data frame to handle both vectors
  ## and matrices.

  data <- data.frame(data)
  n <- nrow(data)
  k <- ncol(data)

  ## Set Defaults. Note that in footnote c, page 59, for Kn Politis
  ## and White (2004) use max(5,log10(n)). Since this must be an
  ## integer we use ceiling(log10(n)).

  if (is.null(Kn)) Kn <- max(5,ceiling(log10(n)))
  if (is.null(mmax)) mmax <- ceiling(sqrt(n))+Kn
  if (is.null(Bmax)) Bmax <- ceiling(min(3*sqrt(n),n/3))
  if (is.null(c)) c <- qnorm(0.975)

  ## Create two vectors of length k in which we store results.

  BstarSB <- numeric(length=k)
  BstarCB <- numeric(length=k)

  ## Now we loop through each variable in data (i.e., column,
  ## data[,i]).

  for(i in 1:k) {

    ## We first obtain the autocorrelations rho(1),...,rho(mmax) (we
    ## need to drop the first autocorrelation as it is rho(0), hence
    ## acf[-1]). This is the default in acf [type="correlation"]. Note
    ## that Patton uses sample correlations after dropping the first
    ## mmax observations, while we instead use the acf to obtain
    ## rho(k).

    rho.k <- acf(data[,i],
                 lag.max = mmax,
                 type = "correlation",
                 plot = FALSE)$acf[-1]

    ## Next we compute mhat. The use of c*sqrt(log10(n)/n) for
    ## critical values is given in footnote c of Politis and White
    ## (2004, page 59), and the approach for determining mhat is
    ## described in footnote c.

    rho.k.crit <- c*sqrt(log10(n)/n)

    ## Compute the number of insignificant runs following each rho(k),
    ## k=1,...,mmax.
    
    num.insignificant <- sapply(1:(mmax-Kn+1),
                                function(j){
                                  sum((abs(rho.k) < rho.k.crit)[j:(j+Kn-1)])
                                })

    ## If there are any values of rho(k) for which the Kn proceeding
    ## values of rho(k+j), j=1,...,Kn are all insignificant, take the
    ## smallest rho(k) such that this holds (see footnote c for
    ## further details).

    if(any(num.insignificant==Kn)) {
      mhat <- which(num.insignificant==Kn)[1]
    } else {

      ## If no runs of length Kn are insignificant, take the smallest
      ## value of rho(k) that is significant.

      if(any(abs(rho.k) > rho.k.crit)) {
        
        lag.sig <- which(abs(rho.k) > rho.k.crit)
        k.sig <- length(lag.sig)
        
        if(k.sig == 1) {
          
          ## When only one lag is significant, mhat is the sole
          ## significant rho(k).
          
          mhat <- lag.sig
          
        } else {

          ## If there are more than one significant lags but no runs
          ## of length Kn, take the largest value of rho(k) that is
          ## significant.
          
          mhat <- max(lag.sig)
          
        }
        
      } else {
        
        ## When there are no significant lags, mhat must be the
        ## smallest positive integer (footnote c), hence mhat is set
        ## to one.
        
        mhat <- 1
        
      }

    }

    ## Compute M (mhat is at least one).

    M <- ifelse(2*mhat > mmax, mmax, 2*mhat)

    ## We compute BstarSB and BstarCB using the formulas in the above
    ## references. Now we require the autocovariance R(k) (hence
    ## type="covariance" in the acf call). Note that Patton uses
    ## sample covariances after dropping the first mmax observations,
    ## while we instead use the acf with type="covariance" to obtain
    ## R(k). Note also that we require R(0) hence we do not drop it as
    ## we did for rho(k) via acf(...)$acf[-1].
      
    kk <- seq(-M,M)

    R.k <- ccf(data[,i], data[,i],
               lag.max = M,
               type = "covariance",
               plot = FALSE)$acf
    
    Ghat <- sum(lam(kk/M)*abs(kk)*R.k)
    DCBhat <- 4/3*sum(lam(kk/M)*R.k)^2
    DSBhat <- 2*sum(lam(kk/M)*R.k)^2
    BstarSB[i] <- ((2*Ghat^2)/DSBhat)^(1/3)*n^(1/3)
    BstarCB[i] <- ((2*(Ghat^2)/DCBhat)^(1/3))*(n^(1/3))
      
  }

  ## The user can choose whether they want rounded values returned or
  ## not. BstarCB is rounded up, BstarSB simply rounded but both must
  ## be positive integers.

  if(round == FALSE) {
    
    BstarSB <- ifelse(BstarSB > Bmax, Bmax, BstarSB)
    BstarCB <- ifelse(BstarCB > Bmax, Bmax, BstarCB)

  } else {

    BstarSB <- ifelse(BstarSB > Bmax, Bmax, ifelse(BstarSB < 1, 1, round(BstarSB)))
    BstarCB <- ifelse(BstarCB > Bmax, Bmax, ifelse(BstarCB < 1, 1, ceiling(BstarCB)))

  }
  
  return(cbind(BstarSB,BstarCB)) 
  
}

## Here is a simple example with an n x 2 matrix containing n=10^5
## observations, where column 2 of x is more persistent than column
## 1. This requires that you first install the forecast library (i.e.,
## install.packages("forecast")).
##
##  library(forecast)
##  set.seed(123)
##  x <- cbind(arima.sim(n = 100000, list(ar = c(.5,.0), ma = c(0,0)),sd = 1),
##             arima.sim(n = 100000, list(ar = c(.5,.4), ma = c(0,0)),sd = 1))
##  b.star(x)  
##  b.star(x,round=TRUE)
##>   b.star(x)  
##       BstarSB   BstarCB
##[1,]  50.39272  57.68526
##[2,] 251.62894 288.04323
##>   b.star(x,round=TRUE)
##     BstarSB BstarCB
##[1,]      50      58
##[2,]     252     289