## this code involves two parts: 
## Part I:  parameter estimation
## Part II: Viterbi-based state decoding
## input required: "obs", a matrix with three columns, 
## the first giving the time series of depths, 
## the second giving the "times since the last deep dive" (as described in Section 4.1 of the main manuscript) 
## and the third indicating which observations were actually missing (by 0s, and note these need to be interpolated in the first column to allow for the calculation of the then approximate state transition probabilities)

## the real data analyzed in the paper are available from the authors at request



 library(boot) # not necessary, one can instead just replace logit and inv.logit by qlogis and plogis, respectively


  
###
# Part I - fitting the model described in Section 4.1 of the main manuscript to data (stored in "obs")
###


## function that returns the essential part of the t.p.m. of the HMM that approximates the model described in Section 4.1 of the paper (i.e., the part that represents the dwell-time distribution in the semi-Markovian state 3) 
 genOmega0 <- function(m,sm){
  Omega <- diag(m+6)*0
  probs <- dnbinom(0:(m-1),mu=sm[1],size=sm[2])
  den   <- 1-c(0,pnbinom(0:(m-1),mu=sm[1],size=sm[2]))
  for (i in 1:m){
    Omega[2+i,3+m] <- probs[i]/den[i]
    ifelse(i!=m,Omega[2+i,3+i]<-1-Omega[2+i,3+m],Omega[2+i,2+i]<-1-Omega[2+i,3+m])
                }
  return(Omega)
 }


## function that transforms each of the (possibly constrained) model parameters to the real line
 pn2pw <- function(mu,sigma,omega,sm,zeta,alpha,gamma,beta){
  tmu    <- c(log(-mu[1]),log(mu[2]),mu[3],log(-mu[4]),log(mu[5]),mu[6],log(-mu[7]))
  tsigma <- log(sigma)
  tomega <- logit(omega)
  tsm    <- log(sm)
  tzeta  <- zeta
  talpha <- alpha
  tgamma <- gamma
  tbeta  <- beta
  parvect <- c(tmu,tsigma,tomega,tsm,tzeta,talpha,tgamma,tbeta)
  return(parvect)
 }


## inverse transformation back to the natural parameter space
 pw2pn <- function(parvect){
  mu     <- c(-exp(parvect[1]),exp(parvect[2]),parvect[3],-exp(parvect[4]),exp(parvect[5]),parvect[6],-exp(parvect[7]))
  sigma  <- exp(parvect[8:14])
  omega  <- inv.logit(parvect[15:16])
  sm     <- exp(parvect[17:18])
  zeta   <- parvect[19:20]
  alpha  <- parvect[21:23]
  gamma  <- parvect[24:25]
  beta   <- parvect[26:28]
  return(list(mu=mu,sigma=sigma,omega=omega,sm=sm,zeta=zeta,alpha=alpha,gamma=gamma,beta=beta))
 }


## function that generates a time-dependent t.p.m. by incorporating time-dependent transition probabilities into baseline t.p.m. that includes only the parts that are constant over time
 genOmega<-function(Omega0,N,o12t,o15t,o23t,o41t,o56t){
  Omega          <- Omega0
  Omega[1,2]     <- o12t
  Omega[1,N-2]   <- o15t
  Omega[2,3]     <- o23t
  Omega[2,2]     <- 1-Omega[2,3]
  Omega[N,1]     <- Omega[N-3,1]   <- o41t
  Omega[N,N]     <- Omega[N-3,N-3] <- 1-o41t
  Omega[N-2,N-1] <- o56t
  Omega[N-2,N-2] <- 1-Omega[N-2,N-1]
  return(Omega)
 }


## function that computes minus the log likelihood of the model described in Section 4.1 of the manuscript, for given observations and (transformed) parameters 
 mllk <- function(parvect,obs){
  n   <- dim(obs)[1]
  N   <- m+6
  lpn <- pw2pn(parvect)
  if (m==1) lpn$sm[2]<-1
  allprobs <- matrix(rep(1,N*(n-1)),nrow=n-1)
  Omega0 <- genOmega0(m,lpn$sm)
  Omega0[1,1]     <- lpn$omega[1]
  Omega0[N-1,N-1] <- lpn$omega[2]
  Omega0[N-1,N]   <- 1-lpn$omega[2]
  delta <- rep(0,N); delta[2]<-1
  # evaluate densities of the state-dependent distributions
     which(obs[,3]==0)->ind   
     ind<-c(ind,ind+1)        
     ind<-unique(sort(ind))   
     for (k in (2:n)[-ind]){  
        probs<-rep(NA,7)
        for (j in 1:7){
        probs[j]<-dgamma(obs[k,1],shape=max(obs[k-1,1]+lpn$mu[j],1)^2/lpn$sigma[j]^2,scale=lpn$sigma[j]^2/max(obs[k-1,1]+lpn$mu[j],1))
                      }
        allprobs[k-1,1]            <- probs[1]
        allprobs[k-1,2]            <- probs[2]
        allprobs[k-1,3:(N-4)]      <- rep(probs[3],m)
        allprobs[k-1,N-3]          <- probs[4]
        allprobs[k-1,N-2]          <- probs[5]
        allprobs[k-1,N-1]          <- probs[6]
        allprobs[k-1,N]            <- probs[7]
                  }
        # below are given some conditions to improve the numerical stability of the procedure leveraging that we know by assumption that conditional on a given depth the animal cannot be in a number of behavioural states          
     which(obs[2:n,1]> 30) -> ind; allprobs[ind,1] <- 0
     which(obs[2:n,1]>600) -> ind; allprobs[ind,c(1,N-2,N-1,N)] <- 0
     which(obs[2:n,1]<400) -> ind; allprobs[ind,3:(N-4)] <- 0
  # compute vectors of (time-dependent) transition probabilities that are affected by feedback
     pvec<-inv.logit(10*lpn$zeta[1]+1/10*lpn$zeta[2]*obs[,2])
     o12 <-pvec*(1-lpn$omega[1])
     o15 <-(1-pvec)*(1-lpn$omega[1])
     o23 <- inv.logit(10*lpn$alpha[1]+1/10*lpn$alpha[2]*obs[,1]+1/1000*lpn$alpha[3]*obs[,1]^2)
     EX<- -1/10*lpn$alpha[2]/(2*1/1000*lpn$alpha[3]); which(obs[,1]>EX)->ind
     if (length(ind)>0) o23[ind] <- inv.logit(10*lpn$alpha[1]-(1/10*lpn$alpha[2])^2/(4*1/1000*lpn$alpha[3]))
     o41 <- inv.logit(10*lpn$gamma[1]+1/10*lpn$gamma[2]*obs[,1])
     o56 <- inv.logit(10*lpn$beta[1]+1/10*lpn$beta[2]*obs[,1]+1/1000*lpn$beta[3]*obs[,1]^2)
  # run forward algorithm (applying a scaling strategy to obtain the log-likelihood)
     lscale <- 0
     foo <- delta
     foo <- foo*allprobs[1,]
     for (i in 3:n){
       foo <- foo%*%genOmega(Omega0,N,o12[i-1],o15[i-1],o23[i-1],o41[i-1],o56[i-1])*allprobs[i-1,]
       sumfoo <- sum(foo)
       lscale <- lscale+log(sumfoo)
       foo <- foo/sumfoo
     }
  lscale
  mllk <- -lscale
  return(mllk)
 }


## function that runs the numerical maximization of the above likelihood function and returns the results
 mle <- function(obs,mu0,sigma0,omega0,sm0,zeta0,alpha0,gamma0,beta0){
  parvect <- pn2pw(mu0,sigma0,omega0,sm0,zeta0,alpha0,gamma0,beta0)
  mod     <- nlm(mllk,parvect,obs,print.level=2,iterlim=2000) 
  mllk    <- mod$minimum
  lpn     <- pw2pn(mod$estimate)
  list(mu=lpn$mu,sigma=lpn$sigma,omega=lpn$omega,sm=lpn$sm,zeta=lpn$zeta,alpha=lpn$alpha,gamma=lpn$gamma,beta=lpn$beta,mllk=mllk,H=mod$hessian)
 }


## initial parameter values used in the numerical maximization (several different sets of initial values should be tried in order to ensure hitting the global maximum)
 mu0    <- c(-5,15,0,-8,4,0,-4)
 sigma0 <- c(0.5,5,8,4,4,1,4)
 omega0 <- c(0.93,0.97)
 sm0    <- c(135,27)
 zeta0  <- c(-0.38,0.03)
 alpha0 <- c(-2.3,0.46,-0.026)
 gamma0 <- c(0.5,-8)
 beta0  <- c(-0.15,-0.2,0.06)


  m  <- 250 # size of the state aggregate in the HMM-based approximation of the model with semi-Markovian state 3 (needs to be sufficiently large, but not excessively large, as the HSMM component substantially increases the computational effort)

  pw2pn(pn2pw(mu0,sigma0,omega0,sm0,zeta0,alpha0,gamma0,beta0)) # check transformations
 
  Sys.time()->s
  mle(obs,mu0,sigma0,omega0,sm0,zeta0,alpha0,gamma0,beta0) -> fHSMM # run numerical maximization of the likelihood
  Sys.time()-s # display computing time
  
  fHSMM # display results

              
              
###
# Part II - decoding the underlying state sequence based on the fitted model using the Viterbi algorithm 
###


# function that applies the Viterbi algorithm to find the most likely state sequence to underlie the observations (under fitted model 'fHSMM')
 viterbi <- function(obs,fHSMM,m){
  n   <- dim(obs)[1]
  N   <- m+6
  lpn <- fHSMM
  if (m==1) lpn$sm[2]<-1
  allprobs <- matrix(rep(1,N*(n-1)),nrow=n-1)
  Omega0 <- genOmega0(m,fHSMM$sm)
  Omega0[1,1]     <- fHSMM$omega[1]
  Omega0[N-1,N-1] <- fHSMM$omega[2]
  Omega0[N-1,N]   <- 1-fHSMM$omega[2]
  delta <- rep(0,N); delta[2]<-1
  which(obs[,3]==0)->ind   
     ind<-c(ind,ind+1)        
     ind<-unique(sort(ind))   
     for (k in (2:n)[-ind]){ 
      probs<-rep(NA,7)
      for (j in 1:7){
      probs[j]<-dgamma(obs[k,1],shape=max(obs[k-1,1]+fHSMM$mu[j],1)^2/fHSMM$sigma[j]^2,scale=fHSMM$sigma[j]^2/max(obs[k-1,1]+fHSMM$mu[j],1))
                    }
      allprobs[k-1,1]            <- probs[1]
      allprobs[k-1,2]            <- probs[2]
      allprobs[k-1,3:(N-4)]      <- rep(probs[3],m)
      allprobs[k-1,N-3]          <- probs[4]
      allprobs[k-1,N-2]          <- probs[5]
      allprobs[k-1,N-1]          <- probs[6]
      allprobs[k-1,N]            <- probs[7]
                }
  which(obs[2:n,1]> 30) -> ind; allprobs[ind,1] <- 0
  which(obs[2:n,1]>600) -> ind; allprobs[ind,(N-2):N] <- 0
  which(obs[2:n,1]<400) -> ind; allprobs[ind,3:(N-4)] <- 0
  pvec<-inv.logit(10*lpn$zeta[1]+1/10*lpn$zeta[2]*obs[,2])
  o12 <-pvec*(1-lpn$omega[1])
  o15 <-(1-pvec)*(1-lpn$omega[1])
  o23 <- inv.logit(10*lpn$alpha[1]+1/10*lpn$alpha[2]*obs[,1]+1/1000*lpn$alpha[3]*obs[,1]^2)
  EX<- -1/10*lpn$alpha[2]/(2*1/1000*lpn$alpha[3]); which(obs[,1]>EX)->ind
  if (length(ind)>0) o23[ind] <- inv.logit(10*lpn$alpha[1]-(1/10*lpn$alpha[2])^2/(4*1/1000*lpn$alpha[3]))
  o41 <- inv.logit(10*lpn$gamma[1]+1/10*lpn$gamma[2]*obs[,1])
  o56 <- inv.logit(10*lpn$beta[1]+1/10*lpn$beta[2]*obs[,1]+1/1000*lpn$beta[3]*obs[,1]^2)
  xi <- matrix(0,n-1,N)
  foo <- delta*allprobs[1,]
  xi[1,] <- foo/sum(foo)
  gamma <- vector("list")
  for (i in 3:n){
  foo <- apply(xi[i-2,]*genOmega(Omega0,N,o12[i-1],o15[i-1],o23[i-1],o41[i-1],o56[i-1]),2,max)*allprobs[i-1,]
  xi[i-1,] <- foo/sum(foo)
                }
  iv <- numeric(n-1)
  iv[n-1] <-which.max(xi[n-1,])
  for (i in (n-1):2){
  iv[i-1] <- which.max(genOmega(Omega0,N,o12[i-1],o15[i-1],o23[i-1],o41[i-1],o56[i-1])[,iv[i]]*xi[i-1,])
                    }
  iv
  }


 states <- viterbi(obs,fHSMM,m) # run Viterbi

 which(states<(m+3) & states>2)->ind
 states[ind]<-rep(3,length(ind))
 which(states==(m+3))->ind
 states[ind]<-rep(4,length(ind))
 which(states==(m+4))->ind
 states[ind]<-rep(5,length(ind))
 which(states==(m+5))->ind
 states[ind]<-rep(6,length(ind))
 which(states==(m+6))->ind
 states[ind]<-rep(7,length(ind))

 states # display decoded state sequence