## this code involves two parts: ## Part I: parameter estimation ## Part II: Viterbi-based state decoding ## input required: "obs", a matrix with three columns, ## the first giving the time series of depths, ## the second giving the "times since the last deep dive" (as described in Section 4.1 of the main manuscript) ## and the third indicating which observations were actually missing (by 0s, and note these need to be interpolated in the first column to allow for the calculation of the then approximate state transition probabilities) ## the real data analyzed in the paper are available from the authors at request library(boot) # not necessary, one can instead just replace logit and inv.logit by qlogis and plogis, respectively ### # Part I - fitting the model described in Section 4.1 of the main manuscript to data (stored in "obs") ### ## function that returns the essential part of the t.p.m. of the HMM that approximates the model described in Section 4.1 of the paper (i.e., the part that represents the dwell-time distribution in the semi-Markovian state 3) genOmega0 <- function(m,sm){ Omega <- diag(m+6)*0 probs <- dnbinom(0:(m-1),mu=sm[1],size=sm[2]) den <- 1-c(0,pnbinom(0:(m-1),mu=sm[1],size=sm[2])) for (i in 1:m){ Omega[2+i,3+m] <- probs[i]/den[i] ifelse(i!=m,Omega[2+i,3+i]<-1-Omega[2+i,3+m],Omega[2+i,2+i]<-1-Omega[2+i,3+m]) } return(Omega) } ## function that transforms each of the (possibly constrained) model parameters to the real line pn2pw <- function(mu,sigma,omega,sm,zeta,alpha,gamma,beta){ tmu <- c(log(-mu[1]),log(mu[2]),mu[3],log(-mu[4]),log(mu[5]),mu[6],log(-mu[7])) tsigma <- log(sigma) tomega <- logit(omega) tsm <- log(sm) tzeta <- zeta talpha <- alpha tgamma <- gamma tbeta <- beta parvect <- c(tmu,tsigma,tomega,tsm,tzeta,talpha,tgamma,tbeta) return(parvect) } ## inverse transformation back to the natural parameter space pw2pn <- function(parvect){ mu <- c(-exp(parvect[1]),exp(parvect[2]),parvect[3],-exp(parvect[4]),exp(parvect[5]),parvect[6],-exp(parvect[7])) sigma <- exp(parvect[8:14]) omega <- inv.logit(parvect[15:16]) sm <- exp(parvect[17:18]) zeta <- parvect[19:20] alpha <- parvect[21:23] gamma <- parvect[24:25] beta <- parvect[26:28] return(list(mu=mu,sigma=sigma,omega=omega,sm=sm,zeta=zeta,alpha=alpha,gamma=gamma,beta=beta)) } ## function that generates a time-dependent t.p.m. by incorporating time-dependent transition probabilities into baseline t.p.m. that includes only the parts that are constant over time genOmega<-function(Omega0,N,o12t,o15t,o23t,o41t,o56t){ Omega <- Omega0 Omega[1,2] <- o12t Omega[1,N-2] <- o15t Omega[2,3] <- o23t Omega[2,2] <- 1-Omega[2,3] Omega[N,1] <- Omega[N-3,1] <- o41t Omega[N,N] <- Omega[N-3,N-3] <- 1-o41t Omega[N-2,N-1] <- o56t Omega[N-2,N-2] <- 1-Omega[N-2,N-1] return(Omega) } ## function that computes minus the log likelihood of the model described in Section 4.1 of the manuscript, for given observations and (transformed) parameters mllk <- function(parvect,obs){ n <- dim(obs)[1] N <- m+6 lpn <- pw2pn(parvect) if (m==1) lpn$sm[2]<-1 allprobs <- matrix(rep(1,N*(n-1)),nrow=n-1) Omega0 <- genOmega0(m,lpn$sm) Omega0[1,1] <- lpn$omega[1] Omega0[N-1,N-1] <- lpn$omega[2] Omega0[N-1,N] <- 1-lpn$omega[2] delta <- rep(0,N); delta[2]<-1 # evaluate densities of the state-dependent distributions which(obs[,3]==0)->ind ind<-c(ind,ind+1) ind<-unique(sort(ind)) for (k in (2:n)[-ind]){ probs<-rep(NA,7) for (j in 1:7){ probs[j]<-dgamma(obs[k,1],shape=max(obs[k-1,1]+lpn$mu[j],1)^2/lpn$sigma[j]^2,scale=lpn$sigma[j]^2/max(obs[k-1,1]+lpn$mu[j],1)) } allprobs[k-1,1] <- probs[1] allprobs[k-1,2] <- probs[2] allprobs[k-1,3:(N-4)] <- rep(probs[3],m) allprobs[k-1,N-3] <- probs[4] allprobs[k-1,N-2] <- probs[5] allprobs[k-1,N-1] <- probs[6] allprobs[k-1,N] <- probs[7] } # below are given some conditions to improve the numerical stability of the procedure leveraging that we know by assumption that conditional on a given depth the animal cannot be in a number of behavioural states which(obs[2:n,1]> 30) -> ind; allprobs[ind,1] <- 0 which(obs[2:n,1]>600) -> ind; allprobs[ind,c(1,N-2,N-1,N)] <- 0 which(obs[2:n,1]<400) -> ind; allprobs[ind,3:(N-4)] <- 0 # compute vectors of (time-dependent) transition probabilities that are affected by feedback pvec<-inv.logit(10*lpn$zeta[1]+1/10*lpn$zeta[2]*obs[,2]) o12 <-pvec*(1-lpn$omega[1]) o15 <-(1-pvec)*(1-lpn$omega[1]) o23 <- inv.logit(10*lpn$alpha[1]+1/10*lpn$alpha[2]*obs[,1]+1/1000*lpn$alpha[3]*obs[,1]^2) EX<- -1/10*lpn$alpha[2]/(2*1/1000*lpn$alpha[3]); which(obs[,1]>EX)->ind if (length(ind)>0) o23[ind] <- inv.logit(10*lpn$alpha[1]-(1/10*lpn$alpha[2])^2/(4*1/1000*lpn$alpha[3])) o41 <- inv.logit(10*lpn$gamma[1]+1/10*lpn$gamma[2]*obs[,1]) o56 <- inv.logit(10*lpn$beta[1]+1/10*lpn$beta[2]*obs[,1]+1/1000*lpn$beta[3]*obs[,1]^2) # run forward algorithm (applying a scaling strategy to obtain the log-likelihood) lscale <- 0 foo <- delta foo <- foo*allprobs[1,] for (i in 3:n){ foo <- foo%*%genOmega(Omega0,N,o12[i-1],o15[i-1],o23[i-1],o41[i-1],o56[i-1])*allprobs[i-1,] sumfoo <- sum(foo) lscale <- lscale+log(sumfoo) foo <- foo/sumfoo } lscale mllk <- -lscale return(mllk) } ## function that runs the numerical maximization of the above likelihood function and returns the results mle <- function(obs,mu0,sigma0,omega0,sm0,zeta0,alpha0,gamma0,beta0){ parvect <- pn2pw(mu0,sigma0,omega0,sm0,zeta0,alpha0,gamma0,beta0) mod <- nlm(mllk,parvect,obs,print.level=2,iterlim=2000) mllk <- mod$minimum lpn <- pw2pn(mod$estimate) list(mu=lpn$mu,sigma=lpn$sigma,omega=lpn$omega,sm=lpn$sm,zeta=lpn$zeta,alpha=lpn$alpha,gamma=lpn$gamma,beta=lpn$beta,mllk=mllk,H=mod$hessian) } ## initial parameter values used in the numerical maximization (several different sets of initial values should be tried in order to ensure hitting the global maximum) mu0 <- c(-5,15,0,-8,4,0,-4) sigma0 <- c(0.5,5,8,4,4,1,4) omega0 <- c(0.93,0.97) sm0 <- c(135,27) zeta0 <- c(-0.38,0.03) alpha0 <- c(-2.3,0.46,-0.026) gamma0 <- c(0.5,-8) beta0 <- c(-0.15,-0.2,0.06) m <- 250 # size of the state aggregate in the HMM-based approximation of the model with semi-Markovian state 3 (needs to be sufficiently large, but not excessively large, as the HSMM component substantially increases the computational effort) pw2pn(pn2pw(mu0,sigma0,omega0,sm0,zeta0,alpha0,gamma0,beta0)) # check transformations Sys.time()->s mle(obs,mu0,sigma0,omega0,sm0,zeta0,alpha0,gamma0,beta0) -> fHSMM # run numerical maximization of the likelihood Sys.time()-s # display computing time fHSMM # display results ### # Part II - decoding the underlying state sequence based on the fitted model using the Viterbi algorithm ### # function that applies the Viterbi algorithm to find the most likely state sequence to underlie the observations (under fitted model 'fHSMM') viterbi <- function(obs,fHSMM,m){ n <- dim(obs)[1] N <- m+6 lpn <- fHSMM if (m==1) lpn$sm[2]<-1 allprobs <- matrix(rep(1,N*(n-1)),nrow=n-1) Omega0 <- genOmega0(m,fHSMM$sm) Omega0[1,1] <- fHSMM$omega[1] Omega0[N-1,N-1] <- fHSMM$omega[2] Omega0[N-1,N] <- 1-fHSMM$omega[2] delta <- rep(0,N); delta[2]<-1 which(obs[,3]==0)->ind ind<-c(ind,ind+1) ind<-unique(sort(ind)) for (k in (2:n)[-ind]){ probs<-rep(NA,7) for (j in 1:7){ probs[j]<-dgamma(obs[k,1],shape=max(obs[k-1,1]+fHSMM$mu[j],1)^2/fHSMM$sigma[j]^2,scale=fHSMM$sigma[j]^2/max(obs[k-1,1]+fHSMM$mu[j],1)) } allprobs[k-1,1] <- probs[1] allprobs[k-1,2] <- probs[2] allprobs[k-1,3:(N-4)] <- rep(probs[3],m) allprobs[k-1,N-3] <- probs[4] allprobs[k-1,N-2] <- probs[5] allprobs[k-1,N-1] <- probs[6] allprobs[k-1,N] <- probs[7] } which(obs[2:n,1]> 30) -> ind; allprobs[ind,1] <- 0 which(obs[2:n,1]>600) -> ind; allprobs[ind,(N-2):N] <- 0 which(obs[2:n,1]<400) -> ind; allprobs[ind,3:(N-4)] <- 0 pvec<-inv.logit(10*lpn$zeta[1]+1/10*lpn$zeta[2]*obs[,2]) o12 <-pvec*(1-lpn$omega[1]) o15 <-(1-pvec)*(1-lpn$omega[1]) o23 <- inv.logit(10*lpn$alpha[1]+1/10*lpn$alpha[2]*obs[,1]+1/1000*lpn$alpha[3]*obs[,1]^2) EX<- -1/10*lpn$alpha[2]/(2*1/1000*lpn$alpha[3]); which(obs[,1]>EX)->ind if (length(ind)>0) o23[ind] <- inv.logit(10*lpn$alpha[1]-(1/10*lpn$alpha[2])^2/(4*1/1000*lpn$alpha[3])) o41 <- inv.logit(10*lpn$gamma[1]+1/10*lpn$gamma[2]*obs[,1]) o56 <- inv.logit(10*lpn$beta[1]+1/10*lpn$beta[2]*obs[,1]+1/1000*lpn$beta[3]*obs[,1]^2) xi <- matrix(0,n-1,N) foo <- delta*allprobs[1,] xi[1,] <- foo/sum(foo) gamma <- vector("list") for (i in 3:n){ foo <- apply(xi[i-2,]*genOmega(Omega0,N,o12[i-1],o15[i-1],o23[i-1],o41[i-1],o56[i-1]),2,max)*allprobs[i-1,] xi[i-1,] <- foo/sum(foo) } iv <- numeric(n-1) iv[n-1] <-which.max(xi[n-1,]) for (i in (n-1):2){ iv[i-1] <- which.max(genOmega(Omega0,N,o12[i-1],o15[i-1],o23[i-1],o41[i-1],o56[i-1])[,iv[i]]*xi[i-1,]) } iv } states <- viterbi(obs,fHSMM,m) # run Viterbi which(states<(m+3) & states>2)->ind states[ind]<-rep(3,length(ind)) which(states==(m+3))->ind states[ind]<-rep(4,length(ind)) which(states==(m+4))->ind states[ind]<-rep(5,length(ind)) which(states==(m+5))->ind states[ind]<-rep(6,length(ind)) which(states==(m+6))->ind states[ind]<-rep(7,length(ind)) states # display decoded state sequence