Besides methodologies developing, modeling and data analysis are important parts of my researches. You can find a few examples at my adviser's website. The following is a case study that I am currently working on.

Malaria with control

Licensed under the Creative Commons attribution-noncommercial license, http://creativecommons.org/licenses/by-nc/3.0/. Please share and remix noncommercially, mentioning its origin.
CC-BY_NC


Objectives

This will be a malaria with control case study, following closely to Manojit Roy’s SEIQS model with additional control covariate.

Introduction

Here, we consider the data for Kheda, one district in North West India with monthly reported malaria cases by Plasmodium vivax and Plasmodium falciparum, monthly rainfall, control and population. Adopting the well-established practice, we divide the studied population of size P(t) into the following distinctive classes: susceptible to infection, S(t), exposure E(t), infected and gametocytemic individuals, I(t), dormant classes H1(t), H2(t), H3(t) and recovered individuals, Q(t). To allow flexibility in representing losing immunity, both infected and recovered individuals could be reinfected and three dormant classes Hi(i=1, n) are included. Malaria mortality during each month is tracked with a variable y(t). Complete parameter definitions and units are provided in supplement S-1. Assume P(t) is known from the census data and birth rate for the S class satifies S(t)+E(t)+I(t)+Q(t)+iHi(t)=P(t), the state process X(t)=(S(t),E(t),I(t),Q(t),H1(t),H2(t),H3(t),κ1(t),μSE(t)), follows a stochastic differential equation,

humandS/dt=δP+dP/dt+μISI+μQSQ+aμIHI+bμEIEμSE(t)SδS,dE/dt=μSE(t)SμEIEδE,dI/dt=(1b)μEIE+nμHIHn(μIH+μIS+μIQ)IδI,dH1/dt=(1a)μIHInμHIH1δH1,dHi/dt=nμHIHi1nμHIHiδHi, for i 2,..,n,dQ/dt=μIQIμQSQδQ,

where μSE(t) are defined as current rate of transmission.

# define scaling functions
logit <- function(p){log(p/(1-p))}      # maps interval [0,1] to [-infty,infty]
expit <- function(p){1/(1+exp(-p))}     # reverse map of logit()
# import data
dat     <- read.csv('Kheda-data.csv')
census  <- read.csv('Kheda-census.csv')
control <- read.table('Kheda-control.csv',header=TRUE,comment.char='#')

We consider time original from Jan 1987 forward.

time.dat <- dat$time[which(dat$time>1987)]
pf.dat   <- dat$pf[which(dat$time>1987)]
rain.dat <- dat$rainfall[which(dat$time>1986)]
pop.year <- census[which(census$year>=1987),1]
pop.dat  <- census[which(census$year>=1987),2]

We replace last 24 NA rainfall values (for ’09-’10) by 2-month averages. We use July-Aug rain accumulate rainfall (adding preceding 4 months of rain) # use data from Sep 1986

nn <- length(rain.dat)
for(i in 1:24){
  avr <- mean(rain.dat[seq(i,nn-24,24)])
  rain.dat[nn-24+i] <- avr
}
for(i in seq(1,nn,12)) 
  rain.dat[c(i:(i+5),(i+8):(i+11))] <- 0   
rain <- rain.dat[9:nn]          
nn <- length(pf.dat)
acrain.dat <- rep(0,nn)
for(i in 1:nn) 
  acrain.dat[i] <- sum(rain[i:(i+3)])

We compute separate periodic basis functions for the two time segments, pre-control regime(1987-1995) & post-control regime(1996-2010).

time1.dat <- time.dat[which(time.dat<=1996)]    # pre-control regime
time2.dat <- time.dat[which(time.dat>1996)]         # post-control regime
tbasis.step <- 1/360
##combine here
tbasis <- seq(min(time1.dat)-1/24,max(time2.dat)+1/24,by=tbasis.step)
nbasis <- 6
basis <- periodic.bspline.basis(tbasis,nbasis=nbasis,degree=3,period=1)
tbasis1 <- seq(min(time1.dat)-1/24,max(time1.dat)+1/24,by=tbasis.step)
tbasis2 <- seq(min(time2.dat)-1/24,max(time2.dat)+1/24,by=tbasis.step)
nbasis <- 6
basis1 <- periodic.bspline.basis(tbasis1,nbasis=nbasis,degree=3,period=1)
basis2 <- periodic.bspline.basis(tbasis2,nbasis=nbasis,degree=3,period=1)
colnames(basis) <- colnames(basis1) <- colnames(basis2) <- paste("season",1:nbasis,sep="")

We smoothen (decadal) population data.

pop.spline <- smooth.spline(x=pop.year,y=pop.dat)
pop <- predict(pop.spline,tbasis)$y
dpopdt <- c(0,diff(pop))/tbasis.step
pop.spline <- smooth.spline(x=pop.year,y=pop.dat)
pop1 <- predict(pop.spline,tbasis1)$y
dpopdt1 <- c(0,diff(pop1))/tbasis.step
pop2 <- predict(pop.spline,tbasis2)$y
dpopdt2 <- c(0,diff(pop2))/tbasis.step

We construct separate rainfall covariates.

acrain1.dat <- acrain.dat[which(time.dat<=1996)]
acrain2.dat <- acrain.dat[which(time.dat>1996)]
acrain1 <- approx(x=time1.dat-1/12,y=acrain1.dat,xout=tbasis1,rule=2)$y     # left-shifted 1 month
acrain1 <- (acrain1-mean(acrain1))/sd(acrain1)
acrain2 <- approx(x=time2.dat-1/12,y=acrain2.dat,xout=tbasis2,rule=2)$y
acrain2 <- (acrain2-mean(acrain2))/sd(acrain2)
acrain <- approx(x=time.dat-1/12,y=acrain.dat,xout=tbasis,rule=2)$y     # left-shifted 1 month
acrain <- (acrain-mean(acrain))/sd(acrain)

We build pre-control pomp object (for time segment 1987-1995) and post-control pomp object (for time segment 1996-2010).

pf1.dat <- pf.dat[which(time.dat<=1996)]
covar1 <- cbind(pop=pop1,dpopdt=dpopdt1,rainfall=acrain1,basis=basis1)

pop3 <- predict(pop.spline,time2.dat)$y
irs.dat <- control$covpop*1000/pop3[which(time2.dat<=2010)]
# append 7-yr average, over intervals 1996-2002 and 2003-2009, to 2010 IRS data
irs.dat <- c(irs.dat,rep(0,12))
nn2 <- length(irs.dat)
for(i in 1:12){
  avc <- mean(irs.dat[seq(i,nn2-12,84)])
  irs.dat[(nn2-12+i):nn2] <- avc
}
irs <- approx(x=time2.dat,y=irs.dat,xout=tbasis2,rule=2)$y
irs <- (irs-mean(irs))/sd(irs)

irsa.dat <-c(rep(0,length(time1.dat)),irs.dat)
irsa <- approx(x=time.dat,y=irsa.dat,xout=tbasis,rule=2)$y
irsa <- irsa/sd(irsa)

pf2.dat <- pf.dat[which(time.dat>1996)]
covar2 <- cbind(pop=pop2,dpopdt=dpopdt2,rainfall=acrain2,basis=basis2, irs=irs)

covar <- cbind(pop=pop,dpopdt=dpopdt,rainfall=acrain,basis=basis,irs=irsa)

Model

We randomize (and transform where needed) initial parameter values (“b1…b6” are untransformed, “delta” and “initpop” not estimated).

par.ini.tr <- c(
  log.muEI=log(24), log.muIQ=log(runif(1,0,10)), log.muIS=log(runif(1,0,10)), log.muQS=log(runif(1,0,10)),
  log.sigPRO=log(runif(1,0.02,0.5)), log.sigOBS=log(runif(1,0.02,0.5)), log.tau=log(runif(1,0.02,0.5)),
  log.K0=log(runif(1,0,0.9)), log.F0=log(runif(1,0,0.9)),
  logit.rho=logit(runif(1,0.02,0.9)), logit.q=logit(0.001), logit.br=logit(0.003), logit.bc=logit(runif(1,0.02,0.9)),
  logit.S0=0, logit.E0=0, logit.I0=0, logit.Q0=0, 
  b1=runif(1,-5,5), b2=runif(1,-5,5), b3=runif(1,-5,5), b4=runif(1,-5,5),
  b5=runif(1,-5,5), b6=runif(1,-5,5), delta=0.02, initpop=3261828
)
# randomize S0,E0,I0,Q0 with constraint S0+E0+I0+Q0=1
S0  <- runif(1,0,1);
E0  <- runif(1,0,1)
I0  <- runif(1,0,1)
Q0  <- runif(1,0,1)
fac <- sum(c(S0,E0,I0,Q0))
S0 <- S0/fac; E0 <- E0/fac; I0 <- I0/fac; Q0 <- Q0/fac 
par.ini.tr[c("logit.S0","logit.E0","logit.I0","logit.Q0")] <- logit(c(S0,E0,I0,Q0))
# log-transformed parameters
log.par <- c(
  "log.muEI", "log.muIQ", "log.muIS", "log.muQS", "log.tau",
  "log.sigPRO", "log.sigOBS", "log.K0", "log.F0"
)
# logit-transformed parameters
logit.par <- c(
  "logit.rho", "logit.q", "logit.br", "logit.bc","logit.S0", "logit.E0", "logit.I0", "logit.Q0"
)
# untransformed parameters
untr.par <- c("b1", "b2","b3","b4","b5","b6", "delta", "initpop")
# untransformed initial parameters
par.ini <- c(exp(par.ini.tr[log.par]),expit(par.ini.tr[logit.par]),par.ini.tr[untr.par])
names(par.ini) <- c(
  "muEI",  "muIQ", "muIS", "muQS", "tau", "sigPRO", "sigOBS", "K0", "F0", "rho",
  "q", "br","bc",  "S0", "E0", "I0", "Q0", "b1","b2","b3","b4","b5","b6",
  "delta", "initpop"
)

# parameters to be estimated
par.est <- c(
  "log.muEI", "log.muIQ", "log.muIS", "log.muQS", "log.sigPRO", "log.sigOBS",
  "log.tau", "logit.rho", "logit.q", "logit.br", "logit.bc","b1","b2","b3","b4","b5","b6"
)
# initial states to be estimated
ivp.est <- c("logit.S0", "logit.E0", "logit.I0", "logit.Q0",  "log.K0", "log.F0")

# strength of random walk
sd <- vector(mode='list', length=length(par.ini.tr))
#sd <- matrix(rep(0,length(par.ini.tr)*(length(time.dat)+1)), ncol=length(par.ini.tr))

names(sd) <- names(par.ini.tr)
sd[names(sd)] <-0
sd[par.est] <- 0.03    # for parameters
sd[ivp.est] <- 0.1      # for init-states



# parameters to be held fixed at pre-assigned value
# vivax incubation period ~= 15 days (=15/365 yr); q takes a low value <<1 
#par.ini.tr[c("log.muEI","logit.q","logit.br")] <- c(log(24),logit(0.001),logit(((run%%99)+1)/100))
sd[c("log.muEI","logit.q","logit.br")] <- 0.000001
sd[["logit.bc"]]<-c(rep(0.000001,length(time1.dat)),rep(0.03,length(time2.dat)+1)) 
sd[["logit.bc"]][length(time1.dat)]<-3*0.03

We now consider step function.

SEIQS_ct <- Csnippet("
  double dW, beta, foi;
  double muEI, muIQ, muIS, muQS, tau, q, br,bc, rho, sigPRO, varPRO, delt;
    double dBS, dSD, dSE, dEI, dED, dIQ, dIS, dID, dQS, dQD, dK, dF;
    
    // untransfomr parameters
    muEI   = exp(LOG_MUEI);
    muIQ   = exp(LOG_MUIQ);
    muIS   = exp(LOG_MUIS);
    muQS   = exp(LOG_MUQS);
    tau    = exp(LOG_TAU);
    sigPRO = exp(LOG_SIGPRO);
    rho    = expit(LOGIT_RHO);
    q      = expit(LOGIT_Q);
    br     = expit(LOGIT_BR);
    bc     = expit(LOGIT_BC);
    // compute transition rates
    dBS   = DELTA*POP + DPOPDT;
    dSD   = DELTA * S;
    dSE   = F * S;
    dEI   = muEI * E;
    dED   = DELTA * E;
    dIQ   = muIQ * I;
    dIS   = muIS * I;
    dID   = DELTA * I;
    dQS   = muQS * Q;
    dQD   = DELTA * Q;

")

We consider time original from Jan 1987 forward.

po <- pomp(
      data = data.frame(cbind(time=time.dat,reports=pf.dat)),
      times = "time",
      t0 = 2*time.dat[1] - time.dat[2],
      tcovar = tbasis,
      covar = covar,
      paramnames = c(
        "log.sigOBS","log.muEI","log.muIQ","log.muIS","log.muQS","log.tau","log.sigPRO",
        "logit.rho","logit.q","logit.br","delta","b1","b2","b3","b4","b5","b6", "logit.bc"
      ),
      statenames = c("cases","err","S","E","I","Q","K","F","W"),
      covarnames = c("pop","dpopdt","rainfall","season1", "irs"),
      zeronames = c("cases","W","err"),
      obsnames = c("reports"),
      rprocess = euler.sim(step.fun=SEIQS_ct,delta.t=2/365),
      rmeasure = "nbinom_rmeasure",
      dmeasure = "nbinom_dmeasure",
      initializer = function(params,t0,statenames,covars,...){
        frac1 <- expit(params[c("logit.S0","logit.E0","logit.I0","logit.Q0")])
        frac2 <- exp(params[c("log.K0","log.F0")])
        x0 <- numeric(length(statenames))
        names(x0) <- statenames
        x0[c("S","E","I","Q","K","F")] <- c(round(covars["pop"]*frac1/sum(frac1)),frac2)
        x0
      }
    )

References