This code is quite time consuming, so please be patient.
rm(list=ls())
require(survival)
require(flexsurv)
require(doParallel)
no_cores <- detectCores() - 1
registerDoParallel(cores=no_cores)
set.seed(230)
n <- 5000 #or fully 5000 # the size of a cohort
# simulating 20 covariates =
p <- 20 # the number of covariates
# coefficient vector
betavec <- c(0.5, 0.5, 0.35, -0.35, rep(0, 16))
z <- matrix(NA, nrow=n, ncol=p)
for(i in 1:p) z[,i] <- rbinom(n, 1, 0.5)
mu <- z%*%betavec
epsilon.star <- rweibull(n, 0.82, 29.27)
epsilon <- log(epsilon.star)/sqrt(1.57)
mytime <- exp( 1 + mu + exp(-0.5*mu^2)*epsilon )
cen.time <- z[,1] + z[,2] + runif(n, 0, 130)
myv <- apply(cbind(mytime, cen.time), 1, min)
myv=myv/max(myv)
mydel <- as.numeric(mytime<cen.time)
#+++++++++ prepare your data. CAUTION: put covariate matrix z at first
dat <- data.frame(z, time=myv, status=mydel)
#************** stepAIC-genF
fstepAIC=function(forward= TRUE){ #TRUE=forward selection, FALSE=backward elimination
if(forward){
cl <- makeCluster(no_cores, type="FORK")
#+++++ forward selection
fit <-flexsurvreg(Surv(time, status) ~ 1, data=dat, dist='genf',control=list(fnscale=2500, reltol=1e-15))
myfit <- fit
len <- length(indc <- 1:p)
q <- length(inds <- numeric(0))
aic0 <- AIC(myfit)
aics <- aic0 - 1
ff1=function(i){
inds0 <- c(inds, indc[i])
myform0 <- as.formula(paste("Surv(time, status) ~ ",
paste(names(dat)[inds0], collapse= "+")))
fit0 <- flexsurvreg(myform0, data=dat, dist='genf',
method="Nelder-Mead",
control=list(fnscale=2500, reltol=1e-15))
myaic <- AIC(fit0)
myout=c(i, myaic)
#names(myout)<-c("myfit", "myaic")
return(myout)
}
while(q<=p){
#cat('indc=', indc, '\n' )
aics <- numeric(len)
fits <- list(len)
storef <- foreach(it = 1:len, .combine="rbind") %dopar% ff1(i = it)
aics=storef[, 2]
optind <- storef[min(which(aics == min(aics))), 1]
if(min(aics) < aic0){
aic0=min(aics)
q <- length( inds <- c(inds, indc[optind]) )
#cat('inds=', inds, '\n')
len <- length(indc <- indc[-optind])
}else q <- p+1 #stop
}
stopCluster(cl)
cat('genF forward selection of variables using AIC: ', paste0('X', sort(inds)), '\n')
}else{ #----- backward selection
cl <- makeCluster(no_cores, type="FORK")
myform0 <- as.formula(paste("Surv(time, status) ~ ", paste(names(dat)[1:p], collapse= "+")))
fit0 <- flexsurvreg(myform0, data=dat, dist='genf',
method = "Nelder-Mead",
control=list(fnscale=2500, reltol=1e-15))
fit <- fit0
myfit <- fit
q <- length(inds <- 1:p)
aic0 <- AIC(myfit)
aics <- aic0-1
fb1=function(i){
inds0 <- inds[-i]
myform0 <- as.formula(paste("Surv(time, status) ~ ", paste(names(dat)[inds0], collapse= "+")))
fit0 <- flexsurvreg(myform0, data=dat, dist='genf',
method = "Nelder-Mead",
control=list(reltol=1e-15))
myaic <- AIC(fit0)
myout=c(i, myaic)
return(myout)
}
while(q>0){
aics <- numeric(q)
fits <- list(q)
storeb <- foreach(it = 1:q, .combine="rbind") %dopar% fb1(i = it)
aics=storeb[, 2]
aics.n=aics[aics<aic0]
if(length(aics.n)>0){
mn=min(aics.n)
optind <- storeb[min(which(aics ==mn)), 1]
#cat('optind= ', optind, '\n')
aic0=mn
q <- length( inds <- inds[-optind] )
#cat('aic0= ', aic0, '\n')
} else q<-0 # stop
} # for the while
stopCluster(cl)
cat('genF backward elimination of variables using AIC: ', paste0('X', sort(inds)), '\n')
}
}
####
####
fstepAIC(T)
## genF forward selection of variables using AIC: X7 X15
fstepAIC(F)
## genF backward elimination of variables using AIC: X2 X6 X7 X9 X10 X13 X15 X16 X17 X20