rm(list=ls())
bumpus=read.delim("bumpus_c.txt",dec=",") # Bumpus sparrow data; the file uses decimal commas
summary(bumpus)
plot(bumpus[,c("TL","AE","W","LBH")]) # simple pairs plot of 4 variables

# three linear models for weight, with/without sex and its interactions
W.lm1=lm(W~TL+AE+LBH,bumpus)
W.lm2=lm(W~TL+AE+LBH+sex,bumpus)
W.lm3=lm(W~(TL+AE+LBH)*sex,bumpus)
par(mfrow=c(2,2))
plot(W.lm3)
summary(W.lm3) # model with interactions; interpret the coefficients (reference values vs contrasts)

# the same linear regressions refitted with glm(): the cross-validation function cv.glm() requires a glm object
W.glm1=glm(W~TL+AE+LBH,data=bumpus)
W.glm2=glm(W~TL+AE+LBH+sex,data=bumpus)
W.glm3=glm(W~(TL+AE+LBH)*sex,data=bumpus)
require(boot)
cv.glm(bumpus,W.glm1)$delta # leave-one-out cross-validation

require(AICcmodavg) # library to generate AIC(c) tables for a set of specified models
Cand.models<-list()
Cand.models[[1]] = W.lm1
Cand.models[[2]] = W.lm2
Cand.models[[3]] = W.lm3
## create a vector of names to trace back models in the set
Modnames<-paste("mod", 1:length(Cand.models), sep="")
## generate the AICc table
aictab(cand.set = Cand.models, modnames = Modnames, sort = TRUE)

# using the library MuMIn to consider all possible sub-models
require(MuMIn)
options(na.action="na.fail") # dredge() requires an na.action that fails on missing values
W.dr=dredge(lm(W~TL+AE+LBH+LH+LF+LT+WS+LKS,data=bumpus))
W.dr[1:50,] # the 50 best models; many models have very similar AICc values
summary(model.avg(get.models(W.dr, subset = delta < 3))) # model averaging over models within 3 AICc units
summary(model.avg(get.models(W.dr, subset = delta < 5))) # within 5 units
summary(model.avg(get.models(W.dr, subset = delta < 10))) # within 10 units
W.dr[1,]
summary(lm(W~TL+AE+LBH+WS+LKS,data=bumpus)) # refitting the single best model ignores post-selection uncertainty: its P values are suspect!
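# --- Added sketch (not in the original script): AICc by hand ----------------
# aictab() and dredge() above rank models by AICc, the small-sample correction
#   AICc = AIC + 2*K*(K+1)/(n - K - 1),
# where K is the number of estimated parameters (intercept, slopes and the
# residual variance) and n the sample size. A quick base-R check against the
# aictab() output; the values should match its AICc column up to rounding.
aicc.by.hand <- function(fit) {
  K <- attr(logLik(fit), "df") # parameters counted as in logLik(), incl. the variance
  n <- nobs(fit)
  AIC(fit) + 2 * K * (K + 1) / (n - K - 1)
}
sapply(Cand.models, aicc.by.hand)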
##############################################################################
# logistic regression models to analyse survival
surv.glm1=glm(surv~(TL+AE+W),family=binomial,data=bumpus)
surv.glm2=glm(surv~(TL+AE+W)+sex,family=binomial,data=bumpus)
surv.glm3=glm(surv~(TL+AE+W)*sex,family=binomial,data=bumpus)
par(mfrow=c(2,2))
plot(surv.glm3) # assessing the assumptions of a logistic regression model is not easy with binary data!
# there are other ways (see e.g. Harrell's Design package, now called rms)
summary(surv.glm3) # parameter estimates; note that the coefficients define the linear predictor, i.e. they are on the logit scale
predict(surv.glm1)[1:5] # the first five predicted values, by default on the logit scale
plot(bumpus$TL,inv.logit(predict(surv.glm1,type="terms")[,"TL"])) # each prediction term can be back-transformed to the probability scale, but the back-transformed terms cannot simply be added

# centripetal (stabilizing) selection, Grant 1972
summary(glm(surv~TL+I(TL^2),binomial,bumpus)) # if there is selection against extreme individuals (small and large), expect a nonlinear effect
plot(bumpus$TL,predict(glm(surv~TL,binomial,bumpus),type="response"),ylim=c(0,1)) # comparing predictions from a linear and a polynomial model
points(bumpus$TL,predict(glm(surv~TL+I(TL^2),binomial,bumpus),type="response"),pch=19)

# using dredge() to rank all sub-models by AICc
surv.dr=dredge(glm(surv~(poly(TL,2)+poly(AE,2)+LBH+LH+LF+LT+WS+LKS)*sex,family=binomial,data=bumpus))
surv.dr[1:5,] # the 5 best models
bumpus.glm <- glm(surv~LH+LKS+sex+poly(TL,2),family=binomial,data=bumpus) # refitting the best model
cost <- function(r, pi=0) mean(abs(r-pi)>0.5) # 0/1 prediction error, used as the cost function for cv.glm() here and below
cv.glm(bumpus, bumpus.glm, cost)$delta
summary(model.avg(get.models(surv.dr, subset = delta < 10))) # model averaging to account for model uncertainty
par(mfrow=c(2,2))
plot(bumpus.glm) # looking at the fit, mostly to spot influential values
surv.dr=dredge(glm(surv~(poly(TL,2)+poly(AE,2)+LBH+LH+LF+LT+WS+LKS)*sex,family=binomial,data=bumpus[-c(36,123),])) # assessing the robustness of the model selection to influential observations
surv.dr[1:5,] # the models with *sex interactions are dropped: the selection is sensitive to single observations

# an alternative way to model selection against extremes: abs(scale()) is the absolute distance from the mean
summary(glm(surv~abs(scale(TL)),binomial,bumpus))
summary(glm(surv~sex+abs(scale(TL)),binomial,bumpus))
summary(glm(surv~sex*abs(scale(TL)),binomial,bumpus))
require(mgcv) # a way to assess part of the fit (linearity on the logit scale) is to use additive models
plot(gam(surv~s(TL),binomial,bumpus))
plot(gam(surv~s(abs(scale(TL))),binomial,bumpus))

# consider predicted values and assess the 0/1 prediction error
predict(glm(surv~TL+I(TL^2),binomial,bumpus),type="response")
ifelse(predict(glm(surv~TL+I(TL^2),binomial,bumpus),type="response")>0.5,1,0) # predicted values on the 0/1 scale
table(bumpus$surv,ifelse(predict(glm(surv~TL+I(TL^2),binomial,bumpus),type="response")>0.5,1,0)) # table of predictions vs observations

# different model selections result from different starting points
dredge(glm(surv~(TL+I(TL^2)+AE+I(AE^2)+W+I(W^2))*sex,data=bumpus,family=binomial))[1:10,]
summary(glm(surv~AE+I(AE^2)*sex+TL*sex+W,data=bumpus,family=binomial)) # summary of the best model (AICc)
dredge(glm(surv~sex*(TL+AE+W),family=binomial,data=bumpus))[1:10,] # a different starting point
summary(glm(surv~sex*(TL+AE)+W,family=binomial,data=bumpus))
summary(glm(surv~sex+poly(AE,2)+poly(W,2),family=binomial,data=bumpus))

# other selection criteria and ways to estimate them (cross-validation, error rates, etc.), using the cost() function defined above
bumpus.glm <- glm(surv~TL+AE+W,family=binomial,data=bumpus)
cv.glm(bumpus, bumpus.glm, cost)$delta
cv.glm(bumpus, bumpus.glm, cost, K=5)$delta
bumpus.glm <- glm(surv~sex*TL+AE+I(AE^2)+W,family=binomial,data=bumpus)
cv.glm(bumpus, bumpus.glm, cost)$delta
cv.glm(bumpus, bumpus.glm, cost, K=5)$delta
bumpus.glm <- glm(surv~sex*TL+AE+sex*I(AE^2)+W,family=binomial,data=bumpus)
cv.glm(bumpus, bumpus.glm, cost)$delta
cv.glm(bumpus, bumpus.glm, cost, K=5)$delta
pred.bump=ifelse(predict(bumpus.glm,type="response")>0.5,1,0)
table(pred.bump,bumpus$surv) # confusion matrix for the last model, using a 0.5 cut-off
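# --- Added sketch (not in the original script): AUC as another criterion ----
# The 0/1 error rate above depends on the arbitrary 0.5 cut-off. The area under
# the ROC curve (AUC) summarizes discrimination over all cut-offs: it is the
# probability that a randomly chosen survivor receives a higher fitted survival
# probability than a randomly chosen non-survivor (Mann-Whitney statistic).
# Base-R sketch, assuming surv is coded 0/1 (as the cost() function also assumes).
auc.by.hand <- function(obs, fitted) {
  r  <- rank(fitted)                     # ranks of the fitted probabilities
  n1 <- sum(obs == 1); n0 <- sum(obs == 0)
  (sum(r[obs == 1]) - n1 * (n1 + 1) / 2) / (n1 * n0)
}
auc.by.hand(bumpus$surv, predict(bumpus.glm, type = "response")) # AUC of the last fitted model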
##############################################################################
# example of Bayesian Model Averaging with the BMS package
summary(lm(TL~AE+LBH+LH+LF+LT+WS+LKS,data=bumpus)) # full OLS model for TL, for comparison
bumpus.bas=bumpus[,c("TL","AE","LBH","LH","LF","LT","WS","LKS")] # first column (TL) is the response for bms()
bumpus.bas=scale(bumpus.bas) # standardize the variables so the coefficients are comparable
require(BMS)
bma1 = bms(bumpus.bas, burn=1000, iter=2000, mprior="uniform") # uniform model prior; short chain, for illustration
coef(bma1, exact=TRUE, std.coefs=TRUE) # posterior inclusion probabilities and standardized posterior coefficients
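# --- Added sketch (not in the original script): BMA vs full-model OLS --------
# A hedged side-by-side look at the standardized full-model OLS slopes and the
# BMA output, to make the shrinkage induced by model uncertainty visible.
# Assumes coef() on a bms object returns a matrix with one row per covariate
# (posterior inclusion probability, posterior mean, posterior SD), as in the
# BMS package; otherwise only base R and the objects created above are used.
bumpus.sc <- as.data.frame(bumpus.bas)
ols.std <- coef(lm(TL ~ ., data = bumpus.sc))[-1] # standardized OLS slopes (intercept dropped)
bma.std <- coef(bma1, exact = TRUE, std.coefs = TRUE)
round(ols.std[rownames(bma.std)], 3) # OLS: every covariate kept, no shrinkage
round(bma.std, 3) # BMA: coefficients shrunk according to inclusion probability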