rm(list=ls())
bumpus=read.delim("bumpus_c.txt",dec=",") # Bumpus sparrow data; the file uses decimal commas
summary(bumpus)
plot(bumpus[,c("TL","AE","W","LBH")]) # simple pairs plot of 4 variables

# three linear models for weight, with/without sex and its interactions
W.lm1=lm(W~TL+AE+LBH,bumpus)
W.lm2=lm(W~TL+AE+LBH+sex,bumpus)
W.lm3=lm(W~(TL+AE+LBH)*sex,bumpus)
par(mfrow=c(2,2))
plot(W.lm3)
summary(W.lm3) # model with interactions; interpret the coefficients (reference values vs contrasts)

# the same linear regressions refitted with glm(): the cross-validation function cv.glm() requires a glm object
W.glm1=glm(W~TL+AE+LBH,data=bumpus)
W.glm2=glm(W~TL+AE+LBH+sex,data=bumpus)
W.glm3=glm(W~(TL+AE+LBH)*sex,data=bumpus)
require(boot)
cv.glm(bumpus,W.glm1)$delta # leave-one-out cross-validation

require(AICcmodavg) # library to generate AIC(c) tables for a set of specified models
Cand.models<-list()
Cand.models[[1]] = W.lm1
Cand.models[[2]] = W.lm2
Cand.models[[3]] = W.lm3
## create a vector of names to trace back models in the set
Modnames<-paste("mod", 1:length(Cand.models), sep="")
## generate the AICc table
aictab(cand.set = Cand.models, modnames = Modnames, sort = TRUE)

# using the library MuMIn to consider all possible sub-models
require(MuMIn)
options(na.action="na.fail") # dredge() requires an na.action that fails on missing values
W.dr=dredge(lm(W~TL+AE+LBH+LH+LF+LT+WS+LKS,data=bumpus))
W.dr[1:50,] # the 50 best models; many models have very similar AICc values
summary(model.avg(get.models(W.dr, subset = delta < 3))) # model averaging over models within 3 AICc units
summary(model.avg(get.models(W.dr, subset = delta < 5))) # within 5 units
summary(model.avg(get.models(W.dr, subset = delta < 10))) # within 10 units
W.dr[1,]
summary(lm(W~TL+AE+LBH+WS+LKS,data=bumpus)) # refitting the single best model ignores post-selection uncertainty: its P values are suspect!
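# --- Added sketch (not in the original script): AICc by hand ----------------
# aictab() and dredge() above rank models by AICc, the small-sample correction
#   AICc = AIC + 2*K*(K+1)/(n - K - 1),
# where K is the number of estimated parameters (intercept, slopes and the
# residual variance) and n the sample size. A quick base-R check against the
# aictab() output; the values should match its AICc column up to rounding.
aicc.by.hand <- function(fit) {
  K <- attr(logLik(fit), "df") # parameters counted as in logLik(), incl. the variance
  n <- nobs(fit)
  AIC(fit) + 2 * K * (K + 1) / (n - K - 1)
}
sapply(Cand.models, aicc.by.hand)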
##############################################################################
# logistic regression models to analyse survival
surv.glm1=glm(surv~(TL+AE+W),family=binomial,data=bumpus)
surv.glm2=glm(surv~(TL+AE+W)+sex,family=binomial,data=bumpus)
surv.glm3=glm(surv~(TL+AE+W)*sex,family=binomial,data=bumpus)
par(mfrow=c(2,2))
plot(surv.glm3) # assessing the assumptions of a logistic regression model is not easy with binary data!
# there are other ways (see e.g. Harrell's Design package, now called rms)
summary(surv.glm3) # parameter estimates; note that the coefficients define the linear predictor, i.e. they are on the logit scale
predict(surv.glm1)[1:5] # the first five predicted values, by default on the logit scale
plot(bumpus$TL,inv.logit(predict(surv.glm1,type="terms")[,"TL"])) # each prediction term can be back-transformed to the probability scale, but the back-transformed terms cannot simply be added

# centripetal (stabilizing) selection, Grant 1972
summary(glm(surv~TL+I(TL^2),binomial,bumpus)) # if there is selection against extreme individuals (small and large), expect a nonlinear effect
plot(bumpus$TL,predict(glm(surv~TL,binomial,bumpus),type="response"),ylim=c(0,1)) # comparing predictions from a linear and a polynomial model
points(bumpus$TL,predict(glm(surv~TL+I(TL^2),binomial,bumpus),type="response"),pch=19)

# using dredge() to rank all sub-models by AICc
surv.dr=dredge(glm(surv~(poly(TL,2)+poly(AE,2)+LBH+LH+LF+LT+WS+LKS)*sex,family=binomial,data=bumpus))
surv.dr[1:5,] # the 5 best models
bumpus.glm <- glm(surv~LH+LKS+sex+poly(TL,2),family=binomial,data=bumpus) # refitting the best model
cost <- function(r, pi=0) mean(abs(r-pi)>0.5) # 0/1 prediction error, used as the cost function for cv.glm() here and below
cv.glm(bumpus, bumpus.glm, cost)$delta
summary(model.avg(get.models(surv.dr, subset = delta < 10))) # model averaging to account for model uncertainty
par(mfrow=c(2,2))
plot(bumpus.glm) # looking at the fit, mostly to spot influential values
surv.dr=dredge(glm(surv~(poly(TL,2)+poly(AE,2)+LBH+LH+LF+LT+WS+LKS)*sex,family=binomial,data=bumpus[-c(36,123),])) # assessing the robustness of the model selection to influential observations
surv.dr[1:5,] # the models with *sex interactions are dropped: the selection is sensitive to single observations

# an alternative way to model selection against extremes: abs(scale()) is the absolute distance from the mean
summary(glm(surv~abs(scale(TL)),binomial,bumpus))
summary(glm(surv~sex+abs(scale(TL)),binomial,bumpus))
summary(glm(surv~sex*abs(scale(TL)),binomial,bumpus))
require(mgcv) # a way to assess part of the fit (linearity on the logit scale) is to use additive models
plot(gam(surv~s(TL),binomial,bumpus))
plot(gam(surv~s(abs(scale(TL))),binomial,bumpus))

# consider predicted values and assess the 0/1 prediction error
predict(glm(surv~TL+I(TL^2),binomial,bumpus),type="response")
ifelse(predict(glm(surv~TL+I(TL^2),binomial,bumpus),type="response")>0.5,1,0) # predicted values on the 0/1 scale
table(bumpus$surv,ifelse(predict(glm(surv~TL+I(TL^2),binomial,bumpus),type="response")>0.5,1,0)) # table of predictions vs observations

# different model selections result from different starting points
dredge(glm(surv~(TL+I(TL^2)+AE+I(AE^2)+W+I(W^2))*sex,data=bumpus,family=binomial))[1:10,]
summary(glm(surv~AE+I(AE^2)*sex+TL*sex+W,data=bumpus,family=binomial)) # summary of the best model (AICc)
dredge(glm(surv~sex*(TL+AE+W),family=binomial,data=bumpus))[1:10,] # a different starting point
summary(glm(surv~sex*(TL+AE)+W,family=binomial,data=bumpus))
summary(glm(surv~sex+poly(AE,2)+poly(W,2),family=binomial,data=bumpus))

# other selection criteria and ways to estimate them (cross-validation, error rates, etc.), using the cost() function defined above
bumpus.glm <- glm(surv~TL+AE+W,family=binomial,data=bumpus)
cv.glm(bumpus, bumpus.glm, cost)$delta
cv.glm(bumpus, bumpus.glm, cost, K=5)$delta
bumpus.glm <- glm(surv~sex*TL+AE+I(AE^2)+W,family=binomial,data=bumpus)
cv.glm(bumpus, bumpus.glm, cost)$delta
cv.glm(bumpus, bumpus.glm, cost, K=5)$delta
bumpus.glm <- glm(surv~sex*TL+AE+sex*I(AE^2)+W,family=binomial,data=bumpus)
cv.glm(bumpus, bumpus.glm, cost)$delta
cv.glm(bumpus, bumpus.glm, cost, K=5)$delta
pred.bump=ifelse(predict(bumpus.glm,type="response")>0.5,1,0)
table(pred.bump,bumpus$surv) # confusion matrix for the last model, using a 0.5 cut-off
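# --- Added sketch (not in the original script): AUC as another criterion ----
# The 0/1 error rate above depends on the arbitrary 0.5 cut-off. The area under
# the ROC curve (AUC) summarizes discrimination over all cut-offs: it is the
# probability that a randomly chosen survivor receives a higher fitted survival
# probability than a randomly chosen non-survivor (Mann-Whitney statistic).
# Base-R sketch, assuming surv is coded 0/1 (as the cost() function also assumes).
auc.by.hand <- function(obs, fitted) {
  r  <- rank(fitted)                     # ranks of the fitted probabilities
  n1 <- sum(obs == 1); n0 <- sum(obs == 0)
  (sum(r[obs == 1]) - n1 * (n1 + 1) / 2) / (n1 * n0)
}
auc.by.hand(bumpus$surv, predict(bumpus.glm, type = "response")) # AUC of the last fitted model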
##############################################################################
# example of Bayesian Model Averaging with the BMS package
summary(lm(TL~AE+LBH+LH+LF+LT+WS+LKS,data=bumpus)) # full OLS model for TL, for comparison
bumpus.bas=bumpus[,c("TL","AE","LBH","LH","LF","LT","WS","LKS")] # first column (TL) is the response for bms()
bumpus.bas=scale(bumpus.bas) # standardize the variables so the coefficients are comparable
require(BMS)
bma1 = bms(bumpus.bas, burn=1000, iter=2000, mprior="uniform") # uniform model prior; short chain, for illustration
coef(bma1, exact=TRUE, std.coefs=TRUE) # posterior inclusion probabilities and standardized posterior coefficients
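# --- Added sketch (not in the original script): BMA vs full-model OLS --------
# A hedged side-by-side look at the standardized full-model OLS slopes and the
# BMA output, to make the shrinkage induced by model uncertainty visible.
# Assumes coef() on a bms object returns a matrix with one row per covariate
# (posterior inclusion probability, posterior mean, posterior SD), as in the
# BMS package; otherwise only base R and the objects created above are used.
bumpus.sc <- as.data.frame(bumpus.bas)
ols.std <- coef(lm(TL ~ ., data = bumpus.sc))[-1] # standardized OLS slopes (intercept dropped)
bma.std <- coef(bma1, exact = TRUE, std.coefs = TRUE)
round(ols.std[rownames(bma.std)], 3) # OLS: every covariate kept, no shrinkage
round(bma.std, 3) # BMA: coefficients shrunk according to inclusion probability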