#install packages install.packages ("gplots") install.packages("ggplot2") install.packages("gtools") install.packages("plotrix") install.packages("nlme") install.packages("Rmisc") install.packages("ade4") install.packages("MuMIn") install.packages("lmtest") #load packages library (gplots) library(ggplot2) library(gtools) library(plotrix) library(nlme) library(Rmisc) library(ade4) library(MuMIn) library(lmtest) #upload dataset setwd("~/PhD/Autres/R workshop/RMartin") lizard <- read.table("lizardrepro.txt", h=T, dec=",") #CORRELATION # check data distribution shapiro.test (lizard$SVL) #distribution of snout vent length is normal shapiro.test(lizard$altitude) #data are not normally distributed for altitude #run spearman's correlation test cor.test (lizard$SVL, lizard$altitude, method = "spearman") #let's plot the results plot (lizard$altitude, lizard$SVL, xlab = "altitude", ylab = "snout vent length") #the results can be reported as follows: #there is no relationship between snout vent length and altitude (rs (57) = 0.09, p = 0.50) #LINEAR REGRESSION #is litter mass (LM) a good predicator of female body length (SVL)? #most basic method ml <- lm(LM ~ SVL, data = lizard) summary(ml) plot(LM ~ SVL, data=lizard, xlab="litter mass", ylab="female SVL") abline(ml) #results can be reported as follows: #F(1, 57) = 36.89, p < 0.001, R2 = 0.38 #check model assumptions #normality residuals resLM=residuals(ml) shapiro.test(resLM) #homoscédasticity(Breusch-pagan) bptest(ml) #independant residuals dwtest(ml) par(mfrow=c(2,2))# display several graphs plot(ml) ggplot(lizard, aes(x=SVL, y=LM)) + geom_point(shape=1) + geom_smooth(method=lm,se=T) # Add linear regression line + add confidence region #we can look at effects of different parameters on our variable of interest = what are the effect of temperature, humidity and SVL on LM #one of the assumptions of multiple regression, though, is that predictors are not correlated with one another #to test the presence/absence of multicolonearity, we can use Pearson's correlation test cor.test (lizard$Tmin, lizard$WDmin) cor.test (lizard$Tmin, lizard$SVL) cor.test (lizard$WDmin, lizard$SVL) #so Tmin and WDmin are correlated. Therefore we can include either variable in the model but not both modenv <- lm (LM ~ Tmin+ SVL, data = lizard) summary (modenv) #we can take account for interractive effect between paramters (here: does small females react differently to min temperature than bigger females) modenv2 <- lm (LM ~ SVL +Tmin:SVL, data = lizard) summary (modenv2) #GLM for count data (= non continous numerical variable) setwd("~/PhD/Autres/R workshop/RMartin") croc <- read.table("clutchsize_femalesize.txt", h=T, dec=".") CS_glm <-glm (CS~femTL, family=poisson, data=croc) summary (CS_glm) #the main assumption is mean should be roughly equal the variance. If #variance is much higher than mean, the model is overdispersed. #to check overdispersion you can use the following code install.packages ("AER") library(AER) dispersiontest(CS_glm) #data are overdispersed #to deal with overdispersion, you can switch to a negative binomial model install.packages ("MASS") library(MASS) CS_glm_nb <-glm.nb (CS~femTL, data=croc) summary (CS_glm_nb) plot(CS~femTL, data=croc) curve(predict(CS_glm_nb,data.frame(femTL=x),type="response"),add=TRUE, col="grey50",lty=2) #non linear regression (quasibinomial distribution) for proportion data fer_ok=subset(croc,croc$nb_fer!="NA")#need to remove all"NA" fer_ok$prop_success=fer_ok$nb_fer / fer_ok$CS#create collomn for proportion of success par(mfrow=c(1,1))#display only one graph plot(prop_success~femTL, cex.lab=1.5,cex.axis=1.5, cex=1.8,bty="l", pch=21, col="white", bg="black", ylim=c(0,1), ylab=" % fertile eggs", xlim=c(100,400),xlab="female total length (cm)", xaxt="n", yaxt="n",data=fer_ok) axis(side=1,at=c(160,220,280,340), labels=c("160","220","280","340"),cex.axis=1.5) axis(side=2,at=c(0,0.25,0.5,0.75,1), labels=c("0","25","50","75","100"),cex.axis=1.5) A <- glm(cbind(fer_ok$nb_fer, fer_ok$nb_unfer)~ femTL ,family=quasibinomial, data=fer_ok) summary(A) curve(predict(A,data.frame(femTL=x),type="response"),add=TRUE, col="grey50",lty=2)