# UPLOADING DATA ####
# Set the working directory.
# NOTE: this path is specific to the workshop machine; edit it so that it
# points at the folder containing "treatment.csv" on your computer.
setwd("C:\\Users\\kabur\\Dropbox\\R workshop\\2021-22 series\\session 2")

# Upload the data (keep text columns as character, not factor)
monkey_data <- read.csv(
  "treatment.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Change the name of the header from "Intervention" to "Int"
colnames(monkey_data)[colnames(monkey_data) == "Intervention"] <- "Int"

# HISTOGRAM ####
# We can check the distribution of the play data using a histogram
hist(monkey_data$play)

# You can edit the histogram by determining the number of bars ("breaks"),
# adding the axis label (xlab), determining the limits of the x or y axis
# (xlim, ylim), adding a title (main), and changing the colour (col)
hist(
  monkey_data$play,
  breaks = 20,
  xlab = "Play rates",
  xlim = c(0, 240),
  main = "Histogram of Play rates",
  col = "lightblue"
)

# BAR GRAPH ####
# To plot differences in the mean of a continuous variable between two groups
# we can use bar graphs. Suppose we want to compare play rates between males
# and females. Here are the steps.

# STEP 1: convert sex from number to character (0 -> "am", 1 -> "af").
# After the first assignment the column is character, so the second comparison
# matches the text "1".
monkey_data$Sex[monkey_data$Sex == 0] <- "am"
monkey_data$Sex[monkey_data$Sex == 1] <- "af"

# STEP 2: calculate mean and sd of play rates for each sex.
# Install plyr only when it is missing, so re-running the script does not
# download the package again.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)

data_summary <- ddply(
  monkey_data,
  .(Sex),
  summarize,
  mean = round(mean(play), 2),
  sd = sd(play)
)

# STEP 3: plot a basic bar graph using "ggplot"
# (install ggplot2 only when it is missing)
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Bar graph with error bars of +/- 1 sd around the mean
p <- ggplot(data_summary, aes(x = Sex, y = mean)) +
  geom_bar(stat = "identity", fill = "white", colour = "black") +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = .2)

# You can add labels
q <- p +
  ggtitle("Difference in play rates \n between males and females") +
  xlab("Sex") +
  ylab("Play rates (N/hr)")

# You can add text to the bars (here: the rounded mean)
m <- q +
  geom_text(aes(label = mean), vjust = 1.5, hjust = 1, size = 7)

# You can edit the format of text and background as follows
w <- m +
  theme(
    plot.title = element_text(color = "black", size = 20, hjust = .5),
    axis.text = element_text(
      color = "black", size = 20, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.title = element_text(color = "black", size = 20, vjust = 3),
    panel.background = element_rect(fill = "transparent", colour = NA),
    axis.line = element_line(color = "black", size = .2)
  )

# We can also create bar graphs by grouping bars together.
# Suppose we want to plot mean play rates for each intervention, split by sex.

# STEP 1: convert the variable "Int" from number to character
monkey_data$Int <- as.character(monkey_data$Int)

# STEP 2: relabel each intervention code with a more informative name
monkey_data$Int[monkey_data$Int == "0"] <- "Control"
monkey_data$Int[monkey_data$Int == "1"] <- "Handling-only"
monkey_data$Int[monkey_data$Int == "2"] <- "Face-to-face"

# STEP 3: calculate mean play rates by intervention and sex
data_summary2 <- ddply(
  monkey_data,
  .(Sex, Int),
  summarize,
  mean = mean(play),
  sd = sd(play)
)

# STEP 4: plot; position_dodge(.9) lines the error bars up with dodged bars
p2 <- ggplot(data_summary2, aes(x = Int, y = mean, fill = Sex)) +
  geom_bar(position = "dodge", stat = "identity") +
  geom_errorbar(
    aes(ymin = mean - sd, ymax = mean + sd),
    width = .2,
    position = position_dodge(.9)
  )

# Add labels, change the text in the legend and the colour of the groups.
# The x axis shows the intervention (x = Int), so it is labelled accordingly.
# Legend labels follow the alphabetical order of the Sex values: "af", "am".
q2 <- p2 +
  ggtitle(
    "Difference in play rates \n between intervention measures split by sex"
  ) +
  xlab("Intervention measures") +
  ylab("Play rates (N/hr.)") +
  scale_fill_manual(
    labels = c("Adult \n females", "Adult \n males"),
    values = c("green", "blue")
  )

# Edit the format of the text
w2 <- q2 +
  theme(
    plot.title = element_text(color = "black", size = 20, hjust = .5),
    axis.text = element_text(
      color = "black", size = 20, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.title = element_text(color = "black", size = 20, vjust = 3),
    panel.background = element_rect(fill = "transparent", colour = NA),
    axis.line = element_line(color = "black", size = .2),
    legend.title = element_text(color = "black", size = 20),
    legend.text = element_text(
      color = "black", size = 15, margin = margin(t = 6)
    )
  )
# BOXPLOT ####
# Boxplot of play rates for each intervention measure
p3 <- ggplot(monkey_data, aes(x = Int, y = play)) +
  geom_boxplot()

# Add labels
q3 <- p3 +
  ggtitle("Difference in play rates \n between intervention measures") +
  xlab("Intervention measures") +
  ylab("Play rates (N/hr.)")

# Edit the format of the text
w3 <- q3 +
  theme(
    plot.title = element_text(color = "black", size = 20, hjust = .5),
    axis.text = element_text(
      color = "black", size = 20, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.title = element_text(color = "black", size = 20, vjust = 3),
    panel.background = element_rect(fill = "transparent", colour = NA),
    axis.line = element_line(color = "black", size = .2),
    legend.title = element_text(color = "black", size = 20)
  )

# We can rearrange the order in which elements are plotted by turning the
# existing column into a factor with the levels in the desired order.
# The column itself must be passed to factor(): passing a fresh 3-element
# vector would overwrite the real observations with recycled labels.
monkey_data$Int <- factor(
  monkey_data$Int,
  levels = c("Face-to-face", "Handling-only", "Control")
)

# Same boxplot, now drawn in the new level order. The x axis shows the
# intervention, and no fill aesthetic is mapped, so no fill scale is needed.
w3b <- ggplot(monkey_data, aes(x = Int, y = play)) +
  geom_boxplot() +
  ggtitle("Difference in play rates \n between intervention measures") +
  xlab("Intervention measures") +
  ylab("Play rates (N/hr.)") +
  theme(
    plot.title = element_text(color = "black", size = 20, hjust = .5),
    axis.text = element_text(
      color = "black", size = 20, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.title = element_text(color = "black", size = 20, vjust = 3),
    panel.background = element_rect(fill = "transparent", colour = NA),
    axis.line = element_line(color = "black", size = .2),
    legend.title = element_text(color = "black", size = 20),
    legend.text = element_text(
      color = "black", size = 15, margin = margin(t = 6)
    )
  )

# If you ran stats to compare differences, you can add text on the graph to
# indicate which pairs are significantly different. Each "segment" draws a
# horizontal bracket between two boxes (x positions 1, 2, 3 follow the factor
# level order) and each "text" places the label just above its bracket.
# NOTE: the name `t` is kept from the original script; it shadows base R's
# t() as a variable only, calls to t() still find the function.
t <- w3b +
  annotate("segment", x = 2, xend = 3, y = 300, yend = 300) +
  annotate("segment", x = 1, xend = 3, y = 350, yend = 350) +
  annotate("segment", x = 1, xend = 2, y = 230, yend = 230) +
  annotate("text", x = 2.5, y = 330, label = "***", size = 5) +
  annotate("text", x = 2, y = 380, label = "***", size = 5) +
  annotate("text", x = 1.5, y = 250, label = "ns", size = 5)

# SCATTERPLOT ####
# To plot the relationship between two continuous variables you can use a
# scatterplot. Let's upload a new dataset, "HVS".
HVS <- read.csv("HVS.csv", header = TRUE)

# Plot Orbital Volume (y) against Cranial Capacity (x)
p4 <- ggplot(HVS, aes(CranialCapacity, MeanOrbitalVolume)) +
  geom_point()

# You can add the best fit line (linear model, no confidence band)
q4 <- p4 +
  geom_smooth(method = "lm", se = FALSE, colour = "black")

# You can edit the axis labels and the graph title
w4 <- q4 +
  xlab("Cranial Capacity") +
  ylab("Orbital Volume") +
  ggtitle("Relationship between Orbital Volume and Cranial Capacity")

# Change the format of the text.
# NOTE: the name `c` is kept from the original script; it shadows base R's
# c() as a variable only, calls to c() still find the function.
c <- w4 +
  theme(
    plot.title = element_text(color = "black", size = 20, hjust = .5),
    axis.text = element_text(
      color = "black", size = 20, hjust = 1, vjust = .5, face = "plain"
    ),
    axis.title = element_text(
      color = "black", size = 20, vjust = 2, face = "plain"
    ),
    panel.background = element_rect(fill = "transparent", colour = NA),
    axis.line = element_line(color = "black", size = .2)
  )

# We can also fit a separate best fit line for each gender: mapping colour to
# Gender groups both the points and the fitted lines.
ggplot(HVS, aes(CranialCapacity, MeanOrbitalVolume, colour = Gender)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  xlab("Cranial Capacity") +
  ylab("Orbital Volume") +
  ggtitle("Relationship between Orbital Volume and Cranial Capacity") +
  theme(
    plot.title = element_text(color = "black", size = 20, hjust = .5),
    axis.text = element_text(
      color = "black", size = 20, hjust = 1, vjust = .5, face = "plain"
    ),
    axis.title = element_text(
      color = "black", size = 20, vjust = 2, face = "plain"
    ),
    panel.background = element_rect(fill = "transparent", colour = NA),
    axis.line = element_line(color = "black", size = .2)
  )

# We can show multiple plots together using "patchwork"
# (install it only when it is missing)
if (!requireNamespace("patchwork", quietly = TRUE)) {
  install.packages("patchwork")
}
library(patchwork)

# Scatterplot and annotated boxplot side by side
c + t

# Using inset_element you can insert a graph within a graph; here the boxplot
# occupies the top-right corner of the scatterplot
c + inset_element(t, left = 0.6, bottom = 0.6, right = 1, top = 1)