# Set the working directory to the folder that holds the session data.
# NOTE(review): this absolute path is machine-specific; on another computer
# it has to be edited before the script will run.
setwd('C:\\Users\\u23916\\Dropbox\\R workshop\\2021-22 series\\session 7')

# Import the data; the first row of the CSV file holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
hm_interactions <- read.csv("hm_interactions.csv", header = TRUE)

# A single column can be pulled out of the table as a plain vector.
# [[ ]] with the column name returns the same values as
# hm_interactions$Focal_ID.
hm_interactions[["Focal_ID"]]

# Column names can be changed: locate the old name among the table's names
# and overwrite it with the new, shorter one.
names(hm_interactions)[names(hm_interactions) == "Focal_ID"] <- "ID"
names(hm_interactions)[names(hm_interactions) == "Tot_N_HM_interactions"] <- "hm_int"
names(hm_interactions)[names(hm_interactions) == "Tot_Provision"] <- "prov"
names(hm_interactions)[names(hm_interactions) == "Grooming_given"] <- "gr"

# Numeric codes can be replaced with meaningful text. Here the sex codes are
# recoded: "0" becomes "am" (adult male) and "1" becomes "af" (adult female).
# Writing text into the column turns it into a character column.
hm_interactions[["sex"]][hm_interactions[["sex"]] == "0"] <- "am"
hm_interactions[["sex"]][hm_interactions[["sex"]] == "1"] <- "af"

# Basic descriptive statistics of the observation-time column
sum(hm_interactions[["Obs_time"]])    # sum
mean(hm_interactions[["Obs_time"]])   # mean
median(hm_interactions[["Obs_time"]]) # median

# A new column can be computed from two existing columns:
# gr_rates is the gr column divided by the Obs_time column, row by row.
hm_interactions[["gr_rates"]] <-
  hm_interactions[["gr"]] / hm_interactions[["Obs_time"]]

# Average grooming rate per group and per sex.
# plyr is installed only when it is missing, so re-running the script does
# not download and reinstall the package every time.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)

# One row per Group x sex combination, holding the mean and the standard
# deviation of gr_rates; missing values are dropped before each statistic
# is computed (na.rm = TRUE).
data_summary <- ddply(
  hm_interactions, .(Group, sex), summarize,
  mean = mean(gr_rates, na.rm = TRUE),
  sd = sd(gr_rates, na.rm = TRUE)
)

# Create a new table that includes only the adult females.
# which() returns the row numbers where the condition is TRUE, so rows with a
# missing sex are left out.
females <- hm_interactions[which(hm_interactions$sex == "af"), ]

# Exclude data points whose observation time is too small (< 3) or too large
# (> 10), i.e. exclude outliers, using the comparison operators "<" and ">"
# combined with "|" (or).
# A logical mask is used instead of females[-which(...), ]: when no row is an
# outlier, which() returns integer(0), and indexing with -integer(0) selects
# zero rows, which would silently drop every female instead of keeping all.
is_outlier <- females$Obs_time < 3 | females$Obs_time > 10
# %in% TRUE treats a missing Obs_time as "not an outlier", so those rows are
# kept, just as -which(...) kept them.
females_nooutlier <- females[!(is_outlier %in% TRUE), ]