# Let's clean our work environment first.
# NOTE: this removes every object in the global environment, but it does not
# detach packages or reset options; restarting R gives a truly clean session.
rm(list = ls())

# The first step is to tell R where your data are located. In other words,
# you need to set your working directory (edit this path for your computer).
setwd("C:\\Users\\kabur\\Dropbox\\R workshop\\2021-22 series\\session 1")

# Upload data. Spell out TRUE/FALSE: the shortcuts T and F are ordinary
# variables that can be overwritten.
monkeys <- read.csv(
  "data_monkeys.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# To see the objects in the working space, enter:
ls()

# Check the names of the headers.
names(monkeys)

# Check the characteristics of the data.
str(monkeys)

# Check the first six rows.
head(monkeys)

# Check the last six rows.
tail(monkeys)

# Check how many rows and columns there are in the data set.
dim(monkeys)

# You can select individual data points by using coordinates
# (row number, column number).
monkeys[1, 4]

# You can also save an object that corresponds to a specific coordinate.
adriana <- monkeys[2, 2]

# You can extract a whole section
# (from row number to row number, from column number to column number).
monkeys[1:4, 5:7]

# You can extract a single column using $.
monkeys$Focal_ID

# You can change column names as follows.
colnames(monkeys)[colnames(monkeys) == "Focal_ID"] <- "ID"
colnames(monkeys)[colnames(monkeys) == "Tot_N_HM_interactions"] <- "hm_int"
colnames(monkeys)[colnames(monkeys) == "Tot_Provision"] <- "prov"
colnames(monkeys)[colnames(monkeys) == "Grooming_given"] <- "gr"

# Let's recheck the header names.
names(monkeys)

# You can change the format of your data from a number to a character.
monkeys$csex <- as.character(monkeys$sex)

# Let's recheck the data format.
str(monkeys)

# We can change the numbers into meaningful text.
# For example, we can replace "0" with adult male ("am") and "1" with adult
# female ("af").
monkeys$csex[monkeys$csex == "0"] <- "am"
monkeys$csex[monkeys$csex == "1"] <- "af"

# You can round numbers to a given number of decimal places. Note that
# round() rounds to the nearest value; it does not always round up.
monkeys$Obs_time <- round(monkeys$Obs_time, digits = 0)

# Let's recheck the data.
head(monkeys)

# You can do all sorts of mathematical calculations.
sum(monkeys$Obs_time) # sum
mean(monkeys$Obs_time) # mean
median(monkeys$Obs_time) # median

# You can create a new column that is a mathematical calculation on two
# other columns.
monkeys$Tot_HMINT <- monkeys$hm_int + monkeys$prov

# You can extract statistical information for different groups/factors using
# the ddply function in the "plyr" package. Install the package only when it
# is missing, so the script does not reinstall it on every run.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)

# Summarize the data by mean and sd of hm_int within each sex category.
summarize_data <- ddply(
  monkeys, .(csex), summarize,
  mean = round(mean(hm_int), 2),
  sd = round(sd(hm_int), 2)
)

# Check the output.
summarize_data

# You can also change the headers of the object "summarize_data".
colnames(summarize_data)[colnames(summarize_data) == "csex"] <- "sex"

# Check the headers.
head(summarize_data)

# You can remove objects from the environment using the "rm" function.
rm(summarize_data)

# We can also ask R whether specific cells have specific data. For example,
# by using the "==" symbol we can ask whether we have females in our data set
# and how many females there were.
monkeys$csex == "af"
sum(monkeys$csex == "af")

# We can use the "which()" function to select a specific category of data and
# do calculations on that category.
# For example, if we want to find out for how long female monkeys were
# observed in total, we can enter the following command:
sum(monkeys[which(monkeys$csex == "af"), "Obs_time"])

# We can create a new table that includes only females.
females <- monkeys[which(monkeys$csex == "af"), ]

# You can exclude data points that have too little or too much observation
# time (i.e., exclude outliers). A logical mask is used instead of
# `-which(...)`: when nothing matches, `-which(...)` is an empty index and
# would silently drop every row. Rows whose Obs_time is missing are kept here;
# missing values are handled in the next step.
is_outlier <- females$Obs_time < 3 | females$Obs_time > 10
keep_row <- is.na(is_outlier) | !is_outlier
females_nooutlier <- females[keep_row, ]

# You can see whether you have missing data points (and where: row and
# column) through the following command:
which(is.na(females_nooutlier), arr.ind = TRUE)

# If you have any missing data points, you can remove the affected rows as
# follows. complete.cases() flags the rows that have no missing values; the
# (row, col) matrix returned by which(..., arr.ind = TRUE) must not be used as
# a negative row index, because its column numbers would be dropped as rows
# too.
data_clean <- females_nooutlier[complete.cases(females_nooutlier), ]

# Recheck for missing data as follows (the result should be empty):
which(is.na(data_clean), arr.ind = TRUE)

# If you want to remove a specific ID, you can do it as follows. %in% never
# returns NA and keeps all rows when the ID is not present.
data_clean_norw <- data_clean[!(data_clean$ID %in% "rw"), ]