# Compare petal length between Iris setosa and Iris versicolor ----
#
# Workflow: load the data, keep the two species of interest, check the
# t-test assumptions, run the Mann-Whitney U-test, report effect size and
# descriptive statistics, then plot the result.

# Packages ----
# Install the required packages only when they are missing, so that
# re-running the script does not reinstall them every time.
for (pkg in c("rstatix", "rcompanion", "plyr", "ggplot2")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(rstatix)    # shapiro_test(), group_by(), %>%
library(rcompanion) # wilcoxonR()
library(ggplot2)    # ggplot() and friends
# plyr is called through `plyr::` below instead of being attached, so that it
# does not mask the data-manipulation verbs made available by rstatix.

# Upload the data ----
iris_data <- read.csv("iris.csv", header = TRUE)

# Generate a new table that contains only the species "setosa" and
# "versicolor".
# `%in%` tests set membership row by row. Comparing with
# `== c("setosa", "versicolor")` would recycle the two names down the column
# and keep only the rows that happen to line up with that alternating
# pattern, silently discarding about half of the wanted observations.
keep_species <- c("setosa", "versicolor")
iris_data2 <- iris_data[iris_data$Species %in% keep_species, ]
# If Species was read as a factor, drop the now-unused level(s) so that
# grouping, testing and plotting only see the two retained species.
iris_data2 <- droplevels(iris_data2)

# Compare petal length (column "Petal.Length") between the two species ----
# NOTE(review): the original task comment mentioned sepal length
# ("Sepal.length"), but every call in this script uses Petal.Length.
# Confirm which variable is intended.
#
# We can run either the independent-samples t-test or the Mann-Whitney
# U-test. Let's check the t-test assumptions first.

# Assumption 1: distribution of the data (Shapiro-Wilk test per species)
iris_data2 %>%
  group_by(Species) %>%
  shapiro_test(Petal.Length)
# Result recorded by the author: data are normally distributed.

# Assumption 2: homogeneity of variance (F test)
var.test(Petal.Length ~ Species, data = iris_data2)
# Result recorded by the author: the assumption of homogeneity of variance
# is violated (p < 0.001).

# Since one of the two assumptions is violated, we run the Mann-Whitney
# U-test.
wilcox.test(Petal.Length ~ Species, data = iris_data2)

# Effect size and confidence interval ----
wilcoxonR(
  x = iris_data2$Petal.Length,
  g = iris_data2$Species,
  ci = TRUE
)

# Calculate mean, median and standard deviation of petal length for the two
# species ----
plyr::ddply(
  iris_data2,
  "Species",
  plyr::summarize,
  mean = mean(Petal.Length),
  median = median(Petal.Length),
  sd = sd(Petal.Length)
)

# Plot the results with the most appropriate plot ----
ggplot(iris_data2, aes(x = Species, y = Petal.Length)) +
  # The raw points are drawn by geom_jitter(), so hide the boxplot's own
  # outlier markers to avoid plotting the same observations twice.
  geom_boxplot(outlier.shape = NA) +
  # Jitter horizontally only: vertical jitter would move points away from
  # their measured petal length.
  geom_jitter(width = 0.2, height = 0) +
  ggtitle("Difference in Petal length between plant species") +
  xlab("Species") +
  ylab("Petal length (cm)") +
  theme(
    plot.title = element_text(color = "black", size = 20, hjust = 0.5),
    axis.text = element_text(
      color = "black", size = 20, hjust = 0.5, vjust = 0.5, face = "plain"
    ),
    axis.title = element_text(color = "black", size = 20, vjust = 3),
    panel.background = element_rect(fill = "transparent", colour = NA),
    # ggplot2 >= 3.4.0 prefers `linewidth` for line elements; `size` is kept
    # here so the script still runs on older ggplot2 versions.
    axis.line = element_line(color = "black", size = 0.2)
  )