#################################################### ## Data S2: R code #################################################### ################################################### ### stevens_rcode.R ### Created by Jeffrey R. Stevens on 3 Feb 2014 (jeffrey.r.stevens@gmail.com), ### Summary: This script calculates statistics and generates figures for the ### analysis of evolutionary hypotheses explaining comparative intertemporal choice. ### Instructions: Place this file and the data files (stevens_data.csv) ### in the same directory. Create a folder called "figures". Set the R ### working directory to this directory. At the R command prompt, type ### > source("stevens_rcode.R") ### This will run the script, adding all of the calculated variables to the ### workspace and saving PDF versions of the figures in the figures directory. ### Uses: This script can be reproduced and modified for personal and scientific use. ### Please cite the original source. ### Data files: Description of the data columns: ### stevens_dataS1--intertemporal choice and comparative data ### subject - name of subject if available ### species - species (common name) of subject (e.g., Marmoset) ### type - specific common name of subject (e.g., Common) ### latin_name - Latin name of species ### measure - type of data (indiff = indifference point, bodywt = body weight, ### ecv = endocranial volume, groupsize = group size, homerange = home range, ### lifespan = lifespan) ### value - value for specific measure (units: indifference point = seconds, ### body weight = grams, endocranial volume = cubic centimeters, group size = ### individuals, home range = hectares, lifespan = years) ### n - number of samples ### sd - standard deviation of samples ### reference - citation for source of data ### notes - miscellaneous notes on the measure, value, or source ################################################### ###################### ## Clear variables, load libraries, load data ###################### rm(list=ls()) # clears all variables library(beeswarm) # needed for beeswarm plot library(car) # needed for recode library(caper) # needed for pgls library(foreach) # needed for foreach library(lattice) # needed for plots library(latticeExtra) # needed for axis scaling library(psych) # needed for principal() for PCR library(xtable) # needed for xtable ## Load and prepare data raw_data <- read.csv("stevens_data.csv") # read in comparative data raw_data$n <- recode(raw_data$n, "'?' = 1") # if sample size (n) is unknown, recode it as 1 raw_data$n <- as.numeric(levels(raw_data$n)[as.integer(raw_data$n)])# convert samples sizes to numeric species <- levels(raw_data$latin_name) # extract species names species_nospace <- gsub(" ", "_", species) # remove space from species names num_species <- length(species) # calculate number of species measures <- levels(raw_data$measure) # extract measures num_measures <- length(measures) # calculate number of measures line_counter <- 1 # initialize line counter mean_data <- data.frame(matrix(rep(NA, num_species * num_measures * 3), ncol = 3)) # initialize mean_data data frame names(mean_data) <- c("species", "measure", "value") # name mean_data ###################### ## Calculate weighted mean of different measures for each species ###################### foreach(j = measures) %do% { # for each measure measures_subset <- subset(raw_data, measure == j) # create a subset of the data for that measure foreach(k = species) %do% { # for each species (within each measure) species_subset <- subset(measures_subset, latin_name == k) # create a subset of the data for that measure weighted_mean <- sum(species_subset$value * species_subset$n, na.rm = T) / sum(species_subset$n, na.rm = T) # calculate a weighted mean for that measure and species mean_data[line_counter, 1] <- k # assign species to column 1 mean_data[line_counter, 2] <- j # assign measure to column 2 mean_data[line_counter, 3] <- weighted_mean # assign weighted mean to column 3 line_counter <- line_counter + 1 # increment line counter } } ###################### ## Create primate data frame ###################### primate_data <- unstack(mean_data, form = "value ~ measure") # unstack the data to create columns for each measure primate_data <- data.frame(species = species_nospace, primate_data) # create data frame with species names and measure data primate_data$logindiff <- log(primate_data$indiff) # create log indifference point data primate_data$loglifespan <- log(primate_data$lifespan) # create log lifespan data primate_data$logbodywt <- log(primate_data$bodywt) # create log body weight data primate_data$logECV <- log(primate_data$ecv) # create log brain size data primate_data$loggroupsize <- log(primate_data$groupsize) # create log group size data primate_data$loghomerange <- log(primate_data$homerange) # create log home range size data primate_table <- data.frame(species, primate_data[, c(6, 2, 3, 7, 5, 4)]) # create data frame for table names(primate_table) <- c("Species", "Waiting time", "Body weight", "Brain size", "Lifespan", "Home range", "Group size") ###################### ## Create phylogenetic tree ###################### primate_tree_nex <- "((((Macaca_fascicularis:5.045978,Macaca_mulatta:5.045978):24.954022,((Gorilla_gorilla:8.652233,(Pan_paniscus:2.333553,Pan_troglodytes:2.333553):6.318679):6.480222,Pongo_pygmaeus:15.132455):14.867546):16.811821,(Ateles_geoffroyi:21.321300,((Callithrix_jacchus:15.712251,Saguinus_oedipus:15.712253):4.380275,Sapajus_apella:20.092528):1.228773):25.490521):26.191197,(Eulemur_macaco:20.542803,(Varecia_rubra:1.270526,Varecia_variegata:1.270526):19.272276):52.460215);" # read in phylogenetic data primate_tree_raw <- read.tree(text = primate_tree_nex) # convert to tree object primate_tree <- rotate(primate_tree_raw, node = 16) # rotate nodes to reorder tree primate_tree <- rotate(primate_tree, node = 17) primate_tree <- rotate(primate_tree, node = 19) primate_tree <- rotate(primate_tree, node = 20) primate_tree <- rotate(primate_tree, node = 22) primate_tree <- rotate(primate_tree, node = 24) primate_tree <- rotate(primate_tree, node = 25) ###################### ## Integrate behavioral data with phylogenetic tree (preliminary to calculate relative brain size) ###################### prelim_primate_comp <- comparative.data(phy = primate_tree_raw, data = primate_data, names.col = species, vcv = TRUE, na.omit = FALSE, warn.dropped = TRUE) # combine tree with comparative data ###################### ## Calculate relative brain size as residuals from regression of log(ecv) and log(bodywt) ###################### relative_brain <- data.frame(residuals(pgls(log(ecv) ~ log(bodywt), data = prelim_primate_comp))) # calculate residuals from phylogenetic regression relative_brain_species <- rownames(relative_brain) # extract species names rownames(relative_brain) <- NULL # remove species names relative_brain <- data.frame(relative_brain_species, relative_brain) # create data frame of species names and residuals names(relative_brain) <- c("species", "relative_brain") # rename columns primate_table$"Relative brain size" <- primate_data$relative_brain <- relative_brain$relative_brain # append relative_brain to primate_data primate_predictors <- primate_data[, c(2, 3, 7, 5, 14, 4, 6)] # extract main predictors and indifference points primate_logpredictors <- primate_data[, c(10, 11, 9, 13, 14, 12)] # extract log predictors and indifference points ###################### ## Analyze allometric variables ###################### # Calculate allometric correlations allometric_vars <- primate_data[, c(9:11, 13)] # extract body-size related columns row.names(allometric_vars) <- primate_data$species # assign species as row names allometric_cor_matrix <- cor(allometric_vars) # create matrix of allometric correlation values allometric_cor <- c(allometric_cor_matrix[2:4, 1], allometric_cor_matrix[3:4, 2], allometric_cor_matrix[4, 3]) # extract only single case of each correlation # Conduct principal components analysis allometric_pca <- principal(allometric_vars) # calculate PCA on allometric variables primate_data$allometric <- allometric_pca$scores # extract PCA scores allometric_propvar <- colSums(allometric_pca$loading*allometric_pca$loading)/dim(allometric_pca$loading)[1] # calculate proportion of variance accounted for predictors_pca <- principal(primate_logpredictors) # Calculate correlations allometric_itc_cor <- cor.test(log(primate_data$indiff), primate_data$allometric) # calculate correlation of ITC and allometric relative_brain_itc_cor <- cor.test(log(primate_data$indiff), primate_data$relative_brain) # calculate correlation of ITC and relative brain size group_size_itc_cor <- cor.test(log(primate_data$indiff), log(primate_data$groupsize)) # calculate correlation of ITC and group size ###################### ## Integrate behavioral data with phylogenetic tree ###################### primate_comp <- comparative.data(phy = primate_tree_raw, data = primate_data, names.col = species, vcv = TRUE, na.omit = FALSE, warn.dropped = TRUE) # combine tree with comparative data ###################### ## Analyze comparative data ###################### model_pgls <- pgls(log(indiff) ~ allometric + relative_brain + log(groupsize), data = primate_comp, lambda ='ML') # calculate phylogenetic generalized least squares regression summary(model_pgls) # Create function that rounds p-values round_pvalue <- function(pvalue) { if(pvalue < 0.01) { # if p < 0.01 new_pvalue <- "p < 0.01" # print p < 0.01 } else { new_pvalue <- paste("p =", round(pvalue, 2)) # otherwise round p-value to 2 digits } new_pvalue # return new p-value } # Calculate standardized coefficients (betas) b <- summary(model_pgls)$coef[-1, 1] # extract nonstandardized coefficients sx1 <- sd(primate_data$allometric) # calculate standard deviation of allometric data sx2 <- sd(primate_data$relative_brain) # calculate standard deviation of relative brain size data sx3 <- sd(primate_data$loggroupsize) # calculate standard deviation of log group size data sy <- sd(primate_data$logindiff) # calculate standard deviation of log indifference point data allometric_beta <- round(b[1] * sx1/sy, 2) # standardize allometric coefficient relative_brain_beta <- round(b[2] * sx2/sy, 2) # standardize relative brain size coefficient groupsize_beta <- round(b[3] * sx3/sy, 2) # standardize group size coefficient # Extract t-values, F-statistics, p-values model_fstat <- round(summary(model_pgls)$fstatistic[1],2) # extract F statistic from overall model model_pvalue <- round_pvalue(pf(summary(model_pgls)$fstatistic[1], summary(model_pgls)$fstatistic[2], summary(model_pgls)$fstatistic[3], lower = FALSE)) # calculate p-value for overall model intercept_tvalue <- round(summary(model_pgls)$coefficients[1, 3], 2) # extract t-values allometric_tvalue <- round(summary(model_pgls)$coefficients[2, 3], 2) relative_brain_tvalue <- round(summary(model_pgls)$coefficients[3, 3], 2) groupsize_tvalue <- round(summary(model_pgls)$coefficients[4, 3], 2) intercept_pvalue <- round_pvalue(summary(model_pgls)$coefficients[1, 4]) # extract p-values allometric_pvalue <- round_pvalue(summary(model_pgls)$coefficients[2, 4]) relative_brain_pvalue <- round_pvalue(summary(model_pgls)$coefficients[3, 4]) groupsize_pvalue <- round_pvalue(summary(model_pgls)$coefficients[4, 4]) ###################### ## Calculate phylogenetic signal estimate (lambda) ###################### lambda <- round(summary(model_pgls)$param[2], 2) # extract lambda lambda_pvalues <- round(summary(model_pgls)$param.CI$lambda$bounds.p, 2) # extract lambda estimate p-values for significant difference from 0 and 1 ###################### ## Analysis without orangutan data ###################### primate_data_no_orang <- subset(primate_data, species != "Pongo_pygmaeus") primate_comp_no_orang <- comparative.data(phy = primate_tree_raw, data = primate_data_no_orang, names.col = species, vcv = TRUE, na.omit = FALSE, warn.dropped = FALSE) # combine tree with comparative data group_size_itc_cor_no_orang <- cor.test(log(primate_data_no_orang$indiff), log(primate_data_no_orang$groupsize)) # correlate ITC and group size model_pgls_no_orang <- pgls(log(indiff) ~ allometric + relative_brain + log(groupsize), data = primate_comp_no_orang, lambda ='ML') # calculate phylogenetic generalized least squares regression summary(model_pgls_no_orang) b_no_orang <- summary(model_pgls_no_orang)$coef[-1, 1] # extract nonstandardized coefficients sx_no_orang <- sd(primate_data_no_orang$loggroupsize) # calculate standard deviation of log group size data sy_no_orang <- sd(primate_data_no_orang$logindiff) # calculate standard deviation of log indifference point data groupsize_beta_no_orang <- round(b_no_orang[3] * sx_no_orang/sy_no_orang, 2) # standardize group size coefficient groupsize_pvalue_no_orang <- round_pvalue(summary(model_pgls_no_orang)$coefficients[4, 4]) # extract p-value for group size ###################### ## Plot data ###################### ## Plot hypothesis correlations # Allometric xy_allometric <- xyplot(indiff ~ allometric, data = primate_data, scales = list(y = list(log = TRUE, equispaced.log = FALSE, tck = c(1,0))), xlab = "Allometric score", ylab = "Waiting time (s)", aspect = 1, par.settings = list(par.ylab.text = list(cex = 2), par.xlab.text = list(cex = 2), axis.text = list(cex = 1.5), layout.widths = list(ylab.axis.padding = 1, ylab = 1, axis.left = 0.8, panel = 1, between = 1, axis.right = 0, ylab.right = 0, right.padding = 0), axis.components = list(left = list(pad2 = 0))), panel = function(x, y, ...) { panel.xyplot(x, y, type = c("p", "r"), cex = 2) panel.text(x = -1.7, y = log10(120), labels = "(a)", cex = 2) } ) pdf(file = "figures/itc_allometric.pdf", width = 7, height = 5.5) print(xy_allometric) dev.off() # Relative brain size xy_relative_brain <- xyplot(indiff ~ relative_brain, data = primate_data, scales = list(y = list(log = TRUE, equispaced.log = FALSE, labels = "", tck = c(0, 1))), xlab = "Relative brain size", ylab = NULL, aspect = 1, par.settings = list(par.ylab.text = list(cex = 2), par.xlab.text = list(cex = 2), axis.text = list(cex = 1.5), layout.widths = list(ylab.axis.padding = 0, ylab = 0, axis.left = 0, panel = 1, between = 0, axis.right = 0, ylab.right = 0, right.padding = 0), axis.components = list(left = list(pad2 = 0))), panel = function(x, y, ...) { panel.xyplot(x, y, cex = 2) panel.text(x = -0.3, y = log10(120), labels = "(b)", cex = 2) } ) pdf(file = "figures/itc_relative_brain.pdf", width = 7, height = 5.5) print(xy_relative_brain) dev.off() # Group size xy_groupsize <- xyplot(indiff ~ groupsize, data = primate_data, xlab = "Group size", ylab = NULL, cex = 2, aspect = 1, scales = list(log = TRUE, equispaced.log = FALSE, y = list(labels = "")), par.settings = list(par.ylab.text = list(cex = 2), par.xlab.text = list(cex = 2), axis.text = list(cex = 1.5), layout.widths = list(ylab.axis.padding = 0, ylab = 0, axis.left = 0, panel = 1, between = 0, axis.right = 0, ylab.right = 0, right.padding = 0), axis.components = list(left = list(pad2 = 0))), panel = function(x, y, ...) { panel.xyplot(x, y, cex = 2) panel.text(x = log10(2), y = log10(120), labels = "(c)", cex = 2) } ) pdf(file = "figures/itc_groupsize.pdf", width = 7, height = 5.5) print(xy_groupsize) dev.off() # Combine separate plots into figure with three subfigures pdf(file = "figures/itc_correlates.pdf", width = 12, height = 5) print(xy_allometric, split = c(1,1,3,1), position = c(0.03, 0, 1, 1), panel.width = list(x = 3.55, unit = "in"), more=T) print(xy_relative_brain, split = c(2,1,3,1), position = c(0.075, 0, 1, 1), panel.width = list(x = 3.5, unit = "in"), more=T) print(xy_groupsize, split = c(3,1,3,1), position = c(0.05, 0, 1, 1), panel.width = list(x = 3.5, unit = "in"), more=F) dev.off() ## Plot indifference points indiff_data <- subset(raw_data, measure == "indiff") indiff_data$latin_name <- factor(as.character(indiff_data$latin_name), # reorder species levels = c("Eulemur macaco", "Varecia rubra", "Varecia variegata", "Saguinus oedipus", "Callithrix jacchus", "Sapajus apella", "Ateles geoffroyi", "Macaca fascicularis", "Macaca mulatta", "Pongo pygmaeus", "Gorilla gorilla", "Pan paniscus", "Pan troglodytes")) pdf("figures/indifference_points.pdf", width = 9) par(mai = c(2, 0.9, 0, 0)) indifference_plot <- boxplot(value ~ latin_name, data = indiff_data, range = 0, col="#0000ff22", ylab = "Adjusted delay at indifference (s)", xlab = NULL, ylim = c(0, 160), las = 2, cex = 1.5, cex.axis = 1.2, cex.lab = 1.5, lwd = 0.5, boxwex = 0.5, outlier = F, color = T ) # generate boxplot of species indifference points for 2v6 condition mean_indiff <- tapply(indiff_data$value, indiff_data$latin_name, mean) # calculate means points(seq(indifference_plot$n), mean_indiff, pch = 17, cex = 1.5, lwd = 2) # overlay means on boxplot beeswarm(value ~ latin_name, data = indiff_data, pch = 1, cex = 1.15, col = "grey30", add = T) # overlay beeswarm plot of individual data points dev.off() ## Plot phylogenetic tree pdf(file = "figures/phylogeny.pdf", width = 7, height = 5.5)# print tree to file plot(primate_tree) dev.off() ## Plot all correlations # Create function for upper panels of correlation plot panel.cor <- function(x, y, ...) { # Calculate center of x dimension and assign x to log or linear scale if(min(x) > 0) { # if the minimum value is positive, this variable is plotted on a log scale (hack) rx <- sqrt(min(x) * max(x)) # calculate midpoint of log scale scaledx <- log(x) # log transform x } else { # if the minimum value is negative, this variable is plotted on a linear scale (hack) rx <- mean(x) # calculate midpoint of linear scale scaledx <- x # keep x in linear scale } # Calculate center of y dimension and assign y to log or linear scale if(min(y) > 0) { # if the minimum value is positive, this variable is plotted on a log scale (hack) ry <- sqrt(min(y) * max(y)) # calculate midpoint of log scale scaledy <- log(y) # log transform y } else { # if the minimum value is negative, this variable is plotted on a linear scale (hack) ry <- mean(y) # calculate midpoint of linear scale scaledy <- y # keep y in linear scale } # Calculate correlation coefficients for each panel r <- round(abs(cor(scaledx, scaledy)), 2) # calculate correlation coefficient if(r == 0) r = "0.00" # if correlation coefficient = 0, print "0.00" # Color background of allometric variables if(r > 0.9) { # if correlation coefficient > 0.9 (therefore, is one of the allometric variables) minx <- min(x) - abs(min(x)) * 0.50 # calculate minimum of x-axis maxx <- max(x) + abs(max(x)) * 0.50 # calculate maximum of x-axis miny <- min(y) - abs(min(y)) * 0.50 # calculate minimum of y-axis maxy <- max(y) + abs(max(y)) * 0.50 # calculate maximum of y-axis rect(minx, miny, maxx, maxy, col = "gray90", border = NA) # print gray rectangle as background for allometric variables } text(rx, ry, r, cex = 2) # print correlation coefficient } # Create function for lower panels of correlation plot panel.mypoints = function(x, y, ...) { # Determine whether scale should be log or linear if(min(x) > 0) { # if the minimum value is positive, this variable is plotted on a log scale (hack) rx <- sqrt(min(x) * max(x)) # calculate midpoint of log scale scaledx <- log(x) # log transform x } else { # if the minimum value is negative, this variable is plotted on a linear scale (hack) rx <- mean(x) # calculate midpoint of linear scale scaledx <- x # keep x in linear scale } # Calculate center of y dimension and assign y to log or linear scale if(min(y) > 0) { # if the minimum value is positive, this variable is plotted on a log scale (hack) ry <- sqrt(min(y) * max(y)) # calculate midpoint of log scale scaledy <- log(y) # log transform y } else { # if the minimum value is negative, this variable is plotted on a linear scale (hack) ry <- mean(y) # calculate midpoint of linear scale scaledy <- y # keep y in linear scale } # Calculate correlation coefficients for each panel r <- round(abs(cor(scaledx, scaledy)), 2) # calculate correlation coefficient if(r == 0) r = "0.00" # if correlation coefficient = 0, print "0.00" # Color background of allometric variables if(r > 0.9) { # if correlation coefficient > 0.9 (therefore, is one of the allometric variables) minx <- min(x) - abs(min(x)) * 0.50 # calculate minimum of x-axis maxx <- max(x) + abs(max(x)) * 0.50 # calculate maximum of x-axis miny <- min(y) - abs(min(y)) * 0.50 # calculate minimum of y-axis maxy <- max(y) + abs(max(y)) * 0.50 # calculate maximum of y-axis rect(minx, miny, maxx, maxy, col = "gray90", border = NA) # print gray rectangle as background for allometric variables } points(x, y) # plot points if(r > 0.9) { # if correlation coefficient > 0.9 (therefore, is one of the allometric variables) abline(coef = coef(lm(log10(y) ~ log10(x)))) } } # Create function for diagonal panels of correlation plot diag_labels <- c("Body mass", "Absolute\nbrain volume", "Lifespan", "Home range") # list labels for allometric variables diag_colors <- c("gray80", "gray80", "gray80", "gray80", "white", "white", "white")# assign background colors panel.diag <- function(x, y, labels, ...) { minx <- min(x) # calculate minimum of x-axis maxx <- max(x) # calculate maximum of x-axis miny <- min(y) # calculate minimum of y-axis maxy <- max(y) # calculate maximum of y-axis if(labels %in% diag_labels) { # if label is allometric label rect(0.1, 0.1, 10, 10, col = "gray90", border = NA) # print gray rectangle as background for allometric variables } text(minx, miny, labels, cex = 1.85) # print label } # Print correlations plot options(scipen=3) # set options to remove scientific notation from axes pdf(file = "figures/all_correlates.pdf", width = 11, height = 8) # save as PDF pairs(primate_predictors, log = c(1:4, 6:7), upper.panel = panel.cor, lower.panel = panel.mypoints, text.panel = panel.diag, labels = c("Body mass", "Absolute\nbrain volume", "Lifespan", "Home range", "Relative\nbrain size", "Group size", "Indifference\npoints"), cex.labels = 1.9, cex.axis = 1.35, cex = 1.25, gap = 1.5 ) # create correlation graphs dev.off()