#CCA Butterflies library("CCA") library("yacca") www = "https://www.mimuw.edu.pl/~noble/courses/MultivariateStatistics/data/butterfly.dat" butterfly <- read.table(www, header=T, quote="\"") View(butterfly) a <- butterfly[,5]+butterfly[,6] y <- cbind(a,butterfly[,7:9]) y <- simplify2array(y) x <- butterfly[,1:4] x<-simplify2array(x) canon <- cca(x,y,xcenter=TRUE, ycenter=TRUE, xscale=TRUE, yscale=TRUE, standardize.scores=TRUE) canon$corr canon canon$chisq canon$df pchisq(canon$chisq,canon$df,ncp=0) u1<- x%*%canon$xcoef[,1] v1 <- y%*%canon$ycoef[,1] library(ggplot2) qplot(u1,v1) #CCA PENGUIN DATA library(tidyverse) theme_set(theme_bw(16)) link2data <- "https://www.mimuw.edu.pl/~noble/courses/MultivariateStatistics/data/palmer_penguins.csv" penguins <- read_csv(link2data) penguins <- penguins %>% drop_na() penguins %>% head() X <- penguins %>% select(bill_depth_mm, bill_length_mm) %>% scale() Y <- penguins %>% select(flipper_length_mm,body_mass_g) %>% scale() head(Y) library(CCA) cc_results <- cancor(X,Y) str(cc_results) cc_results$xcoef cc_results$ycoef cc_results$cor CC1_X <- as.matrix(X) %*% cc_results$xcoef[, 1] CC1_Y <- as.matrix(Y) %*% cc_results$ycoef[, 1] CC2_X <- as.matrix(X) %*% cc_results$xcoef[, 2] CC2_Y <- as.matrix(Y) %*% cc_results$ycoef[, 2] cor(CC1_X,CC1_Y) assertthat::are_equal(cc_results$cor[1], cor(CC1_X,CC1_Y)[1]) cca_df <- penguins %>% mutate(CC1_X=CC1_X, CC1_Y=CC1_Y, CC2_X=CC2_X, CC2_Y=CC2_Y) cca_df %>% ggplot(aes(x=CC1_X,y=CC1_Y))+ geom_point() cca_df %>% ggplot(aes(x=species,y=CC1_X, color=species))+ geom_boxplot(width=0.5)+ geom_jitter(width=0.15)+ theme(legend.position="none") cca_df %>% ggplot(aes(x=species,y=CC1_Y, color=species))+ geom_boxplot(width=0.5)+ geom_jitter(width=0.15) cca_df %>% ggplot(aes(x=CC1_X,y=CC1_Y, color=species))+ geom_point() cca_df %>% ggplot(aes(x=CC2_X,y=CC2_Y, color=sex))+ geom_point() #MMREG mm <- read.csv("https://www.mimuw.edu.pl/~noble/courses/MultivariateStatistics/data/mmreg.csv") colnames(mm) <- c("Control", "Concept", "Motivation", "Read", "Write", "Math", "Science", "Sex") psych <- mm[, 1:3] acad <- mm[, 4:8] require(CCA) matcor(psych, acad) cc1 <- cc(psych, acad) cc1$cor # display the canonical correlation coefficients names(cc1) cc1[3:4] # display the x and y coefficients n <- dim(psych)[1] p <- length(psych) q <- length(acad) { ev <- (1 - cc1$cor^2) k <- min(p, q) m <- n - 3/2 - (p + q)/2 w <- rev(cumprod(rev(ev))) d1 <- d2 <- f <- vector("numeric", k) for (i in 1:k) { s <- sqrt((p^2 * q^2 - 4)/(p^2 + q^2 - 5)) si <- 1/s d1[i] <- p * q d2[i] <- m * s - p * q/2 + 1 r <- (1 - w[i]^si)/w[i]^si f[i] <- r * d2[i]/d1[i] p <- p - 1 q <- q - 1 } pv <- pf(f, d1, d2, lower.tail = FALSE) } dmat <- cbind(WilksL = w, F = f, df1 = d1, df2 = d2, p = pv) dmat s1 <- diag(sqrt(diag(cov(psych)))) s1 %*% cc1$xcoef s2 <- diag(sqrt(diag(cov(acad)))) s2 %*% cc1$ycoef