Setup

Upload the R packages needed to illustate

library("MVA")
## Loading required package: HSAUR2
## Loading required package: tools
set.seed(280875)

library("lattice")

lattice.options(default.theme =
     function()
         standard.theme("pdf", color = FALSE))

PCA: PCA:blood_corr

The data in this example consist of eight blood chemistry variables measured on 72 patients in a clinical trial

bc <- c(
  0.290,           
  0.202,  0.415,       
 -0.055,  0.285,  0.419,       
 -0.105, -0.376, -0.521, -0.877,      
 -0.252, -0.349, -0.441, -0.076,  0.206,
 -0.229, -0.164, -0.145,  0.023,  0.034,  0.192,
  0.058, -0.129, -0.076, -0.131,  0.151,  0.077,  0.423)

blood_sd <- c(rblood = 0.371, plate = 41.253,  wblood = 1.935,
               neut = 0.077, lymph = 0.071, bilir = 4.037,
               sodium = 2.732, potass = 0.297)

blood_corr <- diag(length(blood_sd)) / 2

blood_corr[upper.tri(blood_corr)] <- bc     

blood_corr <- blood_corr + t(blood_corr)

blood_cov <- blood_corr * outer(blood_sd, blood_sd, "*")

blood_corr
##        [,1]   [,2]   [,3]   [,4]   [,5]   [,6]   [,7]   [,8]
## [1,]  1.000  0.290  0.202 -0.055 -0.105 -0.252 -0.229  0.058
## [2,]  0.290  1.000  0.415  0.285 -0.376 -0.349 -0.164 -0.129
## [3,]  0.202  0.415  1.000  0.419 -0.521 -0.441 -0.145 -0.076
## [4,] -0.055  0.285  0.419  1.000 -0.877 -0.076  0.023 -0.131
## [5,] -0.105 -0.376 -0.521 -0.877  1.000  0.206  0.034  0.151
## [6,] -0.252 -0.349 -0.441 -0.076  0.206  1.000  0.192  0.077
## [7,] -0.229 -0.164 -0.145  0.023  0.034  0.192  1.000  0.423
## [8,]  0.058 -0.129 -0.076 -0.131  0.151  0.077  0.423  1.000
blood_sd
## rblood  plate wblood   neut  lymph  bilir sodium potass 
##  0.371 41.253  1.935  0.077  0.071  4.037  2.732  0.297
blood_pcacov <- princomp(covmat = blood_cov)

summary(blood_pcacov, loadings = TRUE)
## Importance of components:
##                            Comp.1      Comp.2     Comp.3      Comp.4
## Standard deviation     41.2877486 3.880212624 2.64197339 1.624583979
## Proportion of Variance  0.9856182 0.008705172 0.00403574 0.001525986
## Cumulative Proportion   0.9856182 0.994323381 0.99835912 0.999885108
##                             Comp.5       Comp.6       Comp.7       Comp.8
## Standard deviation     0.353951757 2.561722e-01 8.510631e-02 2.372715e-02
## Proportion of Variance 0.000072436 3.794288e-05 4.187837e-06 3.255049e-07
## Cumulative Proportion  0.999957544 9.999955e-01 9.999997e-01 1.000000e+00
## 
## Loadings:
##        Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## rblood                              0.943  0.329              
## plate   0.999                                                 
## wblood         0.192         0.981                            
## neut                                              0.758  0.650
## lymph                                            -0.649  0.760
## bilir         -0.961  0.195  0.191                            
## sodium        -0.193 -0.979                                   
## potass                              0.329 -0.942
blood_pcacor <- princomp(covmat = blood_corr)

summary(blood_pcacor, loadings = TRUE)
## Importance of components:
##                           Comp.1    Comp.2    Comp.3     Comp.4     Comp.5
## Standard deviation     1.6710100 1.2375848 1.1177138 0.88227419 0.78839505
## Proportion of Variance 0.3490343 0.1914520 0.1561605 0.09730097 0.07769584
## Cumulative Proportion  0.3490343 0.5404863 0.6966468 0.79394778 0.87164363
##                            Comp.6     Comp.7     Comp.8
## Standard deviation     0.69917350 0.66002394 0.31996216
## Proportion of Variance 0.06110545 0.05445395 0.01279697
## Cumulative Proportion  0.93274908 0.98720303 1.00000000
## 
## Loadings:
##      Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## [1,]  0.194  0.417  0.400  0.652  0.175  0.363  0.176  0.102
## [2,]  0.400  0.154  0.168        -0.848 -0.230 -0.110       
## [3,]  0.459         0.168 -0.274  0.251 -0.403  0.677       
## [4,]  0.430 -0.472 -0.171  0.169  0.118        -0.237  0.678
## [5,] -0.494  0.360        -0.180 -0.139 -0.136  0.157  0.724
## [6,] -0.319 -0.320 -0.277  0.633 -0.162 -0.384  0.377       
## [7,] -0.177 -0.535  0.410 -0.163 -0.299  0.513  0.367       
## [8,] -0.171 -0.245  0.709         0.198 -0.469 -0.376
plot(blood_pcacor$sdev^2, xlab = "Component number",
      ylab = "Component variance", type = "l", main = "Scree diagram")

plot(log(blood_pcacor$sdev^2), xlab = "Component number",
      ylab = "log(Component variance)", type="l",
      main = "Log(eigenvalue) diagram")

PCA: headsize:table

The data give the head lengths and head breadths (in millimetres) for each of the first two adult sons in 25 families.

"headsize" <-
 matrix(c(191, 195, 181, 183, 176, 208, 189, 197, 188, 192, 179, 183, 174, 190, 188, 163, 195, 186, 181, 175, 192, 174,
         176, 197, 190, 155, 149, 148, 153, 144, 157, 150, 159, 152, 150, 158, 147, 150, 159, 151, 137, 155, 153,
         145, 140, 154, 143, 139, 167, 163, 179, 201, 185, 188, 171, 192, 190, 189, 197, 187, 186, 174, 185, 195,
         187, 161, 183, 173, 182, 165, 185, 178, 176, 200, 187, 145, 152, 149, 149, 142, 152, 149, 152, 159, 151,
         148, 147, 152, 157, 158, 130, 158, 148, 146, 137, 152, 147, 143, 158, 150),
        nrow = 25, ncol = 4
 ,  dimnames = list(character(0)
 , c("head1", "breadth1", "head2", "breadth2")))

 x <- headsize

headsize <- as.data.frame(headsize)
        
toLatex(HSAURtable(headsize), pcol = 2,
     caption = "Head Size Data.",
     label = "ch:PCA:headsize:tab", rownames = FALSE)
## \index{headsize data@\Robject{headsize} data}
## \begin{center}
## \begin{longtable}{ rrrr|rrrr }
## \caption{\Robject{headsize} data. Head Size Data. \label{ch:PCA:headsize:tab}}
## \\
## \hline
## \Robject{head1} & \Robject{breadth1} & \Robject{head2} & \Robject{breadth2} & \Robject{head1} & \Robject{breadth1} & \Robject{head2} & \Robject{breadth2} \\ \hline
## \endfirsthead
## \caption[]{\Robject{headsize} data (continued).} \\
## \hline
## \Robject{head1} & \Robject{breadth1} & \Robject{head2} & \Robject{breadth2} & \Robject{head1} & \Robject{breadth1} & \Robject{head2} & \Robject{breadth2} \\ \hline
## \endhead
## 191 & 155 & 179 & 145 & 190 & 159 & 195 & 157 \\
## 195 & 149 & 201 & 152 & 188 & 151 & 187 & 158 \\
## 181 & 148 & 185 & 149 & 163 & 137 & 161 & 130 \\
## 183 & 153 & 188 & 149 & 195 & 155 & 183 & 158 \\
## 176 & 144 & 171 & 142 & 186 & 153 & 173 & 148 \\
## 208 & 157 & 192 & 152 & 181 & 145 & 182 & 146 \\
## 189 & 150 & 190 & 149 & 175 & 140 & 165 & 137 \\
## 197 & 159 & 189 & 152 & 192 & 154 & 185 & 152 \\
## 188 & 152 & 197 & 159 & 174 & 143 & 178 & 147 \\
## 192 & 150 & 187 & 151 & 176 & 139 & 176 & 143 \\
## 179 & 158 & 186 & 148 & 197 & 167 & 200 & 158 \\
## 183 & 147 & 174 & 147 & 190 & 163 & 187 & 150 \\
## 174 & 150 & 185 & 152 &   &   &   &   \\
## \hline
## \end{longtable}
## \end{center}
headsize <- x

PCA: headsize

head_dat <- headsize[, c("head1", "head2")]

colMeans(head_dat)
##  head1  head2 
## 185.72 183.84
head_pca <- princomp(x = head_dat)

head_pca
## Call:
## princomp(x = head_dat)
## 
## Standard deviations:
##    Comp.1    Comp.2 
## 12.690766  5.215406 
## 
##  2  variables and  25 observations.
print(summary(head_pca), loadings = TRUE)
## Importance of components:
##                            Comp.1    Comp.2
## Standard deviation     12.6907660 5.2154059
## Proportion of Variance  0.8555135 0.1444865
## Cumulative Proportion   0.8555135 1.0000000
## 
## Loadings:
##       Comp.1 Comp.2
## head1  0.693  0.721
## head2  0.721 -0.693
s1 <- round(diag(cov(head_pca$scores))[1], 3)

s2 <- round(diag(cov(head_pca$scores))[2], 3)

s <- summary(head_pca)

l1 <- round(s$loadings[,1], 2)

l2 <- round(s$loadings[,2], 2)

diag(cov(head_pca$scores))
##    Comp.1    Comp.2 
## 167.76619  28.33381
a1 <- 183.84-0.721*185.72/0.693

b1 <- 0.721/0.693

a2 <- 183.84-(-0.693*185.72/0.721)

b2 <- -0.693/0.721

plot(head_dat, xlab = "First son's head length (mm)",
      ylab = "Second son's head length")

abline(a1, b1)

abline(a2, b2, lty = 2)

xlim <- range(head_pca$scores[,1])

plot(head_pca$scores, xlim = xlim, ylim = xlim)

PCA:heptathlon

The pentathlon for women was first held in Germany in 1928. Initially this consisted of the shot put, long jump, 100 m, high jump, and javelin events, held over two days. In the 1964 Olympic Games, the pentathlon became the first combined Olympic event for women, consisting now of the 80 m hurdles, shot, high jump, long jump, and 200 m. In 1977, the 200 m was replaced by the 800 m run, and from 1981 the IAAF brought in the seven-event heptathlon in place of the pentathlon, with day one containing the events 100 m hurdles, shot, high jump, and 200 m run, and day two the long jump, javelin, and 800 m run. A scoring system is used to assign points to the results from each event, and the winner is the woman who accumulates the most points over the two days. The event made its first Olympic appearance in 1984.

In the 1988 Olympics held in Seoul, the heptathlon was won by one of the stars of women’s athletics in the USA, Jackie Joyner-Kersee. The results for all 25 competitors in all seven disciplines are given from Hand, Daly, Lunn, McConway, and Ostrowski 1994.

data("heptathlon",package="HSAUR2")

toLatex(HSAURtable(heptathlon), pcol = 1,
     caption = "Results of Olympic heptathlon, Seoul, 1988.",
     label = "ch:PCA:heptathlon:tab",
     rownames = TRUE)
## \index{heptathlon data@\Robject{heptathlon} data}
## \begin{center}
## \begin{longtable}{l rrrrrrrr }
## \caption{\Robject{heptathlon} data. Results of Olympic heptathlon, Seoul, 1988. \label{ch:PCA:heptathlon:tab}}
## \\
## \hline
##   &  \Robject{hurdles} & \Robject{highjump} & \Robject{shot} & \Robject{run200m} & \Robject{longjump} & \Robject{javelin} & \Robject{run800m} & \Robject{score} \\ \hline
## \endfirsthead
## \caption[]{\Robject{heptathlon} data (continued).} \\
## \hline
##   &  \Robject{hurdles} & \Robject{highjump} & \Robject{shot} & \Robject{run200m} & \Robject{longjump} & \Robject{javelin} & \Robject{run800m} & \Robject{score} \\ \hline
## \endhead
## Joyner-Kersee (USA)   &  12.69 & 1.86 & 15.80 & 22.56 & 7.27 & 45.66 & 128.51 & 7291 \\
## John (GDR)   &  12.85 & 1.80 & 16.23 & 23.65 & 6.71 & 42.56 & 126.12 & 6897 \\
## Behmer (GDR)   &  13.20 & 1.83 & 14.20 & 23.10 & 6.68 & 44.54 & 124.20 & 6858 \\
## Sablovskaite (URS)   &  13.61 & 1.80 & 15.23 & 23.92 & 6.25 & 42.78 & 132.24 & 6540 \\
## Choubenkova (URS)   &  13.51 & 1.74 & 14.76 & 23.93 & 6.32 & 47.46 & 127.90 & 6540 \\
## Schulz (GDR)   &  13.75 & 1.83 & 13.50 & 24.65 & 6.33 & 42.82 & 125.79 & 6411 \\
## Fleming (AUS)   &  13.38 & 1.80 & 12.88 & 23.59 & 6.37 & 40.28 & 132.54 & 6351 \\
## Greiner (USA)   &  13.55 & 1.80 & 14.13 & 24.48 & 6.47 & 38.00 & 133.65 & 6297 \\
## Lajbnerova (CZE)   &  13.63 & 1.83 & 14.28 & 24.86 & 6.11 & 42.20 & 136.05 & 6252 \\
## Bouraga (URS)   &  13.25 & 1.77 & 12.62 & 23.59 & 6.28 & 39.06 & 134.74 & 6252 \\
## Wijnsma (HOL)   &  13.75 & 1.86 & 13.01 & 25.03 & 6.34 & 37.86 & 131.49 & 6205 \\
## Dimitrova (BUL)   &  13.24 & 1.80 & 12.88 & 23.59 & 6.37 & 40.28 & 132.54 & 6171 \\
## Scheider (SWI)   &  13.85 & 1.86 & 11.58 & 24.87 & 6.05 & 47.50 & 134.93 & 6137 \\
## Braun (FRG)   &  13.71 & 1.83 & 13.16 & 24.78 & 6.12 & 44.58 & 142.82 & 6109 \\
## Ruotsalainen (FIN)   &  13.79 & 1.80 & 12.32 & 24.61 & 6.08 & 45.44 & 137.06 & 6101 \\
## Yuping (CHN)   &  13.93 & 1.86 & 14.21 & 25.00 & 6.40 & 38.60 & 146.67 & 6087 \\
## Hagger (GB)   &  13.47 & 1.80 & 12.75 & 25.47 & 6.34 & 35.76 & 138.48 & 5975 \\
## Brown (USA)   &  14.07 & 1.83 & 12.69 & 24.83 & 6.13 & 44.34 & 146.43 & 5972 \\
## Mulliner (GB)   &  14.39 & 1.71 & 12.68 & 24.92 & 6.10 & 37.76 & 138.02 & 5746 \\
## Hautenauve (BEL)   &  14.04 & 1.77 & 11.81 & 25.61 & 5.99 & 35.68 & 133.90 & 5734 \\
## Kytola (FIN)   &  14.31 & 1.77 & 11.66 & 25.69 & 5.75 & 39.48 & 133.35 & 5686 \\
## Geremias (BRA)   &  14.23 & 1.71 & 12.95 & 25.50 & 5.50 & 39.64 & 144.02 & 5508 \\
## Hui-Ing (TAI)   &  14.85 & 1.68 & 10.00 & 25.23 & 5.47 & 39.14 & 137.30 & 5290 \\
## Jeong-Mi (KOR)   &  14.53 & 1.71 & 10.83 & 26.61 & 5.50 & 39.26 & 139.17 & 5289 \\
## Launa (PNG)   &  16.42 & 1.50 & 11.78 & 26.16 & 4.88 & 46.38 & 163.43 & 4566 \\
## \hline
## \end{longtable}
## \end{center}
heptathlon$hurdles <- with(heptathlon, max(hurdles)-hurdles)

heptathlon$run200m <- with(heptathlon, max(run200m)-run200m)

heptathlon$run800m <- with(heptathlon, max(run800m)-run800m)

score <- which(colnames(heptathlon) == "score")

round(cor(heptathlon[,-score]), 2)
##          hurdles highjump shot run200m longjump javelin run800m
## hurdles     1.00     0.81 0.65    0.77     0.91    0.01    0.78
## highjump    0.81     1.00 0.44    0.49     0.78    0.00    0.59
## shot        0.65     0.44 1.00    0.68     0.74    0.27    0.42
## run200m     0.77     0.49 0.68    1.00     0.82    0.33    0.62
## longjump    0.91     0.78 0.74    0.82     1.00    0.07    0.70
## javelin     0.01     0.00 0.27    0.33     0.07    1.00   -0.02
## run800m     0.78     0.59 0.42    0.62     0.70   -0.02    1.00
plot(heptathlon[,-score])

plot(heptathlon[,-score], pch = ".", cex = 1.5)

heptathlon <- heptathlon[-grep("PNG", rownames(heptathlon)),]

score <- which(colnames(heptathlon) == "score")

round(cor(heptathlon[,-score]), 2)
##          hurdles highjump shot run200m longjump javelin run800m
## hurdles     1.00     0.58 0.77    0.83     0.89    0.33    0.56
## highjump    0.58     1.00 0.46    0.39     0.66    0.35    0.15
## shot        0.77     0.46 1.00    0.67     0.78    0.34    0.41
## run200m     0.83     0.39 0.67    1.00     0.81    0.47    0.57
## longjump    0.89     0.66 0.78    0.81     1.00    0.29    0.52
## javelin     0.33     0.35 0.34    0.47     0.29    1.00    0.26
## run800m     0.56     0.15 0.41    0.57     0.52    0.26    1.00
plot(heptathlon[,-score], pch = ".", cex = 1.5)

op <- options(digits = 2)

heptathlon_pca <- prcomp(heptathlon[, -score], scale = TRUE)

print(heptathlon_pca)
## Standard deviations (1, .., p=7):
## [1] 2.08 0.95 0.91 0.68 0.55 0.34 0.26
## 
## Rotation (n x k) = (7 x 7):
##            PC1    PC2   PC3    PC4    PC5    PC6    PC7
## hurdles  -0.45  0.058 -0.17  0.048 -0.199  0.847 -0.070
## highjump -0.31 -0.651 -0.21 -0.557  0.071 -0.090  0.332
## shot     -0.40 -0.022 -0.15  0.548  0.672 -0.099  0.229
## run200m  -0.43  0.185  0.13  0.231 -0.618 -0.333  0.470
## longjump -0.45 -0.025 -0.27 -0.015 -0.122 -0.383 -0.749
## javelin  -0.24 -0.326  0.88  0.060  0.079  0.072 -0.211
## run800m  -0.30  0.657  0.19 -0.574  0.319 -0.052  0.077
a1 <- heptathlon_pca$rotation[,1]

center <- heptathlon_pca$center

scale <- heptathlon_pca$scale

hm <- as.matrix(heptathlon[,-score])

drop(scale(hm, center = center, scale = scale) %*% 
      heptathlon_pca$rotation[,1])
## Joyner-Kersee (USA)          John (GDR)        Behmer (GDR)  Sablovskaite (URS) 
##              -4.758              -3.148              -2.926              -1.288 
##   Choubenkova (URS)        Schulz (GDR)       Fleming (AUS)       Greiner (USA) 
##              -1.503              -0.958              -0.953              -0.633 
##    Lajbnerova (CZE)       Bouraga (URS)       Wijnsma (HOL)     Dimitrova (BUL) 
##              -0.382              -0.522              -0.218              -1.076 
##      Scheider (SWI)         Braun (FRG)  Ruotsalainen (FIN)        Yuping (CHN) 
##               0.003               0.109               0.209               0.233 
##         Hagger (GB)         Brown (USA)       Mulliner (GB)    Hautenauve (BEL) 
##               0.660               0.757               1.881               1.828 
##        Kytola (FIN)      Geremias (BRA)       Hui-Ing (TAI)      Jeong-Mi (KOR) 
##               2.118               2.771               3.901               3.897
predict(heptathlon_pca)[,1]
## Joyner-Kersee (USA)          John (GDR)        Behmer (GDR)  Sablovskaite (URS) 
##              -4.758              -3.148              -2.926              -1.288 
##   Choubenkova (URS)        Schulz (GDR)       Fleming (AUS)       Greiner (USA) 
##              -1.503              -0.958              -0.953              -0.633 
##    Lajbnerova (CZE)       Bouraga (URS)       Wijnsma (HOL)     Dimitrova (BUL) 
##              -0.382              -0.522              -0.218              -1.076 
##      Scheider (SWI)         Braun (FRG)  Ruotsalainen (FIN)        Yuping (CHN) 
##               0.003               0.109               0.209               0.233 
##         Hagger (GB)         Brown (USA)       Mulliner (GB)    Hautenauve (BEL) 
##               0.660               0.757               1.881               1.828 
##        Kytola (FIN)      Geremias (BRA)       Hui-Ing (TAI)      Jeong-Mi (KOR) 
##               2.118               2.771               3.901               3.897
sdev <- heptathlon_pca$sdev

prop12 <- round(sum(sdev[1:2]^2)/sum(sdev^2)*100, 0)

plot(heptathlon_pca)

plot(heptathlon_pca, main = "")

cor(heptathlon$score, heptathlon_pca$x[,1])
## [1] -0.99
plot(heptathlon$score, heptathlon_pca$x[,1])

PCA:USairpollution

The air pollution data were originally collected to investigate the determinants of pol- lution, presumably by regressing SO2 on the six other variables.

data("USairpollution", package = "HSAUR2")

panel.hist <- function(x, ...) {
     usr <- par("usr"); on.exit(par(usr))
     par(usr = c(usr[1:2], 0, 1.5) )
     h <- hist(x, plot = FALSE)
     breaks <- h$breaks; nB <- length(breaks)
     y <- h$counts; y <- y/max(y)
     rect(breaks[-nB], 0, breaks[-1], y, col="grey", ...)
 }

USairpollution$negtemp <- USairpollution$temp * (-1)

USairpollution$temp <- NULL

pairs(USairpollution[,-1], diag.panel = panel.hist, 
       pch = ".", cex = 1.5)

cor(USairpollution[,-1])
##           manu  popul   wind precip predays negtemp
## manu     1.000  0.955  0.238 -0.032   0.132   0.190
## popul    0.955  1.000  0.213 -0.026   0.042   0.063
## wind     0.238  0.213  1.000 -0.013   0.164   0.350
## precip  -0.032 -0.026 -0.013  1.000   0.496  -0.386
## predays  0.132  0.042  0.164  0.496   1.000   0.430
## negtemp  0.190  0.063  0.350 -0.386   0.430   1.000
usair_pca <- princomp(USairpollution[,-1], cor = TRUE)

summary(usair_pca, loadings = TRUE)
## Importance of components:
##                        Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## Standard deviation       1.48   1.22   1.18   0.87  0.338 0.1856
## Proportion of Variance   0.37   0.25   0.23   0.13  0.019 0.0057
## Cumulative Proportion    0.37   0.62   0.85   0.98  0.994 1.0000
## 
## Loadings:
##         Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## manu     0.612  0.168  0.273  0.137  0.102  0.703
## popul    0.578  0.222  0.350               -0.695
## wind     0.354 -0.131 -0.297 -0.869 -0.113       
## precip         -0.623  0.505 -0.171  0.568       
## predays  0.238 -0.708         0.311 -0.580       
## negtemp  0.330 -0.128 -0.672  0.306  0.558 -0.136
pairs(usair_pca$scores[,1:3], ylim = c(-6, 4), xlim = c(-6, 4),
       panel = function(x,y, ...) {
           text(x, y, abbreviate(row.names(USairpollution)), 
                cex = 0.6)
           bvbox(cbind(x,y), add = TRUE)
       })

out <- sapply(1:6, function(i) {
     plot(USairpollution$SO2,usair_pca$scores[,i],
          xlab = paste("PC", i, sep = ""), 
          ylab = "Sulphur dioxide concentration")
     })

usair_reg <- lm(SO2 ~ usair_pca$scores, 
                 data = USairpollution)

summary(usair_reg)
## 
## Call:
## lm(formula = SO2 ~ usair_pca$scores, data = USairpollution)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -23.00  -8.54  -0.99   5.76  48.76 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              30.049      2.286   13.15  6.9e-15 ***
## usair_pca$scoresComp.1    9.942      1.542    6.45  2.3e-07 ***
## usair_pca$scoresComp.2   -2.240      1.866   -1.20   0.2384    
## usair_pca$scoresComp.3    0.375      1.936    0.19   0.8475    
## usair_pca$scoresComp.4    8.549      2.622    3.26   0.0025 ** 
## usair_pca$scoresComp.5   15.176      6.753    2.25   0.0312 *  
## usair_pca$scoresComp.6   39.271     12.316    3.19   0.0031 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15 on 34 degrees of freedom
## Multiple R-squared:  0.67,   Adjusted R-squared:  0.611 
## F-statistic: 11.5 on 6 and 34 DF,  p-value: 5.42e-07

PCA-heptathlon-biplot

biplot(heptathlon_pca, col = c("gray", "black"))

tmp <- heptathlon[, -score]

rownames(tmp) <- abbreviate(gsub(" \\(.*", "", rownames(tmp)))

biplot(prcomp(tmp, scale = TRUE), col = c("black", "darkgray"), xlim =
+ c(-0.5, 0.7), cex = 0.7)

CCA-headsize

headsize.std <- sweep(headsize, 2, 
                       apply(headsize, 2, sd), FUN = "/")

R <- cor(headsize.std)

r11 <- R[1:2, 1:2]
 
r22 <- R[-(1:2), -(1:2)]

r12 <- R[1:2, -(1:2)]

r21 <- R[-(1:2), 1:2]

(E1 <- solve(r11) %*% r12 %*% solve(r22) %*%r21)
##          head1 breadth1
## head1     0.32     0.32
## breadth1  0.30     0.30
(E2 <- solve(r22) %*% r21 %*% solve(r11) %*%r12)
##          head2 breadth2
## head2     0.30     0.30
## breadth2  0.32     0.32
(e1 <- eigen(E1))
## eigen() decomposition
## $values
## [1] 0.6217 0.0029
## 
## $vectors
##      [,1]  [,2]
## [1,] 0.73 -0.70
## [2,] 0.69  0.71
(e2 <- eigen(E2))
## eigen() decomposition
## $values
## [1] 0.6217 0.0029
## 
## $vectors
##       [,1]  [,2]
## [1,] -0.68 -0.71
## [2,] -0.73  0.71
girth1 <- headsize.std[,1:2] %*% e1$vectors[,1]

girth2 <- headsize.std[,3:4] %*% e2$vectors[,1]

shape1 <- headsize.std[,1:2] %*% e1$vectors[,2]

shape2 <- headsize.std[,3:4] %*% e2$vectors[,2]

(g <- cor(girth1, girth2))
##       [,1]
## [1,] -0.79
(s <- cor(shape1, shape2))
##       [,1]
## [1,] 0.054
plot(girth1, girth2)

plot(shape1, shape2)

CCA- LAdepr

The data for this example arise from a study of depression amongst 294 respondents in Los An- geles. The two sets of variables of interest were “health variables”, namely the CESD (the sum of 20 separate numerical scales measuring different aspects of depression) and a measure of general health and “personal” variables, of which there were four: gender, age, income, and educational level (numerically coded from the lowest “less than high school”, to the highest, “finished doctorate”).

depr <- c(
  0.212,
  0.124,  0.098,
 -0.164,  0.308,  0.044,
 -0.101, -0.207, -0.106, -0.208,
 -0.158, -0.183, -0.180, -0.192, 0.492)

LAdepr <- diag(6) / 2

LAdepr[upper.tri(LAdepr)] <- depr

LAdepr <- LAdepr + t(LAdepr)

rownames(LAdepr) <- colnames(LAdepr) <- c("CESD", "Health", "Gender", "Age", "Edu", "Income")

x <- LAdepr

LAdepr <- as.data.frame(LAdepr)

toLatex(HSAURtable(LAdepr), 
     caption = "Los Angeles Depression Data.",
     label = "ch:PCA:LAdepr:tab", rownames = FALSE)
## \index{LAdepr data@\Robject{LAdepr} data}
## \begin{center}
## \begin{longtable}{ rrrrrr }
## \caption{\Robject{LAdepr} data. Los Angeles Depression Data. \label{ch:PCA:LAdepr:tab}}
## \\
## \hline
## \Robject{CESD} & \Robject{Health} & \Robject{Gender} & \Robject{Age} & \Robject{Edu} & \Robject{Income} \\ \hline
## \endfirsthead
## \caption[]{\Robject{LAdepr} data (continued).} \\
## \hline
## \Robject{CESD} & \Robject{Health} & \Robject{Gender} & \Robject{Age} & \Robject{Edu} & \Robject{Income} \\ \hline
## \endhead
## 1.000 & 0.212 & 0.124 & -0.164 & -0.101 & -0.158 \\
## 0.212 & 1.000 & 0.098 & 0.308 & -0.207 & -0.183 \\
## 0.124 & 0.098 & 1.000 & 0.044 & -0.106 & -0.180 \\
## -0.164 & 0.308 & 0.044 & 1.000 & -0.208 & -0.192 \\
## -0.101 & -0.207 & -0.106 & -0.208 & 1.000 & 0.492 \\
## -0.158 & -0.183 & -0.180 & -0.192 & 0.492 & 1.000 \\
## \hline
## \end{longtable}
## \end{center}
LAdepr <- x

r11 <- LAdepr[1:2, 1:2]

r22 <- LAdepr[-(1:2), -(1:2)]

r12 <- LAdepr[1:2, -(1:2)]

r21 <- LAdepr[-(1:2), 1:2]

(E1 <- solve(r11) %*% r12 %*% solve(r22) %*%r21)
##          CESD Health
## CESD    0.084 -0.043
## Health -0.033  0.133
(E2 <- solve(r22) %*% r21 %*% solve(r11) %*%r12)
##         Gender     Age    Edu Income
## Gender  0.0155 -0.0015 -0.018 -0.022
## Age    -0.0024  0.1471 -0.040 -0.016
## Edu    -0.0149 -0.0260  0.025  0.025
## Income -0.0212  0.0130  0.022  0.029
(e1 <- eigen(E1))
## eigen() decomposition
## $values
## [1] 0.153 0.063
## 
## $vectors
##       [,1]  [,2]
## [1,]  0.53 -0.91
## [2,] -0.85 -0.42
(e2 <- eigen(E2))
## eigen() decomposition
## $values
## [1]  1.5e-01  6.3e-02 -5.2e-18  2.9e-18
## 
## $vectors
##         [,1]  [,2]  [,3]  [,4]
## [1,]  0.0026  0.49  0.15 -0.85
## [2,]  0.9801 -0.32 -0.11 -0.16
## [3,] -0.1858 -0.43 -0.70 -0.47
## [4,]  0.0699 -0.69  0.69 -0.19