1 PCA/LDA Lab CSCE 587 Spring 2017

2 PCA/LDA Lab
# Plan A: PCA & LDA Lab
#
# We will need the following packages:
#   stats
#   ggplot2
#   ggfortify
#   lfda
#
# stats & ggplot2 are already installed and can be loaded
# ggfortify and lfda are not installed
# Install ggfortify --- this will take a while
# Install lfda --- this won't take as long as ggfortify
# After installation, load ggfortify & lfda
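A minimal sketch of the Plan A install-and-load steps described on this slide (package names as listed above; nothing here goes beyond what the slide already instructs):

install.packages("ggfortify")   # large dependency set: this will take a while
install.packages("lfda")        # quicker install than ggfortify
# After installation, load both packages
library(ggfortify)
library(lfda)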

3 PCA/LDA Lab
# Plan B: PCA & LDA Lab
#
# We will need the following packages:
#   stats
#   MASS
#   ggplot2
#
# stats, MASS & ggplot2 are already installed and can be loaded
# ggfortify and lfda are not available for the version of R on the virtual machines:
> install.packages('ggfortify')
Warning in install.packages :
  package ‘ggfortify’ is not available (for R version 3.1.3)
> install.packages("lfda")
  package ‘lfda’ is not available (for R version 3.1.3)
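For Plan B, a minimal sketch of attaching the packages that are already installed on the virtual machines, per the slide above:

# stats is attached by default in R; MASS and ggplot2 must be loaded explicitly
library(MASS)      # provides lda()
library(ggplot2)   # general plotting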

4 PCA
############################################################
# Example: iris data
# PCA
# Extract the independent variables
iris_data <- iris[,-5]
# Generate the PCs
iris_pca <- prcomp(iris_data, center = TRUE, scale. = TRUE)
# Display the PCA object
print(iris_pca)
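As an optional check (an addition, not part of the original lab), summary() on the prcomp object reports the proportion of variance explained by each component directly, which the next slide computes by hand:

# Standard deviation, proportion of variance, and cumulative proportion per PC
summary(iris_pca)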

5 PCA
# Convert from SD to var and get the total variance
tot_var <- sum(sapply(iris_pca$sdev, function(x) x*x))
# Get percent var
pct_var <- sapply(iris_pca$sdev, function(x) x*x/tot_var)
# Plot percent var
plot(pct_var, type="l")
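The same proportions can also be computed in vectorised form; a small sketch (the name pct_var2 is illustrative, not from the lab):

# Variances are the squared standard deviations returned by prcomp()
pct_var2 <- iris_pca$sdev^2 / sum(iris_pca$sdev^2)
cumsum(pct_var2)   # cumulative proportion of variance explained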

6 PCA
# The first two PCs account for most of the variance
sum(pct_var[1:2])
# Plot the first two principal components
# If you installed ggfortify on your laptop you can use the
# autoplot command that is commented out below; otherwise
# use the work-around plot command
# autoplot(prcomp(iris_data))
plot(prcomp(iris_data)$x[,1:2])

7 PCA
# Plot the first two principal components and color by species
# autoplot(prcomp(iris_data), data = iris, colour = 'Species')
plot(prcomp(iris_data)$x[,1:2], col=iris[,5])
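A small optional extension of the work-around plot (not on the slide): add a legend so the colour-to-species mapping is explicit. The legend placement is a guess; adjust as needed.

plot(prcomp(iris_data)$x[,1:2], col = iris[,5])
legend("topright", legend = levels(iris$Species), col = 1:3, pch = 1)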

8 ggfortify plots for our PCA example
# If you were able to install ggfortify: plot the first two principal
# components, color by species, and label by observation index
autoplot(prcomp(iris_data), data = iris, colour = 'Species', label = TRUE,
         label.size = 3)

9 ggfortify plots for our PCA example
# If you were able to install ggfortify: plot the first two principal
# components, color by species, and label by observation index
autoplot(prcomp(iris_data), data = iris, colour = 'Species', label = TRUE,
         label.size = 3)
# If you were able to install ggfortify: plot the first two PCs, draw
# the eigenvectors, and label the loadings
autoplot(prcomp(iris_data), data = iris, colour = 'Species', loadings = TRUE,
         loadings.colour = 'blue', loadings.label = TRUE, loadings.label.size = 3)

10 LDA using the lfda package
##########################################################
# If you were able to install lfda you can use the lfda LDA function.
# Create the LDA model:
#   The first argument is the set of 4 independent variables.
#   The second argument is the dependent variable.
#   The third argument is the dimensionality of the reduced space.
#   The fourth argument is the type of metric in the embedding space.
#
# iris_LDA <- lfda(iris_data, iris[, 5], r = 3, metric="plain")
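If lfda did install, the reduced-space coordinates are stored in the fitted object (the lfda documentation calls this component Z); a hedged base-graphics sketch, kept commented out like the slide's own code:

# plot(iris_LDA$Z[,1:2], col = iris[,5])   # assumes iris_LDA$Z holds the transformed data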

11 LDA using the lfda package
# If you were able to install lfda & ggfortify: plot the model
# autoplot(iris_LDA, data = iris, frame = TRUE, frame.colour = 'Species')
#
# If you were able to install lfda: plot the model and color the
# data by class
# autoplot(iris_LDA, data = iris, colour="Species", frame = TRUE,
#          frame.colour = 'Species')

12 LDA from MASS package
# Create a data frame for the iris dataset
Iris <- data.frame(rbind(iris3[,,1], iris3[,,2], iris3[,,3]),
                   Sp = rep(c("s","c","v"), rep(50,3)))
# Decide which observations comprise the training set
train <- sample(1:150, 75)
# Display the class membership of the training data set
table(Iris$Sp[train])
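Note that sample() draws a different split on every run; a small reproducibility sketch (the seed value is arbitrary and not part of the lab):

set.seed(587)              # fix the random number generator
train <- sample(1:150, 75) # now the same 75 rows are chosen each run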

13 LDA from MASS package
# Create the LDA model
# Formula: Sp ~ ., i.e. Sp is the classification and the other columns
#   are the independent variables
# Iris is the data set
# prior = c(1,1,1)/3 gives even priors
# subset: which observations are used to train with
z <- lda(Sp ~ ., Iris, prior = c(1,1,1)/3, subset = train)
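A short follow-up sketch (an addition to the lab): classify the held-out half of the data with predict() and tabulate predicted versus true species. The object name pred is illustrative.

pred <- predict(z, Iris[-train, ])
table(predicted = pred$class, actual = Iris$Sp[-train])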

14 LDA from MASS package
# Project from 4 iris dimensions onto the dimension of the first eigenvector
iris_x <- (z$scaling[,1]) %*% t(iris[,-5])
# Project from 4 iris dimensions onto the dimension of the second eigenvector
iris_y <- (z$scaling[,2]) %*% t(iris[,-5])
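An alternative sketch using predict(): it returns discriminant scores computed from centered data, so the picture matches the slide's projection up to a constant shift. The name scores is illustrative.

scores <- predict(z, Iris)$x      # columns LD1 and LD2 for all 150 observations
plot(scores, col = iris[,5])      # same colouring convention as the next slide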

15 LDA from MASS package
# Plot 150 iris data points
plot(iris_y~iris_x)
# Plot 150 iris data points & color by species
plot(iris_y~iris_x, col=iris[,5])

16 More difficult PCA example
##############################################################
# PCA example using the icu data from earlier in the semester
tmp <- icu[,-c(1, 2)]
pca_tmp <- prcomp(tmp, center = TRUE, scale. = TRUE)
print(pca_tmp)
plot(pca_tmp, type="l")
tot_var <- sum(sapply(pca_tmp$sdev, function(x) x*x))

17 More difficult PCA example
pct_var <- sapply(pca_tmp$sdev, function(x) x*x/tot_var)
plot(pct_var, type="l")
sum(pct_var[1:10])
# autoplot(prcomp(tmp))
plot(prcomp(tmp)$x[,1:2])
# autoplot(prcomp(tmp), data = icu, colour = 'STA')
plot(prcomp(tmp)$x[,1:2], col=icu[,2]+1)
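A small sketch for judging how many components to keep (assumes the icu data frame from earlier in the semester is already loaded, as above):

cumsum(pct_var)   # cumulative proportion of variance explained by the first k PCs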

