output: word_document: default html_document: default pdf_document: default
rm(list = ls())
#library(bruceR)
#library(lmerTest)
# Packages required for Chapter 10
#library(knitr)
#library(gridExtra)
#library(GGally)
#library(mice)
#library(nlme)
#library(lme4)
#library(mnormt)
#library(boot)
#library(HLMdiag)
#library(kableExtra)
#library(pander)
library(tidyverse)
#library(ggstatsplot)
library(psych)
library(GPArotation)
library(nFactors)
#library(caret)
#library(Hmisc)
library(ggthemes)
library(factoextra)
library(FactoMineR)
#library(randomForest)
library(reshape2)
library("ggsci")
library("ggpubr")
source("D:/微云同步助手/754424340/kvip/03data analysis/batch/LA/mainfigure/geom_flat_violin.R")
source("D:/微云同步助手/754424340/kvip/03data analysis/batch/LA/mainfigure/summarySE.R")
source("D:/微云同步助手/754424340/kvip/03data analysis/batch/LA/mainfigure/R_rainclouds.R")
pass_check_data<-read.csv('Experiment2_data.CSV',header=T)#n=2631
cols <- c("IRI2", "IRI3", "IRI4", "IRI6", "IRI8", "IRI9", "IRI10", "IRI11",
"IRI13", "IRI14","IRI15", "IRI17", "IRI18","IRI19","IRI20","IRI21","IRI22",
"IRI24","IRI25","IRI27","IRI28",
"PSY1", "PSY2", "PSY3", "PSY4", "PSY5", "PSY6","PSY7", "PSY8", "PSY9", "PSY10", "PSY11",
"PSY12","PSY13", "PSY14","PSY15", "PSY16", "PSY17","PSY18","PSY19","PSY20","PSY21","PSY22",
"PSY23","PSY24","PSY25","PSY26","PSY27","PSY28","PSY29","PSY30",
"GASP1", "GASP2", "GASP3", "GASP4", "GASP5", "GASP6", "GASP7", "GASP8", "GASP9", "GASP10", "GASP11",
"GASP12","GASP13", "GASP14","GASP15", "GASP16",
"SC1", "SC2", "SC3", "SC4", "SC5", "SC6", "SC7", "SC8", "SC9", "SC10", "SC11",
"SC12","SC13", "SC14","SC15", "SC16", "SC17", "SC18","SC19","SC20","SC21","SC22",
"SC23","SC24","SC25","SC26",
"GBJWS1", "GBJWS2", "GBJWS3", "GBJWS4", "GBJWS5", "GBJWS6", "GBJWS7", "GBJWS8", "GBJWS9", "GBJWS10", "GBJWS11",
"GBJWS12","GBJWS13",
"AS1", "AS2", "AS3", "AS4", "AS5", "AS6", "AS7", "AS8", "AS9", "AS10", "AS11",
"AS12","AS13", "AS14","AS15", "AS16", "AS17", "AS18","AS19","AS20")
data_for_factor_analysis <- pass_check_data[,cols]
# Pearson correlation
corr_mat <- cor (data_for_factor_analysis)
ev <- eigen(corr_mat)
eva <- ev$values
# Determine the number of factors
numFact <- nCng(eva, model="factors", details=TRUE)
plotuScree(eva, main=paste(numFact$nFactors,
" factors retained by the CNG procedure",
sep=""))
# Factor analysis
fa.new <- fa(r = corr_mat, nfactors = 3, n.obs = nrow(data_for_factor_analysis), rotate = "oblimin", fm = "ml", scores = "regression")
fa.scores <- factor.scores(x=data_for_factor_analysis,f=fa.new)
scores = data.frame(fa.scores$scores)
loadings <- fa.new$loadings
# add scores to all subs data
pass_check_data$factor1_score <- scores$ML1
pass_check_data$factor2_score <- scores$ML2
pass_check_data$factor3_score <- scores$ML3
#output results
write.csv(pass_check_data,'Experiment2_data.csv')
write.csv(scores,'factor_scores_R.csv')
write.csv(loadings,'factor_loading_R.csv')
#allData$F1 <- scores$ML1
#allData$F2 <- scores$ML2
#allData$F3 <- scores$ML3
#1compute PCA
res.pca <- prcomp(data_for_factor_analysis, scale = TRUE)
#2Visualize eigenvalues
fviz_eig(res.pca,addlabels=TRUE)
fviz_pca_var(res.pca, col.var = "black")
# Contributions of variables to PC1
fviz_contrib(res.pca, choice = "var", axes = 1, top = 30)
# Contributions of variables to PC2
fviz_contrib(res.pca, choice = "var", axes = 2, top = 30)
# Contributions of variables to PC3
fviz_contrib(res.pca, choice = "var", axes = 3, top = 30)
# Contributions of variables to PC4
#fviz_contrib(res.pca, choice = "var", axes = 4, top = 10)
#graph of individuals;individuals with a similar profile are grouped together.
fviz_pca_ind(res.pca,
label = "none", # hide individual labels
col.ind = "cos2", # Color by the quality of representation
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE # Avoid text overlapping
#addEllipses = TRUE # Concentration ellipses
)
library(dendextend) # for comparing two dendrograms
## cluster
data_cluster <- subset(pass_check_data,select = c('factor1_score','factor2_score','factor3_score'))
### no need to normalize/scale
#data_cluster_z <- base::scale(data_cluster)
#data_cluster <- as.data.frame(data_cluster)
library(clustertend)
# Compute Hopkins statistic for iris dataset
hopkins(data_cluster, n = nrow(data_cluster)-1)
fviz_dist(dist(data_cluster), show_labels = FALSE,order=TRUE,gradient = list(low = "#00AFBB", mid = "white", high = "#FC4E07"))+
labs(title = "Dissimilarity matrix across all subs")
require(clValid)
intern <- clValid(data_cluster, nClust = 2:10, maxitems=3000,
clMethods = c("hierarchical","kmeans","pam"), validation = "internal")# Summary
summary(intern)
optimalScores(intern)
plot(intern)
stability <- clValid(data_cluster, nClust = 2:10, maxitems=3000,
clMethods = c("hierarchical","kmeans","pam"), validation = "stability")# Summary
summary(stability)
#optimalScores(stability)
plot(stability)
require(NbClust)
res_hc_D2<-NbClust(data_cluster, distance = "euclidean", min.nc=2, max.nc=10, method = "ward.D2", index = "all")
fviz_nbclust(res_hc_D2)
