hello, web3

title: "R_psychologicalTraits_xiaoyuzeng_211228"

output: word_document: default html_document: default pdf_document: default

rm(list = ls())

libraries

#library(bruceR)
#library(lmerTest)
# Packages required for Chapter 10
#library(knitr)
#library(gridExtra)
#library(GGally)
#library(mice)
#library(nlme)
#library(lme4)
#library(mnormt)
#library(boot)
#library(HLMdiag)
#library(kableExtra)
#library(pander)
library(tidyverse)
#library(ggstatsplot)
library(psych)
library(GPArotation)
library(nFactors)
#library(caret)
#library(Hmisc)
library(ggthemes)
library(factoextra)
library(FactoMineR)
#library(randomForest)
library(reshape2)
library("ggsci")
library("ggpubr")
source("D:/微云同步助手/754424340/kvip/03data analysis/batch/LA/mainfigure/geom_flat_violin.R")
source("D:/微云同步助手/754424340/kvip/03data analysis/batch/LA/mainfigure/summarySE.R")
source("D:/微云同步助手/754424340/kvip/03data analysis/batch/LA/mainfigure/R_rainclouds.R")

data import

pass_check_data<-read.csv('Experiment2_data.CSV',header=T)#n=2631

PCA principle factor analysis

cols <- c("IRI2", "IRI3", "IRI4",  "IRI6",  "IRI8", "IRI9", "IRI10", "IRI11", 
          "IRI13", "IRI14","IRI15", "IRI17", "IRI18","IRI19","IRI20","IRI21","IRI22",
          "IRI24","IRI25","IRI27","IRI28",
          "PSY1", "PSY2", "PSY3", "PSY4", "PSY5", "PSY6","PSY7", "PSY8", "PSY9", "PSY10", "PSY11",
          "PSY12","PSY13", "PSY14","PSY15", "PSY16", "PSY17","PSY18","PSY19","PSY20","PSY21","PSY22", 
          "PSY23","PSY24","PSY25","PSY26","PSY27","PSY28","PSY29","PSY30",
          "GASP1", "GASP2", "GASP3", "GASP4", "GASP5", "GASP6", "GASP7", "GASP8", "GASP9", "GASP10", "GASP11", 
          "GASP12","GASP13", "GASP14","GASP15", "GASP16",
          "SC1", "SC2", "SC3", "SC4", "SC5", "SC6", "SC7", "SC8", "SC9", "SC10", "SC11", 
          "SC12","SC13", "SC14","SC15", "SC16", "SC17", "SC18","SC19","SC20","SC21","SC22",
          "SC23","SC24","SC25","SC26",
          "GBJWS1", "GBJWS2", "GBJWS3", "GBJWS4", "GBJWS5", "GBJWS6", "GBJWS7", "GBJWS8", "GBJWS9", "GBJWS10", "GBJWS11", 
          "GBJWS12","GBJWS13",
          "AS1", "AS2", "AS3", "AS4", "AS5", "AS6", "AS7", "AS8", "AS9", "AS10", "AS11", 
          "AS12","AS13", "AS14","AS15", "AS16", "AS17", "AS18","AS19","AS20")
data_for_factor_analysis <- pass_check_data[,cols]

# Pearson correlation
corr_mat <- cor (data_for_factor_analysis)
ev <- eigen(corr_mat)
eva <- ev$values

# Determine the number of factors

numFact <- nCng(eva, model="factors", details=TRUE)

plotuScree(eva, main=paste(numFact$nFactors,
                           " factors retained by the CNG procedure",
                           sep=""))

# Factor analysis

fa.new <- fa(r = corr_mat, nfactors = 3, n.obs = nrow(data_for_factor_analysis), rotate = "oblimin", fm = "ml", scores = "regression")

fa.scores <- factor.scores(x=data_for_factor_analysis,f=fa.new)
scores = data.frame(fa.scores$scores)
loadings <- fa.new$loadings
# add scores to all subs data
pass_check_data$factor1_score <- scores$ML1
pass_check_data$factor2_score <- scores$ML2
pass_check_data$factor3_score <- scores$ML3
#output results
write.csv(pass_check_data,'Experiment2_data.csv')
write.csv(scores,'factor_scores_R.csv')
write.csv(loadings,'factor_loading_R.csv')
#allData$F1 <- scores$ML1
#allData$F2 <- scores$ML2
#allData$F3 <- scores$ML3

#1compute PCA
res.pca <- prcomp(data_for_factor_analysis, scale = TRUE)
#2Visualize eigenvalues
fviz_eig(res.pca,addlabels=TRUE)

fviz_pca_var(res.pca, col.var = "black")

# Contributions of variables to PC1
fviz_contrib(res.pca, choice = "var", axes = 1, top = 30)

# Contributions of variables to PC2
fviz_contrib(res.pca, choice = "var", axes = 2, top = 30)
# Contributions of variables to PC3
fviz_contrib(res.pca, choice = "var", axes = 3, top = 30)
# Contributions of variables to PC4
#fviz_contrib(res.pca, choice = "var", axes = 4, top = 10)

#graph of individuals;individuals with a similar profile are grouped together.
fviz_pca_ind(res.pca,
             label = "none", # hide individual labels
             col.ind = "cos2", # Color by the quality of representation
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE     # Avoid text overlapping
             #addEllipses = TRUE # Concentration ellipses
             )

clustering analysis

data for clustering

library(dendextend) # for comparing two dendrograms
## cluster
data_cluster <- subset(pass_check_data,select = c('factor1_score','factor2_score','factor3_score'))

prep: scale/normalization

### no need to normalize/scale
#data_cluster_z <- base::scale(data_cluster)
#data_cluster <- as.data.frame(data_cluster)

clustering tendency

method1 using PCs: hopkins stats;0.5=no cluster; 0=clusterable

library(clustertend)
# Compute Hopkins statistic for iris dataset
hopkins(data_cluster, n = nrow(data_cluster)-1)

method2: RSA using PCs

fviz_dist(dist(data_cluster), show_labels = FALSE,order=TRUE,gradient = list(low = "#00AFBB", mid = "white", high = "#FC4E07"))+
labs(title = "Dissimilarity matrix across all subs")

determine cluster number

internal to determine optimal cluster method

require(clValid)
intern <- clValid(data_cluster, nClust = 2:10,   maxitems=3000,
  clMethods = c("hierarchical","kmeans","pam"), validation = "internal")# Summary  
summary(intern)
optimalScores(intern)
plot(intern)

stability to determine optimal cluster method

stability <- clValid(data_cluster, nClust = 2:10,   maxitems=3000,
  clMethods = c("hierarchical","kmeans","pam"), validation = "stability")# Summary  
summary(stability)
#optimalScores(stability)
plot(stability)

30+indexes to determine optimal cluster num

require(NbClust)
res_hc_D2<-NbClust(data_cluster, distance = "euclidean", min.nc=2, max.nc=10, method = "ward.D2", index = "all")  
fviz_nbclust(res_hc_D2)

xiaoyuzeng_psychbrain

hello, web3

title: "R_psychologicalTraits_xiaoyuzeng_211228"

libraries

data import

PCA principle factor analysis

clustering analysis

data for clustering

prep: scale/normalization

clustering tendency

method1 using PCs: hopkins stats;0.5=no cluster; 0=clusterable

method2: RSA using PCs

determine cluster number

internal to determine optimal cluster method

stability to determine optimal cluster method

30+indexes to determine optimal cluster num

xiaoyuzeng_psychbrain