# hello, web3

By [xiaoyuzeng_psychbrain](https://paragraph.com/@xiaoyuzeng-psychbrain) · 2022-01-13

---

* * *

title: "R\_psychologicalTraits\_xiaoyuzeng\_211228"
---------------------------------------------------

output: word\_document: default html\_document: default pdf\_document: default

    rm(list = ls())
    

libraries
---------

    #library(bruceR)
    #library(lmerTest)
    # Packages required for Chapter 10
    #library(knitr)
    #library(gridExtra)
    #library(GGally)
    #library(mice)
    #library(nlme)
    #library(lme4)
    #library(mnormt)
    #library(boot)
    #library(HLMdiag)
    #library(kableExtra)
    #library(pander)
    library(tidyverse)
    #library(ggstatsplot)
    library(psych)
    library(GPArotation)
    library(nFactors)
    #library(caret)
    #library(Hmisc)
    library(ggthemes)
    library(factoextra)
    library(FactoMineR)
    #library(randomForest)
    library(reshape2)
    library("ggsci")
    library("ggpubr")
    source("D:/微云同步助手/754424340/kvip/03data analysis/batch/LA/mainfigure/geom_flat_violin.R")
    source("D:/微云同步助手/754424340/kvip/03data analysis/batch/LA/mainfigure/summarySE.R")
    source("D:/微云同步助手/754424340/kvip/03data analysis/batch/LA/mainfigure/R_rainclouds.R")
    

data import
-----------

    pass_check_data<-read.csv('Experiment2_data.CSV',header=T)#n=2631
    

PCA principle factor analysis
-----------------------------

    cols <- c("IRI2", "IRI3", "IRI4",  "IRI6",  "IRI8", "IRI9", "IRI10", "IRI11", 
              "IRI13", "IRI14","IRI15", "IRI17", "IRI18","IRI19","IRI20","IRI21","IRI22",
              "IRI24","IRI25","IRI27","IRI28",
              "PSY1", "PSY2", "PSY3", "PSY4", "PSY5", "PSY6","PSY7", "PSY8", "PSY9", "PSY10", "PSY11",
              "PSY12","PSY13", "PSY14","PSY15", "PSY16", "PSY17","PSY18","PSY19","PSY20","PSY21","PSY22", 
              "PSY23","PSY24","PSY25","PSY26","PSY27","PSY28","PSY29","PSY30",
              "GASP1", "GASP2", "GASP3", "GASP4", "GASP5", "GASP6", "GASP7", "GASP8", "GASP9", "GASP10", "GASP11", 
              "GASP12","GASP13", "GASP14","GASP15", "GASP16",
              "SC1", "SC2", "SC3", "SC4", "SC5", "SC6", "SC7", "SC8", "SC9", "SC10", "SC11", 
              "SC12","SC13", "SC14","SC15", "SC16", "SC17", "SC18","SC19","SC20","SC21","SC22",
              "SC23","SC24","SC25","SC26",
              "GBJWS1", "GBJWS2", "GBJWS3", "GBJWS4", "GBJWS5", "GBJWS6", "GBJWS7", "GBJWS8", "GBJWS9", "GBJWS10", "GBJWS11", 
              "GBJWS12","GBJWS13",
              "AS1", "AS2", "AS3", "AS4", "AS5", "AS6", "AS7", "AS8", "AS9", "AS10", "AS11", 
              "AS12","AS13", "AS14","AS15", "AS16", "AS17", "AS18","AS19","AS20")
    data_for_factor_analysis <- pass_check_data[,cols]
    
    # Pearson correlation
    corr_mat <- cor (data_for_factor_analysis)
    ev <- eigen(corr_mat)
    eva <- ev$values
    
    # Determine the number of factors
    
    numFact <- nCng(eva, model="factors", details=TRUE)
    
    plotuScree(eva, main=paste(numFact$nFactors,
                               " factors retained by the CNG procedure",
                               sep=""))
    

    # Factor analysis
    
    fa.new <- fa(r = corr_mat, nfactors = 3, n.obs = nrow(data_for_factor_analysis), rotate = "oblimin", fm = "ml", scores = "regression")
    
    fa.scores <- factor.scores(x=data_for_factor_analysis,f=fa.new)
    scores = data.frame(fa.scores$scores)
    loadings <- fa.new$loadings
    # add scores to all subs data
    pass_check_data$factor1_score <- scores$ML1
    pass_check_data$factor2_score <- scores$ML2
    pass_check_data$factor3_score <- scores$ML3
    #output results
    write.csv(pass_check_data,'Experiment2_data.csv')
    write.csv(scores,'factor_scores_R.csv')
    write.csv(loadings,'factor_loading_R.csv')
    #allData$F1 <- scores$ML1
    #allData$F2 <- scores$ML2
    #allData$F3 <- scores$ML3
    

    #1compute PCA
    res.pca <- prcomp(data_for_factor_analysis, scale = TRUE)
    #2Visualize eigenvalues
    fviz_eig(res.pca,addlabels=TRUE)
    

    fviz_pca_var(res.pca, col.var = "black")
    

    # Contributions of variables to PC1
    fviz_contrib(res.pca, choice = "var", axes = 1, top = 30)
    
    # Contributions of variables to PC2
    fviz_contrib(res.pca, choice = "var", axes = 2, top = 30)
    # Contributions of variables to PC3
    fviz_contrib(res.pca, choice = "var", axes = 3, top = 30)
    # Contributions of variables to PC4
    #fviz_contrib(res.pca, choice = "var", axes = 4, top = 10)
    

    #graph of individuals;individuals with a similar profile are grouped together.
    fviz_pca_ind(res.pca,
                 label = "none", # hide individual labels
                 col.ind = "cos2", # Color by the quality of representation
                 gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
                 repel = TRUE     # Avoid text overlapping
                 #addEllipses = TRUE # Concentration ellipses
                 )
    

clustering analysis
-------------------

### data for clustering

    library(dendextend) # for comparing two dendrograms
    ## cluster
    data_cluster <- subset(pass_check_data,select = c('factor1_score','factor2_score','factor3_score'))
    

### prep: scale/normalization

    ### no need to normalize/scale
    #data_cluster_z <- base::scale(data_cluster)
    #data_cluster <- as.data.frame(data_cluster)
    

### clustering tendency

#### method1 using PCs: hopkins stats;0.5=no cluster; 0=clusterable

    library(clustertend)
    # Compute Hopkins statistic for iris dataset
    hopkins(data_cluster, n = nrow(data_cluster)-1)
    

#### method2: RSA using PCs

    fviz_dist(dist(data_cluster), show_labels = FALSE,order=TRUE,gradient = list(low = "#00AFBB", mid = "white", high = "#FC4E07"))+
    labs(title = "Dissimilarity matrix across all subs")
    

### determine cluster number

### internal to determine optimal cluster method

    require(clValid)
    intern <- clValid(data_cluster, nClust = 2:10,   maxitems=3000,
      clMethods = c("hierarchical","kmeans","pam"), validation = "internal")# Summary  
    summary(intern)
    optimalScores(intern)
    plot(intern)
    

### stability to determine optimal cluster method

    stability <- clValid(data_cluster, nClust = 2:10,   maxitems=3000,
      clMethods = c("hierarchical","kmeans","pam"), validation = "stability")# Summary  
    summary(stability)
    #optimalScores(stability)
    plot(stability)
    

#### 30+indexes to determine optimal cluster num

    require(NbClust)
    res_hc_D2<-NbClust(data_cluster, distance = "euclidean", min.nc=2, max.nc=10, method = "ward.D2", index = "all")  
    fviz_nbclust(res_hc_D2)

---

*Originally published on [xiaoyuzeng_psychbrain](https://paragraph.com/@xiaoyuzeng-psychbrain/hello-web3)*
