Chapter 11 Lavaan Lab 8: Estimation Methods
In this lab, we will learn how to estimate parameters in CFA/SR models.
Load up the lavaan library:
library(lavaan)
11.1 PART I: Hypothetical Example
11.1.1 One-factor CFA model
A made-up sample covariance matrix with n = 200:
= 200
n = matrix(c(5, 2, 3.5, 2, 3, 2, 3.5, 2, 6), 3, 3,
S_3fac dimnames = list(c('Y1', 'Y2', 'Y3'), c('Y1', 'Y2', 'Y3')))
S_3fac
## Y1 Y2 Y3
## Y1 5.0 2 3.5
## Y2 2.0 3 2.0
## Y3 3.5 2 6.0
Fit a one-factor CFA to the sample covariance matrix:
<- "
one_fac_syntax eta =~ Y1 + Y2 + Y3
"
Request Unweighted Least Squares (ULS):
<- lavaan::sem(one_fac_syntax,
one_fac_fit2 sample.cov = S_3fac,
sample.nobs = n,
estimator = "ULS",
fixed.x = FALSE)
summary(one_fac_fit2, standardized = T)
## lavaan 0.6-12 ended normally after 23 iterations
##
## Estimator ULS
## Optimization method NLMINB
## Number of model parameters 6
##
## Number of observations 200
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Unstructured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## eta =~
## Y1 1.000 1.871 0.837
## Y2 0.571 0.023 24.496 0.000 1.069 0.617
## Y3 1.000 0.050 19.950 0.000 1.871 0.764
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .Y1 1.500 0.202 7.423 0.000 1.500 0.300
## .Y2 1.857 0.094 19.749 0.000 1.857 0.619
## .Y3 2.500 0.202 12.372 0.000 2.500 0.417
## eta 3.500 0.189 18.497 0.000 1.000 1.000
Sigma:
fitted(one_fac_fit2)$cov
## Y1 Y2 Y3
## Y1 5.0
## Y2 2.0 3.0
## Y3 3.5 2.0 6.0
= fitted(one_fac_fit2)$cov
Sigma = Sigma[colnames(S_3fac), colnames(S_3fac)] - S_3fac
diff round(diff,3)
## Y1 Y2 Y3
## Y1 0 0 0
## Y2 0 0 0
## Y3 0 0 0
all zeros. Meaning that Sigma = S.
11.2 PART II: ULS on the Positive Affect Example
Let’s read this dataset in:
<- read.csv("cfaInclassData.csv", header = T) cfaData
Fit a two-factor CFA model:
<- "
fixedIndTwoFacSyntax #Factor Specification
posAffect =~ glad + happy + cheerful
satisfaction =~ satisfied + content + comfortable
"
<- lavaan::sem(fixedIndTwoFacSyntax,
two_fac_fit_uls data = cfaData,
fixed.x = FALSE,
estimator = "ULS")
summary(two_fac_fit_uls, standardized = T)
## lavaan 0.6-12 ended normally after 33 iterations
##
## Estimator ULS
## Optimization method NLMINB
## Number of model parameters 13
##
## Number of observations 1000
##
## Model Test User Model:
##
## Test statistic 0.837
## Degrees of freedom 8
## P-value (Unknown) NA
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Unstructured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## posAffect =~
## glad 1.000 0.696 0.708
## happy 1.067 0.066 16.205 0.000 0.743 0.760
## cheerful 1.105 0.068 16.157 0.000 0.769 0.781
## satisfaction =~
## satisfied 1.000 0.775 0.768
## content 1.073 0.064 16.845 0.000 0.832 0.768
## comfortable 0.906 0.053 17.124 0.000 0.702 0.738
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## posAffect ~~
## satisfaction 0.264 0.017 15.795 0.000 0.489 0.489
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .glad 0.482 0.050 9.597 0.000 0.482 0.499
## .happy 0.402 0.054 7.488 0.000 0.402 0.422
## .cheerful 0.377 0.056 6.762 0.000 0.377 0.390
## .satisfied 0.417 0.055 7.615 0.000 0.417 0.410
## .content 0.482 0.060 8.106 0.000 0.482 0.411
## .comfortable 0.411 0.049 8.322 0.000 0.411 0.455
## posAffect 0.484 0.039 12.395 0.000 1.000 1.000
## satisfaction 0.600 0.045 13.455 0.000 1.000 1.000
Sigma:
= cov(cfaData[,-1])
S = fitted(two_fac_fit_uls)$cov[colnames(S), colnames(S)]
Sigma = Sigma - S
diff round(diff,3)
## glad cheerful happy satisfied content comfortable
## glad 0.000 -0.002 0.004 -0.014 0.002 0.009
## cheerful -0.002 0.000 -0.002 0.009 -0.004 0.003
## happy 0.004 -0.002 0.000 -0.003 -0.011 0.011
## satisfied -0.014 0.009 -0.003 0.000 0.009 -0.007
## content 0.002 -0.004 -0.011 0.009 0.000 -0.003
## comfortable 0.009 0.003 0.011 -0.007 -0.003 0.000
print(paste0("The difference between S and Sigma ranged between ", round(min(diff),4), " and ", round(max(diff),4), "."))
## [1] "The difference between S and Sigma ranged between -0.0143 and 0.0113."
Sigma is not the same as S, but close.
The sum of squared differences is:
sum(diff[lower.tri(diff,diag = T)]^2)
## [1] 0.0008375874
11.3 PART III: Calculate ULS test statistic manually
ULS test statistic is calculated as:
= (1000-1)*sum(diff[lower.tri(diff,diag = T)]^2)
T_uls T_uls
## [1] 0.8367498
One can also obtain vectors of S and Sigma first:
= lav_matrix_vech(S)
s = lav_matrix_vech(Sigma) sigma
and calculate the ULS test statistic:
= (1000-1)*sum((s - sigma)^2)
T_uls T_uls
## [1] 0.8367498
- No p-value for ULS test statistic
- as there is no known distribution for this test statistic
- i.e., no suitable method for model fit evaluation
11.4 PART IV: ML vs ULS vs WLS
11.4.1 ML Estimation
<- lavaan::sem(fixedIndTwoFacSyntax,
two_fac_fit_ml data = cfaData,
fixed.x = FALSE,
estimator = "ML")
11.4.2 WLS Estimation
<- lavaan::sem(fixedIndTwoFacSyntax,
two_fac_fit_wls data = cfaData,
fixed.x = FALSE,
estimator = "WLS")
11.4.3 Compare the parameter estimates
= parameterEstimates(two_fac_fit_ml)[,1:3]
coefTable = cbind(coefTable,
coefTable ML = parameterEstimates(two_fac_fit_ml)$est,
ULS = parameterEstimates(two_fac_fit_uls)$est,
WLS = parameterEstimates(two_fac_fit_wls)$est)
coefTable
## lhs op rhs ML ULS WLS
## 1 posAffect =~ glad 1.0000000 1.0000000 1.0000000
## 2 posAffect =~ happy 1.0665969 1.0674339 1.0645600
## 3 posAffect =~ cheerful 1.1121981 1.1045607 1.1150756
## 4 satisfaction =~ satisfied 1.0000000 1.0000000 1.0000000
## 5 satisfaction =~ content 1.0683189 1.0732316 1.0719435
## 6 satisfaction =~ comfortable 0.9175967 0.9059151 0.9151164
## 7 glad ~~ glad 0.4838829 0.4823845 0.4835721
## 8 happy ~~ happy 0.4048385 0.4021235 0.4052179
## 9 cheerful ~~ cheerful 0.3711009 0.3772155 0.3643077
## 10 satisfied ~~ satisfied 0.4185958 0.4165368 0.4182162
## 11 content ~~ content 0.4909589 0.4823048 0.4902739
## 12 comfortable ~~ comfortable 0.4003673 0.4114721 0.4034071
## 13 posAffect ~~ posAffect 0.4816239 0.4840887 0.4812181
## 14 satisfaction ~~ satisfaction 0.5973737 0.6004497 0.5964489
## 15 posAffect ~~ satisfaction 0.2617756 0.2638580 0.2619159
11.4.4 Compare the standard errors:
= parameterEstimates(two_fac_fit_ml)[,1:3]
seTable = cbind(seTable,
seTable ML = parameterEstimates(two_fac_fit_ml)$se,
ULS = parameterEstimates(two_fac_fit_uls)$se,
WLS = parameterEstimates(two_fac_fit_wls)$se)
seTable
## lhs op rhs ML ULS WLS
## 1 posAffect =~ glad 0.00000000 0.00000000 0.00000000
## 2 posAffect =~ happy 0.05528238 0.06587199 0.05328971
## 3 posAffect =~ cheerful 0.05715937 0.06836322 0.05484498
## 4 satisfaction =~ satisfied 0.00000000 0.00000000 0.00000000
## 5 satisfaction =~ content 0.05204954 0.06371370 0.05010033
## 6 satisfaction =~ comfortable 0.04512163 0.05290389 0.04329804
## 7 glad ~~ glad 0.02906697 0.05026166 0.02803266
## 8 happy ~~ happy 0.02813623 0.05369964 0.02823333
## 9 cheerful ~~ cheerful 0.02853712 0.05578216 0.02731647
## 10 satisfied ~~ satisfied 0.02921942 0.05470277 0.02802346
## 11 content ~~ content 0.03376166 0.05950183 0.03304768
## 12 comfortable ~~ comfortable 0.02614297 0.04944382 0.02655468
## 13 posAffect ~~ posAffect 0.04210374 0.03905424 0.04141331
## 14 satisfaction ~~ satisfaction 0.04708792 0.04462502 0.04883263
## 15 posAffect ~~ satisfaction 0.02545399 0.01670551 0.02718848
11.5 PART V: Improper Solutions
Going back to the 1-factor toy example…
Suppose we have a new covariance matrix now and a super large sample size (n = 200000000):
= 2000
n = matrix(c(5, 1, 3.5, 1, 3, 2, 3.5, 2, 6), 3, 3,
S_3fac_new dimnames = list(c('Y1', 'Y2', 'Y3'), c('Y1', 'Y2', 'Y3')))
S_3fac_new
## Y1 Y2 Y3
## Y1 5.0 1 3.5
## Y2 1.0 3 2.0
## Y3 3.5 2 6.0
The one-factor syntax is the same:
<- "
one_fac_syntax eta =~ Y1 + Y2 + Y3
"
ML Estimation:
<- lavaan::sem(one_fac_syntax,
one_fac_fit_new sample.cov = S_3fac_new,
sample.nobs = n,
estimator = "ML",
fixed.x = FALSE)
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov variances are negative
- lavaan WARNING: some estimated ov variances are negative
- negative residual variances
- changing the estimation from ML to ULS doesn’t help
summary(one_fac_fit_new, standardized = T)
## lavaan 0.6-12 ended normally after 28 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 6
##
## Number of observations 2e+08
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## eta =~
## Y1 1.000 1.323 0.592
## Y2 0.571 0.000 6181.151 0.000 0.756 0.436
## Y3 2.000 0.000 4714.045 0.000 2.646 1.080
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .Y1 3.250 0.000 7039.895 0.000 3.250 0.650
## .Y2 2.429 0.000 9150.327 0.000 2.429 0.810
## .Y3 -1.000 0.001 -760.286 0.000 -1.000 -0.167
## eta 1.750 0.001 3486.948 0.000 1.000 1.000
Label and constraint sig3 to be larger than 0:
<- "
one_fac_syntax_const eta =~ Y1 + Y2 + Y3
Y3~~sig3*Y3
# constraints
sig3 > 0
"
ML Estimation:
<- lavaan::sem(one_fac_syntax_const,
one_fac_fit_new2 sample.cov = S_3fac_new,
sample.nobs = n,
fixed.x = FALSE)
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov variances are negative
summary(one_fac_fit_new2, standardized = T)
## lavaan 0.6-12 ended normally after 63 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 6
## Number of inequality constraints 1
##
## Number of observations 2e+08
##
## Model Test User Model:
##
## Test statistic 806452.705
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## eta =~
## Y1 1.000 1.429 0.639
## Y2 0.571 0.000 6357.073 0.000 0.816 0.471
## Y3 1.714 0.000 11748.540 0.000 2.449 1.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .Y3 (sig3) -0.000 -0.000 -0.000
## .Y1 2.958 0.000 10000.000 0.000 2.958 0.592
## .Y2 2.333 0.000 10000.000 0.000 2.333 0.778
## eta 2.042 0.000 5065.023 0.000 1.000 1.000
##
## Constraints:
## |Slack|
## sig3 - 0 0.000
fitted(one_fac_fit_new)$cov # perfect
## Y1 Y2 Y3
## Y1 5.0
## Y2 1.0 3.0
## Y3 3.5 2.0 6.0
fitted(one_fac_fit_new2)$cov # compromised, at the cost of model fit
## Y1 Y2 Y3
## Y1 5.000
## Y2 1.167 3.000
## Y3 3.500 2.000 6.000
S_3fac_new
## Y1 Y2 Y3
## Y1 5.0 1 3.5
## Y2 1.0 3 2.0
## Y3 3.5 2 6.0