############################################################################# # eirm_c2.R # adaptation of IRT analyses for the book "Explanatory Item # Response Models" (De Boeck and Wilson, Springer 2004) # # ------------------------------------------- # Christophe Lalanne # Centre international d'études pédagogiques # lalanne@ciep.fr # source code avalaible on: www.aliquote.org # ------------------------------------------- # # Time-stamp: <2007-08-01 22:57:05 chl> ############################################################################# # set the path where datafiles are stored wd <- "~/data/EIRM/data/Verbal aggression/" # save the graphics as eps postscript(file="eirm_c2.eps") options(width=78, digits=4) ############################################################################# #### Chapter 2 - Descriptive and explanatory item response models #### ############################################################################# #### #### Purely descriptive Rasch Model #### # load the data a <- read.table(paste(wd,"data verbal aggression matrix dichot.txt", sep=""),header=TRUE) # here, responses have already been dichotomized str(a) length(names(a)) ## gives the number of variables # we only need to work with the first 24 variables for the moment a.1 <- a[,-c(25,26)] head(a.1) # computation with the `ltm` package # http://student.kuleuven.be/~m0390867/dimitris.htm library(ltm) # Rasch model with discrimination parameter = 1 a.1.rasch.1 <- rasch(a.1,constraint=rbind(cbind(dim(a.1)[[2]] + 1, 1)) ,IRT.param=TRUE) # IRT.param=TRUE gives standard errors for coefficients estimates # (SDs are computed via Delta Method) (res.1 <- summary(a.1.rasch.1)) -2*res.1$logLik ## deviance # Note: # AIC can be obtained directly with summary(a.1.rasch.1)$AIC # see : attributes(summary(a.1.rasch.2)) # we will use this soon to compare the different models # now, we can look at the estimated difficulties (with standard errors) a.1.rasch.1.diff <- coef(a.1.rasch.1)[,1] a.1.rasch.1.diff.se <- diag(sqrt(summary(a.1.rasch.1)$Var.betas)) # and also the estimated abilities a.1.rasch.1.abil <- factor.scores(a.1.rasch.1)$score.dat[,"z1"] # and we can plot an Item-Person map plot(factor.scores(a.1.rasch.1),include.items=TRUE) # Rasch model with unconstrained discrimination parameter a.1.rasch.2 <- rasch(a.1) (res.2 <- summary(a.1.rasch.2)) -2*res.2$logLik ## deviance # ICC curves related to these two models op <- par(mfrow=c(1,2),mar=c(5,2,4,1)) plot(a.1.rasch.1,main="disc=1") plot(a.1.rasch.2,main="disc=estimated") par(op) # we do not obtain the same results as those of DB & W: # Deviance AIC BIC # 8072 8122 8216 # # default optimization method in rasch() is set to "BFGS" within # the call to optim() function, with 21 quadrature points # we fix the number of quadrature points to 20, as DB & W a.1.rasch.3 <- rasch(a.1,control=list(GHk=20)) res.3 <- summary(a.1.rasch.3) res.3$AIC res.3$BIC -2*res.3$logLik # and get an approximately convergent AIC, BIC and deviance, that is: # 8122.527, 8216.42 and 8072.527, respectively # items properties diff(range(a.1.rasch.3$coefficients[,1])) summary(a.1.rasch.3$coefficients[,1]) # These results are the same as those of DB & W except for the sign # Parameters values are also concordant (see p. 367), but the SD isn't sd(a.1.rasch.3$coefficients[,1]) # other stuff needed for computing ability dim(a.1.rasch.3$patterns$X)[1] ## gives the number of distinct ## pattern responses # model fit (a.1.rasch.3.items <- item.fit(a.1.rasch.3)) person.fit <- person.fit(a.1.rasch.3) ## TODO: give a better output # model fit using a parametric bootstrap test for Rasch models # (Pearson's chi-squared residuals) GoF.rasch(a.1.rasch.3) ## takes a lot of time... # Now, we can check what would be obtained using the `eRm`package library(eRm) a.1.rasch.4 <- RM(a.1) summary(a.1.rasch.4) # difficulty of items a.1.rasch.4.diff <- a.1.rasch.4$betapar a.1.rasch.4.diff.se <- a.1.rasch.4$se.beta # compare ltm and eRm estimation xx <- as.numeric(a.1.rasch.1.diff) yy <- as.numeric(a.1.rasch.4.diff) plot(-xx,yy,xlab=expression(paste("-",beta[i]," (eRm)")),ylab=expression(paste(beta[i]," (ltm)"))) beta.ols <- lm(yy~I(-xx)) summary(beta.ols) abline(beta.ols,lty=3,col="lightgrey") beta.slope <- coef(beta.ols)[2] r.sq <- summary(beta.ols)$r.squared legend("topleft",paste("Slope=",round(beta.slope,3),"\n","R^2=",round(r.sq,4)),bty="n") # add the X se (eRm) segments(-xx-a.1.rasch.4.diff.se,yy,-xx+a.1.rasch.4.diff.se,yy) # add the Y se (ltm) segments(-xx,yy-a.1.rasch.1.diff.se,-xx,yy+a.1.rasch.1.diff.se) ############################################################################# # same analyses using a mixed model approach # Note: # don't forget to have a look at Bates' article (JSS 2007, vol. 20) library(lme4) # we need to get to the long format b <- reshape(a.1,varying=list(names(a.1)),idvar="subj",timevar="item",direction="long",v.names="rep") b$item <- factor(b$item,labels=names(a.1)) # Note: # lmer's default : logit link, estimation is done via Laplace method (which # is the default for generalized LMM) b.lme <- lmer(rep ~ 0 + item + (1|subj), b, binomial) print(b.lme,corr=FALSE) # Fixed effects = items difficulty # these are in fact the log-odds for a positive response b.lme.diff <- fixef(b.lme) summary(b.lme.diff) c(var=var(b.lme.diff),sd=sd(b.lme.diff)) plot(density(b.lme.diff),main=expression(paste(beta[i],"estimates")),ylab="Density") # Note: # The range of difficulties is strictly the same, after reversing the sign # of the coefficients. This is because lmer is modelling beta-theta instead # of theta-beta in the SAS script of the authors. Average difficulty is also # the same as that obtained with NLMIXED. # to get estimates for a positive response on the probability scale, # we use the inverse link function binomial()$linkinv(b.lme.diff) # estimated variance of the random effect (person) show(vc <- VarCorr(b.lme)) # Note: # We cannot get sd for variance estimate as is done with SAS NLMIXED # see Bates's email posted in 2003 about `lme`: # "Other software for fitting mixed-effects models, such as SAS PROC # MIXED and HLM, return standard errors along with the estimates of the # variances and covariances of the random effects. We don't return # standard errors of estimated variances because we don't think they are # useful. A standard error for a parameter estimate is most useful when # the distribution of the estimator is approximately symmetric, and # these are not." # BLUP for random effect and caterpillar plot b.lme.rr <- ranef(b.lme,postVar=TRUE) b.lme.qq <- qqmath(b.lme.rr) print(b.lme.qq$subj) # ability estimates are stored in a slot in the object b.lme.rr b.lme.abil <- unlist(b.lme.rr@.Data) summary(b.lme.abil) var(b.lme.abil) # then, we can compute the posterior variance (which is the product of # variances and scale factor. Here, the scale factor has been constrained # to 1 b.lme.abil.sd <- as.numeric(b.lme@bVar$subj) * attr(vc,"sc")**2 summary(b.lme.abil.sd) # we could make use of estimated scale factor summary(as.numeric(b.lme@bVar$subj) * summary(b.lme)@sigma**2) # Finally, model fit can be assessed using Deviance, AIC and BIC. # Here, we get b.lme@deviance # or more concisely summary(b.lme)@AICtab # Note: # Standard indices for model fit (AIC, BIC, LogLik and Deviance) differ # from NLMIXED and `ltm` output. However, it should be noted that lmer used # 11 quadrature points per random effect # It would be nice if we could compare with an adaptive quadrature # approximation (instead of the default Laplace one), but it is still lacking # at the time of this writings. ## b.lme2 <- lmer(rep ~ 0 + item + (1|subj), b, binomial,method="AGQ") ## # gives an error like ## # method = "AGQ" not yet implemented for supernodal representation ############################################################################# #### #### Latent Regression Rasch Model #### # we now make use of the two covariates # a$Geslacht, aka gender, coded 0 (female) and 1 (male) # a$Anger, trait anger, raw score (reference mean score = 20, sd = 4.85) c <- reshape(a.1,varying=list(names(a.1)),idvar="subj",timevar="item",direction="long",v.names="rep") c$item <- factor(c$item,labels=names(a.1)) # we add the two covariates c$Sex <- rep(as.factor(a$Geslacht),nrow(c)/length(a$Geslacht)) c$Anger <- rep(a$Anger,nrow(c)/length(a$Anger)) summary(c) # numerical summary for Trait Anger summary(c$Anger) sd(c$Anger) # table of counts for men and women c.tab.sex <- table(c$subj,c$Sex) apply(c.tab.sex,2,function(x) sum(x!=0)) require(lme4) ## if not already loaded c.lme <- lmer(rep ~ 0 + Anger + Sex + item + (Anger+Sex|subj), c, binomial) print(c.lme,corr=FALSE) # model fit summary(c.lme)@AICtab # fixed effect c.lme.diff <- fixef(c.lme) summary(c.lme.diff) c(var=var(c.lme.diff),sd=sd(c.lme.diff)) # ability c.lme.rr <- ranef(c.lme,postVar=TRUE) c.lme.qq <- qqmath(c.lme.rr) print(c.lme.qq) c.lme.abil <- c.lme.rr$subj[,1] # comparing the two models with the LR test # anova(b.lme,c.lme) doesn't seem to work, so we calculate the LR statistic # by hand 1-pchisq(-2*(as.numeric(logLik(b.lme))-as.numeric(logLik(c.lme))),2) # we get quite the same result (chi^2(2)=12.6 in W & DB, when comparing two # deviances of 8072 vs. 8060). # Conclusion: # Overall, there is a slight improvement in model fit when using the Latent # Regression Rasch Model. ############################################################################# #### #### Linear Logistic Test Model (LLTM) #### # We need to have three items predictors, codes in 4 factors, namely # Behavior Mode # do = 1 # want = 0 # # Situation Type # Other-to-blame = 1 # Self-to-blame = 0 # # Behavior Type # Curse, Scold = 1/2 and Shout = -1 # Curse, Shout = 1/2 and Scold = -1 # # we use the data.frame constructed when evaluation the standard Rasch model # and add the corresponding factors b$Behav.Mode <- as.factor(ifelse(regexpr("want",b$item,ignore.case=TRUE)!=-1,0,1)) b$Situa.Type <- as.factor(ifelse(regexpr("S1",b$item)!=-1,1,0)) b$Behav.Type.1 <- as.factor(ifelse(regexpr("curse",b$item)!=-1 | regexpr("Scold",b$item)!=-1,1/2,-1)) b$Behav.Type.2 <- as.factor(ifelse(regexpr("curse",b$item)!=-1 | regexpr("Shout",b$item)!=-1,1/2,-1)) summary(b) # Note: # I haven't try to fit such a model using lmer... require(eRm) ## if not already loaded # This package allows to fit several model, including LLTM, and it is # based on conditional likelihood. # Check the original article published in JSS: # Mair, P., and Hatzinger, R. (2007). Extended Rasch modeling: The # eRm package for the application of IRT models in R. Journal of # Statistical Software, 20(9), 1-20. # Note, however, that estimation is not strictly the same as with ltm # or NLMIXED and so differing results may be obtained. # # Be careful! The data matrix must be in "short" format (person x item), # and we need to build the design matrix by hand a.1.W <- matrix(c(rep(c(0,1),c(12,12)), rep(c(1,0,1,0),each=6), rep(c(.5,.5,-1),8), rep(c(.5,-1,.5),8)),ncol=4) # next, we can fit the model a.1.lltm <- LLTM(a.1,W=a.1.W) summary(a.1.lltm) # as proposed by Mair & Hatzinger, we may check the model against the # simpler RM model a.1.rm <- RM(a.1) summary(a.1.rm) # difficulty are obtained using (a.1.lltm.diff <- a.1.lltm$betapar) a.1.lltm.diff.se <- a.1.lltm$se.beta summary(a.1.lltm.diff) yy <- sort(a.1.lltm.diff) # plot the difficulty estimates + approximate CI op <- par(mar=c(8,4,4,2)) plot(yy,xlab="",ylab=expression(beta),las=1,axes=F) lines(1:24,lowess(yy,f=1/3)$y) axis(2,at=seq(-2,2,.5)) mtext(colnames(a.1),side=1,line=2,at=1:24,las=2) segments(1:24,yy-a.1.lltm.diff.se*1.96,1:24,yy+a.1.lltm.diff.se*1.96) legend("topleft",expression(paste(beta%+-%1.96,"se")),bty="n") title("LLTM Model fitted to Aggression Data") par(op) # a QQ-plot qqnorm(a.1.lltm.diff) qqline(a.1.lltm.diff) vcov(a.1.lltm) # model fit logLik(a.1.lltm) # and thus deviance is computed as -2*logLik(a.1.lltm) # ability estimates a.1.lltm.abil <- as.numeric(unlist(person.parameter(a.1.lltm)$thetapar)) head(a.1.lltm.abil) # hist(a.1.lltm.abil,freq=FALSE,plot=FALSE) # lines(density(a.1.lltm.abil)) # Note: # Be careful! There is an error in help function which indicates that # person.parameters() should be used while the correct function is # person.parameter() (eRm version 0.9-6, 2008-01-20) ############################################################################# #### #### Latent Regression LLTM #### # # still to do... # ############################################################################## # a quick summary of all the results source("eirm_c2_summary.R") # stop the eps device dev.off()