# # Math 3330 Section B Fall 2001 # Testing a General Linear Hypothesis # # Small sample data set: download from 'dose.xls' # on # Dose of drug, # Sev: severity of illness, # Y: symptoms summary( dose ) plot( dose, ask = F ) pairs(dose) # trellis.device( col = F ,background.color = "Bright White") # trellis.device( background.color = "Bright White") # td() xyplot ( Y ~ Dose, dose) xyplot ( Y ~ Dose | Sev, dose ) xyplot ( Y ~ Dose , dose, groups = Sev, panel = panel.superpose ) # # Although this is a small artificial example # a number of elements make it interesting (complicated?) # # 1) categorical predictor variable with 3 levels # 2) numerical predictor associated with with cat. predictor # 3) conditional association with different sign from # marginal association # 4) interaction between cat. predictor and num. predictor # fit <- lm( Y ~ Dose, dose) # a simple doseression summary(fit) fiti <- lm( Y ~ Dose * Sev, dose ) # multiple doseression with cat. pred. and interaction summary(fiti) # Other ways of specifying the same model summary( lm( Y ~ Sev * Dose, dose )) summary(fit <- lm( Y ~ Sev / Dose, dose )) summary(fit <- lm( Y ~ Sev / Dose - 1, dose )) # Note that '-1' means REMOVE intercept # # How S-Plus fits a doseression # fit <- lm( Y ~ Sev * Dose, dose ) class(fit) mode(fit) names(fit) fit$coef fit$resid fit$fit fit$eff fit$R fit$rank fit$assign fit$df.resid fit$contrasts # helmert contrasts fit$terms fit$call fit # same as print(fit) in command line summary(fit) # this is what gets printed fit.sum <- summary(fit) names(fit.sum) fit.sum$call fit.sum$terms fit.sum$residuals fit.sum$coef fit.sum$sigma fit.sum$df fit.sum$r.squared fit.sum$fstat fit.sum$cov.unscaled fit.sum$corr # Important methods for doseression ('lm') objects: summary(fit) fiti$contrasts # coding matrix for categorical preds dose[, c(1,2,3)] # the X 'data.frame' model.matrix ( fit ) # the X matrix anova(fit) # Sequential SS drop1.aov(fit) # 'Type III SS' respecting marginality drop1.aov(fit, scope = . ~ . ) # 'Type III SS' ignoring marginality plot( fit ) coef( fit ) resid ( fit ) predict ( fit ) lm.influence ( fit ) # Exercise: How would you get y from 'fit' # # prediction # fit.add <- update( fit, . ~ Sev + Dose) fit.add dose$pred.add <- predict(fit.add) dose$pred <- predict(fit) xyplot( Y ~ Dose | Sev , data = dose, y1 = dose$pred.add, y2 = dose$pred, xx = dose$Dose, panel = function(x,y,subscripts, y1, y2, xx, ...) { i <- order(x) panel.xyplot( x, y, ... ) panel.xyplot( x[i], y1[subscripts][i], type = 'l', lty=1) panel.xyplot( x[i], y2[subscripts][i], type = 'l', lty=3) } ) # Comparing two nested models anova(fit , fit.add) anova(fit) # # prediction with new data # # create a new data frame pdf <- expand.grid ( Dose = 1:6, Sev= levels(dose$Sev) ) pdf$pred <- predict( fit, newdata = pdf ) pdf xyplot( pred ~ Dose , pdf, groups = Sev, panel = panel.superpose, type = 'l') ## ## ## Estimation ## ## fit$coef coef(fit) # Suppose we want to test whether line is the same for the two levels # of Severity # # Two approaches: # 1) fit a reduced model and compare # 2) construct a General Linear Hypothesis # # # 1) Fitting a reduced model # dose$Sev dose$SevRed <- ifelse( dose$Sev == 'control', 'control', 'other') dose$SevRed <- factor( dose$SevRed ) contrasts( dose$SevRed ) fit.red <- update( fit , . ~ SevRed * Dose ) summary(fit.red) anova(fit.red ,fit) # # 2) Generalized Linear Hypothesis # fit$contrasts fit$coef # > fit$cont # $Sev: # [,1] [,2] # control -1 -1 # high 1 -1 # low 0 2 # # > fiti$coef # (Intercept) Sev1 Sev2 Dose Sev1Dose Sev2Dose # 5.29381 4.367857 -0.1740476 -0.5795238 -0.5107143 -0.01880952 # # control 1 -1 -1 x -x -x # high 1 1 -1 x x -x # low 1 0 2 x 0 2x # # To test equality of the 'high' and 'low' lines at x, we take the difference: # # high-low 0 1 -3 0 x -3x # # To test equality of lines for 'high' and 'low' we need to test equality # at two values of x, say x = 0 and x = 1, yielding coefficients: # # high-low # at x = 0: 0 1 -3 0 0 0 # at x = 1: 0 1 -3 0 1 -3 # # So letting Lmat: # Lmat <- rbind ( c( 0,1,-3,0,0,0), c(0,1,-3,0,1,-3)) . # # # We want to test: Lmat %*% beta = 0 # # We compute Lmat %*% b and its variance matrix and construct the appropriate F test: # est <- Lmat %*% coef(fit) est fits <- summary(fit) . # We need: fit$df.residual fits$cov.unscaled fits$sigma evar.est <- Lmat %*% (fits$sigma^2 * fits$cov.unscaled) %*% t(Lmat) evar.est F.val <- t(est) %*% solve(evar.est) %*% est / 2 1 - pf( F.val, 2, fit$df.residual) # # Turning all this into a function # glh <- function(fit, Lmat) { # tests a General Linear Hypethesis # BUGS: 1) can't cope if Lmat is not of full row rank # 2) it should be easier to specify Lmat # Lmat <- rbind ( Lmat ) # turns Lmat into a row vector if it isn't est <- Lmat %*% coef(fit) fit.sum <- summary(fit) evar.est <- Lmat %*% (fit.sum$sigma^2 * fit.sum$cov.unscaled) %*% t(Lmat) df.num <- nrow(Lmat) # this assumes Lmat of full row rank df.den <- fit$df.residual F.val <- t(est) %*% solve(evar.est) %*% est / df.num c("Df Num" = df.num, "Df Denom" = df.den, "F Value" = F.val, "Pr(F)" = 1 - pf(F.val, df.num, df.den )) } glh(fit, Lmat)