# # Math 3330 Section B Fall 2001 # Diagnostics # # # Graphical diagnostics # # # define a generic function (ambitious plans for the future) # diags <- function(x, ...) UseMethod("diags") # # define a method for objects of class 'lm' # diags.lm <- function(x, y, ..., ask, labels = names(residuals(x)), showlabs = text) { # diags.lm # graphical diagnostics for lm, locally first-order for glm # enlarged version of plot.lm with emphasis on diagnostics # G. Monette, Dec. 94 # modified Nov. 97, May 98 # if(!missing(ask)) { op <- par(ask = ask) on.exit(par(op)) } form <- formula(x) f <- predict(x) r <- residuals(x) nams <- names(r) if(!missing(labels)) { nams <- names(residuals(x)) # # if labels not same length as residuals assume it's a vector # of len == original data and select elements included in residuals if(length(nams) != length(labels)) labels <- labels[nams] } ret <- NULL if(missing(y)) { y <- f + r yname <- deparse(form[[2]]) } else yname <- deparse(substitute(y)) fname <- paste("Fitted:", deparse(form[[3]]), collapse = " ") plot(f, y, xlab = fname, ylab = yname, main = "Residual vs. Predicted", ...) abline(0, 1, lty = 2) showlabs(f, y, labels) # # get influence diags and model matrix while looking at first plot # lmi <- lm.influence(x) hat <- lmi$hat sigma <- lmi$sigma # drop 1 sigma mm <- scale(model.matrix(x), scale = F) # centres each column mp <- predict(x, type = "terms") comp.res <- mp + r # effect + residual # # Absolute residual vs. predicted # plot(f, abs(r), xlab = fname, ylab = deparse(substitute(abs(resid(x))) ), main = "Absolute Residual vs. Predicted", ...) showlabs(f, abs(r), labels) # # # Normal quantile plot # zq <- qqnorm(r, main = "Normal Quantile Plot", ylab = "Residual", sub = fname) qqline(r) showlabs(zq, labels) # # # Symmetry plot of residuals (Lawrence C. Hamilton, doseression with # Graphics, Duxbury, 1992) n <- length(r) r.o <- sort(r) half <- (n + 1)/2 if(n %% 2 == 1) { # n is odd med <- r.o[half] below <- med - r.o[half:1] above <- r.o[half:n] - med } else { # n is even med <- sum(r.o[c(half, half + 1)])/2 below <- med - r.o[(n/2):1] above <- r.o[(n/2 + 1):n] - med } opt <- par(pty = "s") ran <- range(c(below, above)) plot(below, above, main = "Symmetry plot of residuals", xlab = "Distance below median", ylab = "Distance above median", xlim = ran, ylim = ran) abline(0, 1, lty = 2) par(opt) # # # Studentized residual vs. leverage # std.r <- r/(sigma * sqrt(1 - hat)) plot(hat, std.r, xlab = "Leverage (hat)", ylab = yname, sub = fname, main = "Studentized residual vs. Leverage", ...) showlabs(hat, std.r, labels) # plot(lmi$sig, std.r) # # # Added variable plot (partial doseression leverage plot) # and partial residual plot # dr1 <- drop1(x, . ~ ., keep = c("residuals", "x.residuals")) drk <- dr1$keep # print(dimnames(drk)[[1]]) for(ii in dimnames(drk)[[1]]) { # print(paste("Doing AVP for", ii)) xr <- drk[[ii, "x.residuals"]] yr <- drk[[ii, "residuals"]] if(is.null(dim(xr))) { # one degree of freedom effect symbols(xr, yr, circles = hat, inches = max(hat), xlab = paste("Residual of", ii, "on other predictors"), ylab = paste( "Residual of", yname, "on all but", ii), main = paste("Added variable plot for", ii), sub = "radius of circles proportional to leverage") abline(lm(yr ~ xr)) lines(supsmu(xr, yr)) showlabs(xr, yr, labels) # partial residual plot symbols(mm[, ii], comp.res[, ii], circles = hat, inches = max(hat), xlab = paste(ii, "- mean(", ii, ")"), ylab = paste("Residual of", yname, "plus component for", ii), main = paste( "Partial residual plot for", ii), sub = "radius of circles proportional to leverage") lines(supsmu(mm[, ii], comp.res[, ii])) showlabs(mm[, ii], comp.res[, ii], labels) } else { symbols(mp[, ii], comp.res[, ii], circles = hat, inches = max(hat), xlab = paste("Effect of", ii), ylab = paste("Residual of", yname, "plus component for", ii), main = paste( "Modified partial residual plot for", ii), sub = "radius of circles proportional to leverage") lines(supsmu(mp[, ii], comp.res[, ii])) showlabs(mp[, ii], comp.res[, ii], labels) for(iii in dimnames(xr)[[2]]) { symbols(xr[, iii], yr, circles = hat, inches = max(hat), xlab = paste("Residual of", iii, "on all predictors but", ii), ylab = paste( "Residual of", yname, "on all but", ii), main = paste( "Modified added variable plot for", iii), sub = "radius of circles proportional to leverage" ) lines(supsmu(xr[, iii], yr)) showlabs(xr[, iii], yr, labels) } } } # # effect of dropping one observation DFBETA # pairs(lmi$coefficients) # main = "Effect of dropping one case", sub = fname) invisible(0) } # # let's try this monster: # diags(fit) # Exercise: Use diagnostics on the analysis of salary # data for the 'Big Law Firm' (blf)