# # MATH 3330 B, Fall 2001 # S-Plus functions for the course # # Copy this file as a script and run it under S-Plus to # create the functions used in MATH 3330 # # Last change: 01-10-15 # data.ell <- function(x, y, radius = 1,...) { # # Draws a data ellipse by transforming and translating a circle # # Latest version: 01-10-15 dmat <- cbind( x, y ) v <- var( dmat ) m <- apply( dmat, 2, mean) rads <- 2* pi * (0:100) / 100 circle <- cbind(cos(rads), sin(rads)) lines ( t(m + radius * t( circle %*% chol(v) )),...) } td <- function( color=F, background.color="Bright White",...){ # open a trellis graphics window with no color trellis.device( color=color, background.color=background.color, ...) } stack <- function( df1, df2 , nams = intersect( names(df1), names(df2)), Type. = 1:2) { # # stacks two data frames like rbind but keeps only names in common # adds a Type. variable to distinguish provenance # # BUGS: 1)should handle any number of data.frames # 2)should check to see if 'Type.' already used # ret <- rbind( df1[,nams], df2[, nams] ) ret$Type. <- rep( Type. , c( nrow( df1), nrow( df2 ))) ret } # Better version of stack to handle more than one data.frame stack <- function( ... , nams , Type. ) { # # stacks any number of data.frames like rbind but keeps only names in common # adds a Type. variable to distinguish provenance # # BUGS: should check to see if 'Type.' already used # dfs <- list(...) if ( missing(nams) ) { nams <- names(dfs[[1]]) for ( i in 2:length(dfs)) { nams <- intersect( nams, names( dfs[[i]] )) } } if ( missing(Type.) ) Type. <- 1:length(dfs) dfs <- lapply( dfs, function(x,nams) x[,nams],nams) ret <- do.call('rbind', dfs) ret$Type. <- rep( Type. , sapply(dfs,nrow)) ret } # # panel function that includes data ellipse # panel.xyell <- function(x, y, ...) { panel.xyplot(x,y,...) data.ell(x,y,...) } # Week 11: glh glh <- function(fit, Lmat) { # tests a General Linear Hypethesis # BUGS: 1) can't cope if Lmat is not of full row rank # 2) it should be easier to specify Lmat # Lmat <- rbind ( Lmat ) # turns Lmat into a row vector if it isn't est <- Lmat %*% coef(fit) fit.sum <- summary(fit) evar.est <- Lmat %*% (fit.sum$sigma^2 * fit.sum$cov.unscaled) %*% t(Lmat) df.num <- nrow(Lmat) # this assumes Lmat of full row rank df.den <- fit$df.residual F.val <- t(est) %*% solve(evar.est) %*% est / df.num c("Df Num" = df.num, "Df Denom" = df.den, "F Value" = F.val, "Pr(F)" = 1 - pf(F.val, df.num, df.den )) } # Week 11: diags diags <- function(x, ...) UseMethod("diags") # # define a method for objects of class 'lm' # diags.lm <- function(x, y, ..., ask, labels = names(residuals(x)), showlabs = text) { # diags.lm # graphical diagnostics for lm, locally first-order for glm # enlarged version of plot.lm with emphasis on diagnostics # G. Monette, Dec. 94 # modified Nov. 97, May 98 # if(!missing(ask)) { op <- par(ask = ask) on.exit(par(op)) } form <- formula(x) f <- predict(x) r <- residuals(x) nams <- names(r) if(!missing(labels)) { nams <- names(residuals(x)) # # if labels not same length as residuals assume it's a vector # of len == original data and select elements included in residuals if(length(nams) != length(labels)) labels <- labels[nams] } ret <- NULL if(missing(y)) { y <- f + r yname <- deparse(form[[2]]) } else yname <- deparse(substitute(y)) fname <- paste("Fitted:", deparse(form[[3]]), collapse = " ") plot(f, y, xlab = fname, ylab = yname, main = "Residual vs. Predicted", ...) abline(0, 1, lty = 2) showlabs(f, y, labels) # # get influence diags and model matrix while looking at first plot # lmi <- lm.influence(x) hat <- lmi$hat sigma <- lmi$sigma # drop 1 sigma mm <- scale(model.matrix(x), scale = F) # centres each column mp <- predict(x, type = "terms") comp.res <- mp + r # effect + residual # # Absolute residual vs. predicted # plot(f, abs(r), xlab = fname, ylab = deparse(substitute(abs(resid(x))) ), main = "Absolute Residual vs. Predicted", ...) showlabs(f, abs(r), labels) # # # Normal quantile plot # zq <- qqnorm(r, main = "Normal Quantile Plot", ylab = "Residual", sub = fname) qqline(r) showlabs(zq, labels) # # # Symmetry plot of residuals (Lawrence C. Hamilton, doseression with # Graphics, Duxbury, 1992) n <- length(r) r.o <- sort(r) half <- (n + 1)/2 if(n %% 2 == 1) { # n is odd med <- r.o[half] below <- med - r.o[half:1] above <- r.o[half:n] - med } else { # n is even med <- sum(r.o[c(half, half + 1)])/2 below <- med - r.o[(n/2):1] above <- r.o[(n/2 + 1):n] - med } opt <- par(pty = "s") ran <- range(c(below, above)) plot(below, above, main = "Symmetry plot of residuals", xlab = "Distance below median", ylab = "Distance above median", xlim = ran, ylim = ran) abline(0, 1, lty = 2) par(opt) # # # Studentized residual vs. leverage # std.r <- r/(sigma * sqrt(1 - hat)) plot(hat, std.r, xlab = "Leverage (hat)", ylab = yname, sub = fname, main = "Studentized residual vs. Leverage", ...) showlabs(hat, std.r, labels) # plot(lmi$sig, std.r) # # # Added variable plot (partial doseression leverage plot) # and partial residual plot # dr1 <- drop1(x, . ~ ., keep = c("residuals", "x.residuals")) drk <- dr1$keep # print(dimnames(drk)[[1]]) for(ii in dimnames(drk)[[1]]) { # print(paste("Doing AVP for", ii)) xr <- drk[[ii, "x.residuals"]] yr <- drk[[ii, "residuals"]] if(is.null(dim(xr))) { # one degree of freedom effect symbols(xr, yr, circles = hat, inches = max(hat), xlab = paste("Residual of", ii, "on other predictors"), ylab = paste( "Residual of", yname, "on all but", ii), main = paste("Added variable plot for", ii), sub = "radius of circles proportional to leverage") abline(lm(yr ~ xr)) lines(supsmu(xr, yr)) showlabs(xr, yr, labels) # partial residual plot symbols(mm[, ii], comp.res[, ii], circles = hat, inches = max(hat), xlab = paste(ii, "- mean(", ii, ")"), ylab = paste("Residual of", yname, "plus component for", ii), main = paste( "Partial residual plot for", ii), sub = "radius of circles proportional to leverage") lines(supsmu(mm[, ii], comp.res[, ii])) showlabs(mm[, ii], comp.res[, ii], labels) } else { symbols(mp[, ii], comp.res[, ii], circles = hat, inches = max(hat), xlab = paste("Effect of", ii), ylab = paste("Residual of", yname, "plus component for", ii), main = paste( "Modified partial residual plot for", ii), sub = "radius of circles proportional to leverage") lines(supsmu(mp[, ii], comp.res[, ii])) showlabs(mp[, ii], comp.res[, ii], labels) for(iii in dimnames(xr)[[2]]) { symbols(xr[, iii], yr, circles = hat, inches = max(hat), xlab = paste("Residual of", iii, "on all predictors but", ii), ylab = paste( "Residual of", yname, "on all but", ii), main = paste( "Modified added variable plot for", iii), sub = "radius of circles proportional to leverage" ) lines(supsmu(xr[, iii], yr)) showlabs(xr[, iii], yr, labels) } } } # # effect of dropping one observation DFBETA # pairs(lmi$coefficients) # main = "Effect of dropping one case", sub = fname) invisible(0) }