#
# Week7a_BLF.SSC (updated in Week 8)
# MATH 3330 Section B
# Fall 2001
#
# Analysis of salary and job evaluation data
# from a big law firm (synthetic data based on
# a true story)
#
# Import blf from blf.xls on the web site
#
# NOTE(review): this script was stored with its line breaks removed, which
# turned every statement into part of one long comment.  The line structure
# below is reconstructed from the statement boundaries.
#
# NOTE(review): the script expects the imported data frame to be available
# as `blf.orig`; that object is not created anywhere in this file.

blf <- blf.orig   # work on a copy so the imported data stay untouched
summary(blf)

# NOTE(review): panel.xyell is not defined in this file -- presumably a
# course-supplied panel function; confirm it is loaded before running.
pairs(blf, panel = panel.xyell)
pairs(blf[, -1], panel = panel.xyell)   # same plot without the first column

#
# Extra sums of squares
# Testing whether Knowledge is enough to predict salary
#

fit.full <- lm(Salary ~ Knowledge + Experience + Communication, data = blf)
summary(fit.full)
anova(fit.full)

fit.null <- lm(Salary ~ Knowledge, data = blf)
summary(fit.null)
anova(fit.null)

anova(fit.null, fit.full)   # compares two fits

#
# Extra Sums of Squares and the overall F test
#

# fit an intercept-only model
fit.int <- lm(Salary ~ 1, data = blf)   # 1 is there by default
summary(fit.int)    # note that S-Plus is unhappy when it
                    # tries to do an overall F on this one
summary(fit.full)   # compare overall F with following:
anova(fit.int, fit.full)

#
# Dummy variable for Gender
#

blf
# Define a dummy variable
# (the logical comparison times 1 gives 1 where Gender is 'F', 0 otherwise)
blf$Female <- 1 * (blf$Gender == 'F')
blf

# NOTE(review): xyplot() and panel.superpose are Trellis functions; under R
# they are provided by the lattice package, which this script does not attach.
xyplot(Salary ~ Experience, data = blf)
xyplot(Salary ~ Experience | Gender, data = blf)
xyplot(Salary ~ Experience | Gender, data = blf, panel = panel.xyell)

fit1 <- lm(Salary ~ Experience, data = blf)
summary(fit1)

fit2 <- lm(Salary ~ Experience + Female, data = blf)
summary(fit2)

fit3 <- lm(Salary ~ Female, data = blf)   # regressing on a dummy variable
summary(fit3)

# Showing the fitted values
# (one row for every Experience / Female combination)
pred <- expand.grid(Experience = 1:7, Female = 0:1)
pred$Salary <- predict(fit2, newdata = pred)
xyplot(Salary ~ Experience | Female, data = pred, type = 'l')
xyplot(Salary ~ Experience, data = pred, groups = Female, type = 'l',
       panel = panel.superpose)

#
# What if the lines are not necessarily parallel (7.3)
#

# Experience * Female expands to both main effects plus their interaction
fit4 <- lm(Salary ~ Experience * Female, data = blf)
summary(fit4)
pred$Salary4 <- predict(fit4, newdata = pred)
xyplot(Salary4 ~ Experience, data = pred, groups = Female, type = 'l',
       panel = panel.superpose)

#
# Using categorical variables directly (other codings)
#
# default coding for a factor:
# NOTE(review): contrasts() and levels() below presume blf$Gender is stored
# as a factor -- confirm after the data are imported.
contrasts(blf$Gender)

# fitting model directly with a factor
fit5 <- lm(Salary ~ Experience + Gender, data = blf)
summary(fit5)
fit5$contrasts
# compare with
# NOTE(review): the original line breaks were lost; this call is restored as
# code (rather than as part of the comment) so the two summaries can be
# compared side by side.  fit2 is the dummy-variable fit from earlier in the
# script.
summary(fit2)

# Fitted lines for each Gender level, with a key labelling the lines
pred <- expand.grid(Experience = 0:7, Gender = levels(blf$Gender))
pred$Salary <- predict(fit5, newdata = pred)
xyplot(Salary ~ Experience, data = pred, groups = Gender, type = 'l',
       panel = panel.superpose,
       key = list(
         columns = 2,
         text = list(levels(pred$Gender)),
         # take the first two superpose.line settings for the key lines
         lines = Rows(trellis.par.get('superpose.line'), 1:2)
       ))