# source("rlmx.s") # copyright (C) 1994-9 W. N. Venables and B. D. Ripley # # # 010329: removed rlmx.formula from code # 010329: modified assignment of nmx to dimnames to use <-list(nmx) # 010329: modified assignment of paste(...) to dimnames to use <-list(paste(...)) # 010329: changed default value of k2 (bend factor) to 1.500 from 1.345 # 010329: added default value for const.scale (0) and changes logic to use # fixed scale if const.scale > 0 rlmx <- function(x, ...) UseMethod("rlmx") rlmx.default <- function(x, y, weights, ..., w = rep(1, nrow(x)), init = "ls", psi = psi.huber, scale.est = c("MAD", "Huber", "proposal 2"), k2 = 1.500, method = c("M", "MM"), wt.method = c("case", "inv.var"), maxit = 60, acc = 1e-4, test.vec = "resid", const.scale = 0) { irls.delta <- function(old, new) sqrt(sum((old - new)^2)/max(1e-20, sum(old^2))) irls.rrxwr <- function(x, w, r) { w <- sqrt(w) max(abs((matrix(r * w, 1, length(r)) %*% x)/ sqrt(matrix(w, 1, length(r)) %*% (x^2))))/sqrt(sum(w * r^2)) } method <- match.arg(method) wt.method <- match.arg(wt.method) nmx <- deparse(substitute(x)) if(is.null(dim(x))) { x <- as.matrix(x) dimnames(x)[[2]] <- list(nmx) } else x <- as.matrix(x) if(is.null(dimnames(x)[[2]])) dimnames(x)[[2]] <- list(paste("X", seq(ncol(x)), sep="")) if(qr(x)$rank < ncol(x)) stop("x is singular: singular fits are not implemented in rlmx") if(!(any(test.vec == c("resid", "coef", "w", "NULL")) || is.null(test.vec))) stop("invalid testvec") ## deal with weights w2 <- rep(1, nrow(x)) if(!missing(weights)) { if(length(weights) != nrow(x)) stop("Length of weights must equal number of observations") if(any(weights < 0)) stop("Negative weights value") w <- w * weights if(wt.method == "inv.var") w2 <- 1/sqrt(weights) } if(method == "M") { scale.est <- match.arg(scale.est) scale <- 0 if(!is.function(psi)) psi <- get(psi, mode="function") ## match any ... args to those of psi. arguments <- list(...) if(length(arguments)) { pm <- pmatch(names(arguments), names(psi), nomatch = 0) if(any(pm == 0)) warning(paste("some of ... do not match")) pm <- names(arguments)[pm> 0] psi[pm] <- unlist(arguments[pm]) } if(is.character(init)) { if(init == "ls") temp <- lm.wfit(x, y, w, method="qr") else if(init == "lts") temp <- lqs.default(x, y, intercept=FALSE, nsamp=200) else stop("init method is unknown") coef <- temp$coef resid <- temp$resid } else { if(is.list(init)) coef <- init$coef else coef <- init resid <- y - x %*% coef } } else if(method == "MM") { scale.est <- "MM" temp <- lqs.default(x, y, intercept=FALSE, method="S", k0 = 1.548) coef <- temp$coef resid <- temp$resid psi <- psi.bisquare if(length(arguments <- list(...))) if(match("c", names(arguments), F)) { c0 <- arguments$c if (c0 > 1.548) { psi$c <- c0 } else warning("c must be at least 1.548 and has been ignored") } scale <- temp$scale } else stop("method is unknown") done <- FALSE conv <- NULL n1 <- nrow(x) - ncol(x) # # added code to use constant scale factor if const.scale > 0 # if(scale.est != "MM") scale <- ifelse(const.scale>0,const.scale,mad(resid, 0)) # if(scale.est != "MM") scale <- mad(resid, 0) theta <- 2*pnorm(k2)-1 gamma <- theta + k2^2 * (1 - theta) - 2 * k2 * dnorm(k2) # # calculation for-loop # for(iiter in 1:maxit) { if(!is.null(test.vec)) testpv <- get(test.vec) if(scale.est != "MM") # # added code to use constant scale factor if const.scale > 0 # if (const.scale>0) scale <- const.scale else { # { if(scale.est == "MAD") scale <- median(abs(resid))/0.6745 else scale <- sqrt(sum(pmin(resid^2, (k2 * scale)^2))/(n1*gamma)) if(scale == 0) { done <- TRUE break } } w <- psi(w2*resid/scale) if(!missing(weights)) w <- w * weights temp <- lm.wfit(x, y, w, method="qr") coef <- temp$coef resid <- temp$residuals if(!is.null(test.vec)) convi <- irls.delta(testpv, get(test.vec)) else convi <- irls.rrxwr(x, w, resid) conv <- c(conv, convi) done <- (convi <= acc) if(done) break } if(!done) warning(paste("rlmx failed to converge in", maxit, "steps")) if(!missing(weights)) { tmp <- (weights != 0) w[tmp] <- w[tmp]/weights[tmp] } fit <- list(coefficients = coef, residuals = resid, effects = temp$effects, rank = temp$rank, fitted.values = temp$fitted.values, assign = temp$assign, qr = temp$qr, df.residual = NA, w = w, s = scale, psi = psi, k2 = k2, conv = conv, converged = done, x = x, call = match.call()) class(fit) <- c("rlmx", "lm") fit } print.rlmx <- function(x, ...) { if(!is.null(cl <- x$call)) { cat("Call:\n") dput(cl) } if(x$converged) cat("Converged in", length(x$conv), "iterations\n") else cat("Ran", length(x$conv), "iterations without convergence\n") coef <- x$coef cat("\nCoefficients:\n") print(coef, ...) nobs <- length(x$resid) rdf <- nobs - length(coef) cat("\nDegrees of freedom:", nobs, "total;", rdf, "residual\n") cat("Scale estimate:", format(signif(x$s,3)), "\n") invisible(x) } summary.rlmx <- function(object, method=c("XtX", "XtWX"), correlation=T, ...) { method <- match.arg(method) s <- object$s coef <- object$coef cnames <- names(coef) ptotal <- length(coef) resid <- object$resid n <- length(resid) if(any(na <- is.na(coef))) coef <- coef[!na] p <- length(coef) rdf <- n - p rinv <- diag(p) dimnames(rinv) <- list(cnames, cnames) w <- object$psi(resid/s) S <- sum((resid*w)^2)/rdf psiprime <- object$psi(resid/s, deriv=1) mn <- mean(psiprime) kappa <- 1 + p*var(psiprime)/(n*mn^2) stddev <- sqrt(S)*(kappa/mn) X <- object$x if(method == "XtWX") X <- X * sqrt(w/mean(w)) R <- qr(X)$qr R <- R[1:p, 1:p, drop = FALSE] R[lower.tri(R)] <- 0 rinv <- solve(R, rinv) dimnames(rinv) <- list(cnames, cnames) rowlen <- (rinv^2 %*% rep(1, p))^0.5 names(rowlen) <- cnames if(correlation) { correl <- rinv * array(1/rowlen, c(p, p)) correl <- correl %*% t(correl) } else correl <- NULL coef <- array(coef, c(p, 3)) dimnames(coef) <- list(cnames, c("Value", "Std. Error", "t value")) coef[, 2] <- rowlen %o% stddev coef[, 3] <- coef[, 1]/coef[, 2] object <- object["call"] object$residuals <- resid object$coefficients <- coef object$sigma <- s object$stddev <- stddev object$df <- c(p, rdf, ptotal) object$r.squared <- NA object$cov.unscaled <- rinv %*% t(rinv) object$correlation <- correl object$terms <- NA class(object) <- "summary.rlmx" object } print.summary.rlmx <- function(x, digits = max(3, .Options$digits - 3), ...) { cat("\nCall: ") dput(x$call) resid <- x$residuals df <- x$df rdf <- df[2] if(rdf > 5) { cat("Residuals:\n") if(length(dim(resid)) == 2) { rq <- apply(t(resid), 1, quantile) dimnames(rq) <- list(c("Min", "1Q", "Median", "3Q", "Max"), dimnames(resid)[[2]]) } else { rq <- quantile(resid) names(rq) <- c("Min", "1Q", "Median", "3Q", "Max") } print(rq, digits = digits, ...) } else if(rdf > 0) { cat("Residuals:\n") print(resid, digits = digits, ...) } if(nsingular <- df[3] - df[1]) cat("\nCoefficients: (", nsingular, " not defined because of singularities)\n", sep = "") else cat("\nCoefficients:\n") print(format(round(x$coef, digits = digits)), quote = F, ...) cat("\nResidual standard error:", format(signif(x$sigma, digits)), "on", rdf, "degrees of freedom\n") if(!is.null(correl <- x$correlation)) { p <- dim(correl)[2] if(p > 1) { cat("\nCorrelation of Coefficients:\n") ll <- lower.tri(correl) correl[ll] <- format(round(correl[ll], digits)) correl[!ll] <- "" print(correl[-1, -p, drop = F], quote = F, digits = digits, ...) } } invisible(x) } ###### note in the definition of the function, we use a variable psi.huber <- function(u, k=1.5, deriv=0) { if(!deriv) return(pmin(1, k / abs(u))) abs(u) <= k } #### k2 is an unused argument in psi.hampel psi.hampel <- function(u, k2=1.5, a = 2, b = 4, c = 8, deriv=0) { U <- pmin(abs(u) + 1e-50, c) if(!deriv) return(ifelse(U <= a, U, ifelse(U <= b, a, a*(c-U)/(c-b) ))/U) ifelse(abs(u) <= c, ifelse(U <= a, 1, ifelse(U <= b, 0, -a/(c-b))), 0) } #### k2 is an unused argument in psi.bisquare psi.bisquare <- function(u, k2=1.5, c = 4.685, deriv=0) { if(!deriv) return((1 - pmin(1, abs(u/c))^2)^2) t <- (u/c)^2 ifelse(t < 1, (1 - t)*(1 - 5*t), 0) }