"sir.robust"<- function(data, H) { n <- dim(data)[1] p <- dim(data)[2] - 1 x <- data[, 2:(p + 1)] # # # Determine S-estimator for standardization step (the first step in the SIR-procedure) # # start.est <- cov.mcd(x) # # Determine S-estimator for location and scatter(covariance), using estimators # start.est as the basis for iteration. # Matrix C1 is the estimated variance-covariance matrix, T1 is corresponding # location estimator # s.schaetzer <- mod.s.schaetzer(x, start.est[[2]], start.est[[1]]) c1 <- s.schaetzer[[2]] t1 <- s.schaetzer[[1]] # # # Standardize X-Matrix using T1 and C1^(-1/2) # x <- scale(x, center = t1, scale = F) c1.inv <- solve(c1) c1.inv.sva <- svd(c1.inv) c1.inv.wurzel <- t(c1.inv.sva$v %*% (t(c1.inv.sva$u) * sqrt(c1.inv.sva$ d))) x <- x %*% c1.inv.wurzel # # # Recombine original Y-vector and standardized X-matrix to a matrix. # Sort x-vectors according to the sorted Y-values (sort Y from smallest to largest) # data[, 2:(p + 1)] <- x data.sort <- data[order(data[, 1]), ] # # # Determine size of slices as [n/H]; check whether division results in equally # sized slices; if not distribute remaining n-[n/H] observations to the slices in # such a manner that the first slice gets the X-observations corresponding to the # [n/H]+1 smallest Y-values the second slice the next [n/H]+ 1 Y-values # and so on, until all "remaining" observations are distributed. # The last slices then contain exactly [n/H] observations. The vector "scheiben" (slices) # contains the absolute frequency of observations for a given slice. # The vectors "scheiben.anfang" and "scheiben.ende" contain indicies of the first and last # observation for a given slide, that means slice i starts with X[scheiben.anfang[i],] and ends # with X[scheiben.ende[i],] (where X is sorted according to the sorted Y-values) # scheibengr <- floor(n/H) rest <- n - H * scheibengr scheiben <- rep(scheibengr, H) if(rest != 0) scheiben[1:rest] <- scheiben[1:rest] + 1 scheiben.ende <- cumsum(scheiben) scheiben.anfang <- c(1, (scheiben.ende + 1)[1:(H - 1)]) # # # # Determine L1-median for each slice # scheiben.l1 <- matrix(0, nrow = H, ncol = p) for(i in 1:H) { zwischen <- data.sort[scheiben.anfang[i]:scheiben.ende[i], 2:(p + 1)] zwischen <- l1.median(zwischen) scheiben.l1[i, ] <- zwischen } # # # # Eigenvalues and eigenvectors of the PP-Covariance estimator. # The estimator itself will not be determined explicitly, because # only the eigenvalues and eigenvectors will be needed. # "richtungen" containes all backtransformed eigenvectors; the # eigenvectors corresponding to the K largest eigenvalues # will then form the relevant dr-directions. # eigen.w.v <- pp.suche.light.start(scheiben.l1) eigenwerte <- eigen.w.v[[1]] eigenvektoren <- eigen.w.v[[2]] richtungen <- c1.inv.wurzel %*% eigenvektoren return(richtungen) } "l1.median"<- function(data) { n <- dim(data)[1] p <- dim(data)[2] eps <- 9.999999999999998e-008 found <- F iterate <- T result <- vector("numeric", p) index <- 1:n data.auswahl <- data[index, ] # # ######################################################### while(iterate == T) { # ######################################################### # # data.auswahl <- data.auswahl[index, ] if(is.null(dim(data.auswahl))) res <- data.auswahl else res <- apply(data.auswahl, 2, mean) result <- result + res data.ausw.n <- dim(data.auswahl)[1] # # ######################################################### # Computation of function, gradient and norm of gradient# ######################################################### # res <- rep(res, n) res <- matrix(res, nrow = n, byrow = T) data <- data - res data.norm <- data^2 data.norm.sum <- apply(data.norm, 1, sum) data.sum <- data.norm.sum[data.norm.sum > eps] data.hilf <- data[data.norm.sum > eps, ] mult <- length(data.norm.sum) - length(data.sum) data.sum <- sqrt(data.sum) r <- 1/data.sum f <- sum(data.sum) summe <- sum(r) r <- rep(r, p) r <- matrix(r, ncol = p) g <- data.hilf * r g <- apply(g, 2, sum) * (-1) norm <- g^2 norm <- (sum(norm)) norm <- sqrt(norm) # # ######################################################### # Check for extremal points # ######################################################### # if(norm < mult + eps) found <- T # # ######################################################### # Simplex ######################################################### # if(!is.null(data.ausw.n) & (!found)) { data.auswahl <- data[index, ] data.ausw.n <- dim(data.auswahl)[1] g1 <- rep(g, data.ausw.n) g1 <- matrix(g1, nrow = data.ausw.n, byrow = T) ang <- g1 * data.auswahl ang <- apply(ang, 1, sum) index <- 1:data.ausw.n index <- index[ang <= 0] if(length(index) == 0) iterate <- F } else iterate <- F } if(!found) { r <- (mult/norm - 1)/summe res <- r * g } # # ######################################################### # Newton-Raphson ######################################################### # while(!found) { mult <- 0 g <- vector("numeric", p) result <- result + res q <- matrix(0, nrow = p, ncol = p) res1 <- rep(res, n) res1 <- matrix(res1, nrow = n, byrow = T) data <- data - res data.norm <- data^2 data.norm.sum <- apply(data.norm, 1, sum) data.sum <- data.norm.sum[data.norm.sum != 0] if(length(data.sum) != 0) { data.hilf <- data[data.norm.sum != 0, ] data.sum <- sqrt(data.sum) r <- 1/data.sum r <- rep(r, p) r <- matrix(r, ncol = p) g <- data.hilf * r g <- apply(g, 2, sum) * (-1) rr <- r^3 zwischen <- data.hilf * rr q <- crossprod(zwischen, data.hilf) * (-1) # # ################################################################# # q als obere Dreiecksmatrix ################################################################# # q[row(q) > col(q)] <- 0 } norm <- sqrt(sum(g^2)) if(norm <= eps) { found <- T # } if(!found) { ifault <- 1 if((q[1, 1] + summe) <= 0) found <- T # } if(!found) { q[1, 1] <- sqrt(q[1, 1] + summe) q[2:p, 1] <- t(q[1, 2:p]/q[1, 1]) qhilf <- q[2:p, 1:(p - 1)] tmp <- (diag(q)) tmp <- tmp[2:p] if(p > 2) qhilf[row(qhilf) < col(qhilf)] <- 0 qhilf <- qhilf^2 if(p > 2) qhilf <- apply(qhilf, 1, sum) tmp <- tmp - qhilf if(sum(tmp <= 0) > 0) found <- T # if(!found) { tmp <- sqrt(tmp) tmp <- c(q[1, 1], tmp) diag(q) <- tmp if(p > 2) { qhilf <- q for(j in 2:p) { if((j + 1) <= p) for(k in (j + 1):p) { for(i in 1:(j - 1)) { qhilf[j, k] <- qhilf[j, k] - q[j, i] * q[k, i] } q[k, j] <- qhilf[j, k]/q[j, j] } } } } } if(!found) { res[1] <- g[1]/q[1, 1] * (-1) for(j in 2:p) { tmp <- g[j] * (-1) for(i in 1:(j - 1)) { tmp <- tmp - q[j, i] * res[i] } res[j] <- tmp/q[j, j] } res[p] <- res[p]/q[p, p] for(j2 in 1:(p - 1)) { tmp <- res[(p - j2)] for(j in (p - j2 + 1):p) { tmp <- tmp - q[j, (p - j2)] * res[j] } res[(p - j2)] <- tmp/q[(p - j2), (p - j2)] } ifault <- 0 } # # found <- T } return(result) } "pp.suche.light"<- function(daten, projekt.matrix) { eps <- 1e-006 n <- dim(daten)[1] p <- dim(daten)[2] dirs1.da <- F dirs2.da <- F l1med <- l1.median(daten) daten.stand <- scale(daten, center = l1med, scale = F) norm <- apply(daten.stand, 1, vecnorm) zwischen <- sum(norm < eps) if(zwischen == n) { cat("Fehler: alle Beobachtungen = L1-Median; Abbruch PP-Suche-Light \n" ) return(0) } daten.neu <- daten.stand[norm >= eps, ] norm <- norm[norm >= eps] norm <- matrix(rep(norm, p), ncol = p) testdir <- daten.neu/norm testdir <- testdir %*% projekt.matrix norm <- apply(testdir, 1, vecnorm) testdir <- testdir[norm >= eps, ] if(sum(norm >= eps) > 0) dirs1.da <- T if(dirs1.da) { norm <- norm[norm >= eps] norm <- matrix(rep(norm, p), ncol = p) testdir <- testdir/norm proj.vec.mat <- daten %*% t(testdir) krit <- apply(proj.vec.mat, 2, rcq.schaetzer) dirs1 <- rbind(t(testdir), krit) dirs1 <- dirs1[, rev(order(dirs1[(p + 1), ]))] } daten.neu <- daten[1, ] for(j in 2:n) daten.neu <- rbind(daten.neu, (daten[1:(n - j + 1), ] - daten[ j:n, ])) daten.neu <- daten.neu[-1, ] norm <- apply(daten.neu, 1, vecnorm) daten.neu <- daten.neu[norm >= eps, ] norm <- norm[norm >= eps] norm <- matrix(rep(norm, p), ncol = p) testdir <- daten.neu/norm testdir <- testdir %*% projekt.matrix norm <- apply(testdir, 1, vecnorm) testdir <- testdir[norm >= eps, ] if(sum(norm >= eps) > 0) dirs2.da <- T if(dirs2.da) { norm <- norm[norm >= eps] norm <- matrix(rep(norm, p), ncol = p) testdir <- testdir/norm proj.vec.mat <- daten %*% t(testdir) krit <- apply(proj.vec.mat, 2, rcq.schaetzer) dirs2 <- rbind(t(testdir), krit) dirs2 <- dirs2[, rev(order(dirs2[(p + 1), ]))] } if(dirs1.da & dirs2.da) { dirs <- cbind(dirs1, dirs2) dirs <- dirs[, rev(order(dirs[(p + 1), ]))] krit <- dirs[(p + 1), ] richtg <- dirs[1:p, ] best.krit <- krit[1] best.dir <- richtg[, 1] # ergebnis <- list(best.krit, best.dir) } else { if(dirs1.da) { dirs1 <- dirs1[, rev(order(dirs1[(p + 1), ]))] krit <- dirs1[(p + 1), ] richtg <- dirs1[1:p, ] best.krit <- krit[1] best.dir <- richtg[, 1] ergebnis <- list(best.krit, best.dir) } if(dirs2.da) { dirs2 <- dirs2[, rev(order(dirs2[(p + 1), ]))] krit <- dirs2[(p + 1), ] richtg <- dirs2[1:p, ] best.krit <- krit[1] best.dir <- richtg[, 1] ergebnis <- list(best.krit, best.dir) } if(!dirs1.da & !dirs2.da) ergebnis <- list(-10, 0) } return(ergebnis) } "pp.suche.light.start"<- function(daten) { p <- dim(daten)[2] proj <- diag(p) ident <- diag(p) eigenwerte <- 0 for(i in 1:p) { zwischen <- pp.suche.light(daten, proj) if(zwischen[[1]] == -10) { cat("Daten ergeben hoechstens", (i - 1), "Richtungen \n") break } eigenwerte <- c(eigenwerte, zwischen[[1]]) if(i == 1) { eigenvektoren <- zwischen[[2]] mat <- outer(eigenvektoren, eigenvektoren) } else { eigenvektoren <- cbind(eigenvektoren, zwischen[[2]]) mat <- eigenvektoren %*% t(eigenvektoren) } proj <- ident - mat } eigenwerte <- eigenwerte[-1] eigenwerte <- eigenwerte^2 zwischen <- rbind(eigenvektoren, eigenwerte) zwischen <- zwischen[, rev(order(zwischen[(p + 1), ]))] eigenwerte <- zwischen[(p + 1), ] names(eigenwerte)[1] <- "eigenwerte" eigenvektoren <- zwischen[ - (p + 1), ] return(list(eigenwerte, eigenvektoren)) } "rcq.schaetzer"<- function(daten) { n <- length(daten) h <- trunc(n/2) + 1 k <- trunc((h * (h - 1))/2) y <- sort(daten) left <- (n + 1):2 right <- rep(n, n) jhelp <- trunc((n * (n + 1))/2) knew <- k + jhelp n.links <- jhelp n.rechts <- n^2 found <- F while((n.rechts - n.links > n) & (!found)) { jhelp <- left + trunc((right - left + 1)/2) jhelp <- jhelp * (left <= right) zwischen <- n + 1 - jhelp work <- y - y[(zwischen)] work <- work[2:n] weight <- (right - left + 1) * (left <= right) weight <- weight[2:n] weight <- weight[!is.na(work)] work <- work[!is.na(work)] trial <- whimed(work, weight) vergleich <- round(100000 * trial + 0.5)/100000 P <- vector("numeric", n) Q <- vector("numeric", n) j <- 0 for(i in n:1) { while((j < n) & (round(100000 * (y[i] - y[n - j]) + 0.5 )/100000 < vergleich)) j <- j + 1 P[i] <- j } j <- n + 1 for(i in 1:n) { while((j > 1) & (round(100000 * (y[i] - y[n - j + 2]) + 0.5)/100000 > vergleich)) j <- j - 1 Q[i] <- j } P.summe <- sum(P) Q.summe <- sum(Q) - n if(knew <= P.summe) { right <- P n.rechts <- P.summe } else { if(knew > Q.summe) { left <- Q n.links <- Q.summe } else { Qv <- trial found <- T } } } if(!found) { work <- 0 for(i in 2:n) { if(left[i] <= right[i]) { for(j in left[i]:right[i]) { zwischen <- y[i] - y[n - j + 1] work <- c(work, zwischen) } } } work <- work[2:(length(work))] Qv <- sort(work) Qv <- Qv[(knew - n.links)] } # ################################################################# if(n <= 9) { if(n == 2) dn <- 0.399 if(n == 3) dn <- 0.994 if(n == 4) dn <- 0.512 if(n == 5) dn <- 0.844 if(n == 6) dn <- 0.611 if(n == 7) dn <- 0.857 if(n == 8) dn <- 0.669 if(n == 9) dn <- 0.872 } else { if(n %% 2 == 1) { dn <- n/(n + 1.4) } if(n %% 2 == 0) { dn <- n/(n + 3.8) } } Qn <- dn * 2.2219 * Qv # return(Qn) } "whimed"<- function(awork, weight) { nn <- length(awork) wtotal <- sum(weight) wrest <- 0 found <- F while(!found) { trial <- sort(awork) trial <- trial[(trunc(nn/2) + 1)] wleft <- weight[awork < trial] wleft <- sum(wleft) wright <- weight[awork > trial] wright <- sum(wright) wmid <- sum(weight) - wleft - wright if(2 * (wrest + wleft) > wtotal) { acand <- awork[awork < trial] wcand <- weight[awork < trial] nn <- length(acand) } else { if(2 * (wrest + wleft + wmid) > wtotal) { whimed <- trial found <- T } else { acand <- awork[awork > trial] wcand <- weight[awork > trial] nn <- length(acand) wrest <- wrest + wleft + wmid } } if(!found) { awork <- acand weight <- wcand } } return(whimed) }