"sir.orig"<- function(data, H) { # Input: matrix with matrix [Y|X], Y = response, X = regressors. # n = number of observations # p = dimension of X-observations # n <- dim(data)[1] p <- dim(data)[2] - 1 # # # extract X-matrix and center around mean; standardize using # S^(-1/2) # x <- data[, 2:(p + 1)] x <- scale(x, center = T, scale = F) s <- (cov.wt(x)[[1]] * (n - 1))/n s.inv <- solve(s) s.inv.sva <- svd(s.inv) s.inv.wurzel <- t(s.inv.sva$v %*% (t(s.inv.sva$u) * sqrt(s.inv.sva$d))) # x <- x %*% s.inv.wurzel # # # Recombine original Y-vector and standardized X-matrix to a matrix. # Sort x-vectors according to the sorted Y-values (sort Y from smallest to largest) # data[, 2:(p + 1)] <- x data.sort <- data[order(data[, 1]), ] # # # Determine size of slices as [n/H]; check whether division results in equally # sized slices; if not distribute remaining n-[n/H] observations to the slices in # such a manner that the first slice gets the X-observations corresponding to the # [n/H]+1 smallest Y-values the second slice the next [n/H]+ 1 Y-values # and so on, until all "remaining" observations are distributed. # The last slices then contain exactly [n/H] observations. The vector "scheiben" (slices) # contains the absolute frequency of observations for a given slice. # The vectors "scheiben.anfang" and "scheiben.ende" contain indicies of the first and last # observation for a given slide, that means slice i starts with X[scheiben.anfang[i],] and ends # with X[scheiben.ende[i],] (where X is sorted according to the sorted Y-values) # scheibengr <- floor(n/H) rest <- n - H * scheibengr scheiben <- rep(scheibengr, H) if(rest != 0) scheiben[1:rest] <- scheiben[1:rest] + 1 scheiben.ende <- cumsum(scheiben) scheiben.anfang <- c(1, (scheiben.ende + 1)[1:(H - 1)]) # # # Determine slice means and slice weights; Computation of # weighted covariance matrix of sliced means # scheiben.mittel <- matrix(0, nrow = H, ncol = p) for(i in 1:H) { zwischen <- data.sort[scheiben.anfang[i]:scheiben.ende[i], 2:(p + 1)] zwischen <- apply(zwischen, 2, mean) scheiben.mittel[i, ] <- zwischen } gewichte <- scheiben/n v.dach <- cov.wt(scheiben.mittel, wt = gewichte, center = F)[[1]] # # # # Eigenvalues and eigenvectors of the weigted covariance matrix. # The estimator itself will not be determined explicitly, because # only the eigenvalues and eigenvectors will be needed. # "richtungen" containes all backtransformed eigenvectors; the # eigenvectors corresponding to the K largest eigenvalues # will then form the relevant dr-directions. # eigen.w.v <- eigen(v.dach) eigenwerte <- eigen.w.v$values eigenvektoren <- eigen.w.v$vectors # richtungen <- s.inv.wurzel %*% eigenvektoren # # richtungen <- eigenvektoren eigen.summe <- n * sort(eigenwerte) eigen.summe <- cumsum(eigen.summe) # # # Determine K using Li's procedure: the standardized sum of all eigen- # values will be compared to a Chi^2 quantile; test H0: K=k vs. H1: K>k # successively until H0 won't be rejected anymore. # If the number of slices is bigger than dimension of the data, Li's test procedure # will be unproblematic. However if this condition is not fullfilled, it is not # possible to test for an arbitrary dimension (between 1 and p), because # negative values for the degrees of freedom of the Chi-square distribution will # appear. In that situation only a dimension up to H-1 # can be tested (more precisely:H0: K=H-2 vs. H1: K>H-2); a warning message # will be given in this situation # # The vector "chi.index" contains the degrees of freedom for the tests up to # a dimension of p, or of H-1 respectively; "vergleich" contains the corresponding # 0.95-Quantiles; "anzahl.richtungen" contains the number of relevant dr-directions # # if(H > p) { kvektor <- (p - 1):0 zwischen1 <- p - kvektor zwischen2 <- H - kvektor - 1 chi.index <- zwischen1 * zwischen2 vergleich <- qchisq(0.95, chi.index) # # print(eigen.summe) # print(vergleich) anzahl.richtungen <- sum(eigen.summe > vergleich) cat("Anzahl relevanter DR-Richtungen: ", anzahl.richtungen, "\n") DR.richtungen <- richtungen[, 1:anzahl.richtungen] } else { cat("Warnung: Es ist H <= p; daher nur noch Test auf Dimensionen bis zu ", H - 1, " moeglich!", "\n") kvektor <- (H - 2):0 zwischen1 <- p - kvektor zwischen2 <- H - kvektor - 1 chi.index <- zwischen1 * zwischen2 vergleich <- qchisq(0.95, chi.index) eigen.summe <- eigen.summe[(p - H + 2):p] anzahl.richtungen <- sum(eigen.summe > vergleich) cat("Anzahl relevanter DR-Richtungen: ", anzahl.richtungen, "\n") DR.richtungen <- richtungen[, 1:anzahl.richtungen] } return(DR.richtungen) }