*******************************************************************
*                                                                 *
*  Tests of Homogeneity                                           *
*                                                                 *
*  ANOVA F-Test, Cochran, and Welch test                          *
*                                                                 *
*  Data example: Meier (1953),                                    *
*  percentage of albumin in plasma protein in human subjects      *
*                                                                 *
*  n    = sample size                                             *
*  mean = observed mean                                           *
*  var  = observed variance                                       *
*                                                                 *
*******************************************************************;

OPTIONS NODATE NONUMBER;

/* Summary statistics per experiment, plus the derived quantities   */
/* reused by every test below:                                      */
/*   weight      = n / var   (inverse variance of the group mean)   */
/*   weight_mean = weight * mean                                    */
/*   n_mean      = n * mean                                         */
/* The data records must start on the line after DATALINES and end  */
/* with a line holding only a semicolon.                            */
DATA albumin;
    INPUT experiment $ n mean var;
    weight      = n / var;
    weight_mean = weight * mean;
    n_mean      = n * mean;
    DATALINES;
A 12 62.3 12.986
B 15 60.3 7.840
C 7 59.5 33.433
D 16 61.5 18.513
;
RUN;

/* Calculate some sums: totals over all experiments and the number  */
/* of experiments k (N of the first analysis variable).             */
PROC SUMMARY DATA=albumin SUM N;
    VAR weight weight_mean n_mean n;
    OUTPUT OUT=some_sums
           SUM=sum_weight sum_weight_mean sum_n_mean sum_n
           N=k;
RUN;

/* _NULL_ data set for creating macro variables.                    */
/*   gd   = weighted mean of the group means (weights n / var)      */
/*   ybar = overall mean (weights n)                                */
/* CALL SYMPUTX is used instead of CALL SYMPUT: it converts numeric */
/* arguments with a field width of up to 32 characters (SYMPUT uses */
/* 12, which truncates the means), strips leading/trailing blanks,  */
/* and does not write a numeric-to-character conversion note.       */
DATA _NULL_;
    SET some_sums;
    gd   = sum_weight_mean / sum_weight;
    ybar = sum_n_mean / sum_n;
    CALL SYMPUTX("w_sum",  sum_weight);
    CALL SYMPUTX("n_sum",  sum_n);
    CALL SYMPUTX("gd",     gd);
    CALL SYMPUTX("ybar",   ybar);
    CALL SYMPUTX("number", k);
RUN;

/* ANOVA F-Test                                                     */
/*   num = n * (mean - ybar)**2   between-group contribution        */
/*   den = (n - 1) * var          within-group contribution         */
DATA anova;
    SET albumin;
    num = n * (mean - &ybar)**2;
    den = (n - 1) * var;
RUN;

PROC SUMMARY DATA=anova SUM;
    VAR num den;
    OUTPUT OUT=anova_sum SUM=num_sum den_sum;
RUN;

/* F = [(N - k) / (k - 1)] * (sum num / sum den), referred to an    */
/* F distribution with (k - 1, N - k) degrees of freedom.           */
/* SDF returns the upper-tail probability directly, avoiding the    */
/* cancellation error of 1 - CDF for small p-values.                */
DATA anova (KEEP=test statistic p_value);
    SET anova_sum (KEEP=num_sum den_sum);
    test      = "ANOVA F";
    statistic = ((&n_sum - &number) / (&number - 1)) * (num_sum / den_sum);
    p_value   = SDF("F", statistic, &number - 1, &n_sum - &number);
RUN;

/* Cochran's test                                                   */
/*   statistic = sum of weight * (mean - gd)**2, referred to a      */
/*   chi-square distribution with k - 1 degrees of freedom.         */
DATA cochran;
    SET albumin;
    cochran_term = weight * (mean - &gd)**2;
RUN;

PROC SUMMARY DATA=cochran SUM;
    VAR cochran_term;
    OUTPUT OUT=cochran_sum SUM=cochran_sum;
RUN;

DATA cochran (DROP=cochran_sum);
    SET cochran_sum (KEEP=cochran_sum);
    test      = "Cochran";
    statistic = cochran_sum;
    p_value   = SDF("chisquare", statistic, &number - 1);
RUN;

/* Welch's test                                                     */
/*   cochran_term: same weighted squared deviation as in Cochran    */
/*   df_term     = (1 - weight / sum of weights)**2 / (n - 1)       */
DATA welch;
    SET albumin;
    cochran_term = weight * (mean - &gd)**2;
    df_term      = (1 - weight / &w_sum)**2 / (n - 1);
RUN;

PROC SUMMARY DATA=welch SUM;
    VAR cochran_term df_term;
    OUTPUT OUT=welch_test SUM=cochran_sum df_sum;
RUN;

/* statistic = cochran_sum / [(k - 1) + 2 * (k - 2)/(k + 1) * df_sum] */
/* dof       = (k**2 - 1) / (3 * df_sum)   denominator df             */
/* The statistic is referred to an F distribution with (k - 1, dof)   */
/* degrees of freedom.                                                */
DATA welch (KEEP=test statistic p_value);
    SET welch_test (KEEP=cochran_sum df_sum);
    test      = "Welch";
    statistic = cochran_sum /
                ( (&number - 1) + 2 * ((&number - 2) / (&number + 1)) * df_sum );
    dof       = (&number**2 - 1) / (3 * df_sum);
    p_value   = SDF("F", statistic, &number - 1, dof);
RUN;

/* Collection of all the results. LENGTH comes before SET so that   */
/* test is wide enough for every method name.                       */
DATA all;
    LENGTH test $10.;
    SET anova cochran welch;
RUN;

PROC REPORT DATA=all HEADLINE HEADSKIP NOWINDOWS SPLIT="*";
    COLUMN test statistic p_value;
    DEFINE test      / DISPLAY "Method";
    DEFINE statistic / DISPLAY "Test*statistic" FORMAT = 10.3;
    DEFINE p_value   / DISPLAY "P-value" FORMAT = 10.3;
    TITLE "Tests of Homogeneity in the Common Mean Problem";
    TITLE3 "Albumin example";
RUN;