Statistics: October 2007

Friedman Test for Randomized Block Designs

block <- rep(1:6,3); b <- length(unique(block))
task <- rep(c('A','B','C'),rep(6,3)); k<- length(unique(task))
time <- c(1.21,1.63,1.42,1.16,2.43,1.94,1.56,2.01,1.7,
1.27,2.64,2.81,1.48,1.63,2.06,1.27,1.98,2.44)


# rank for each block
rr <- tapply(time, block,rank)

R <- rep(0,length(time))
for(i in 1:b)
  for(j in 1:k)
    R[b*(j-1)+i] <- rr[[i]][j]

# rank sum of each treatment
S <- tapply(R,task,sum)

# test statistic
Fr <- 12/b/k/(k+1)*sum(S^2) - 3*b*(k+1)

# p-value
pvalue <- 1-pchisq(Fr,k-1)

# R function
friedman.test(time ~ task | block)

Kurskal-Wallis Test for the One-Way Layout

line <- rep(1:3,rep(10,3))
defects <- c(6,38,3,17,11,30,15,16,25,5,34,28,42,13,40,31,9,32,39,27,13,35,19,4,29,0,7,33,18,24)

R <- rank(defects)

defect <- data.frame(line, defects, R)

# rank sum of each group
S <- tapply(defect$R,defect$line,sum)

# sample size of each group
ni <- table(defect$line)

# total sample size
n <- sum(ni)

# number of groups
k <- length(unique(line))

# test statistic
H <- 12/(n*(n+1))*sum(S^2/ni) - 3*(n+1)

# p-value
pvalue <- 1-pchisq(H, k-1)

### R function
kruskal.test(defects~line, data=defect)

Wilcoxon's signed rank test - paired

math <- c(22,37,36,38,42,58,58,60,62,65,66,56,66,67,62)
art <- c(53,68,42,49,51,65,51,71,55,74,68,64,67,72,65)

# Rank of abolute difference
R <- rank(abs(math-art))

# sample size
n <- length(R)

# R+
Rp <- sum(R[math>art])

# Expected value and variance under H0
E0 <- n*(n+1)/4
V0 <- n*(n+1)*(2*n+1)/24

# Normal approximation (large sample only)
Z <- (Rp-E0)/sqrt(V0)

# p-value for 2-sided test
pvalue <- 2*(1-pnorm(abs(Z)))

Sign Test for a Matched Pairs Experiment

### Small Sample Size

# p=Pr(X>Y)
# H0: p=1/2
X <- c(172,165,206,184,174,142,190,169,161,200)
Y <- c(201,179,159,192,177,170,182,179,169,210)
n <- length(X)

D <- X-Y

# for Ha: p > 1/2
m <- sum(D>0) # number of positive differences
p_value <- 1-pbinom(m-1, 10, 0.5) # Pr(m >= 2)

# for Ha: p < 1/2
p_value <- pbinom(m, 10, 0.5) # Pr(m <= 2)

# for Ha: p is not equal to 1/2
p_value <- (m<0.5*n)*2* pbinom(m, 10, 0.5) + (m>=0.5*n)*2*(1-pbinom(m-1, 10, 0.5))

### Large Sample Size
Z <- (m - n/2)/sqrt(n/4)
p_value <- 2*(1-pnorm(abs(Z))) # two-sided

Wilcoxon Signed-Rank Test

### H0: median = tau vs Ha: median > tau
### Normal approximation

tau<-0.015
N <-20
x <- rnorm(N, 0.05, 0.1)

V <- sum(rank(abs(x-tau))[x-tau>0])
M <- N*(N+1)/4
sigma <- sqrt(N*(N+1)*(2*N+1)/24)

Z <- (V - M) / sigma

p_value <- 1-pnorm(Z)

wilcox.test(x, alternative="g", mu=tau)

Power and Sample Size for one-way ANOVA

### Power and Sample Size for one-way ANOVA

# Alternative Situation:
among 4 means, 2 are the same
the smallest is delta/2 smaller
the largest is delta/2 bigger

# Sample Size


sample_size <- function(n, beta){
  K<-4
  delta<-20
  sigma<-60
  alpha<-0.05

  # critical value
  f <- qf(0.95,K-1,n*(K-1)) 

  # noncentrality parameter
  ncp <- n*delta^2/(2*K*sigma^2) 

  # Pr{ F(ncp) < f } - beta 
  pf(f,K-1,n*(K-1),ncp) - beta 
}

# sample size satisfying power = 1-beta
uniroot(sample_size, c(10,10000), beta=0.2)$root

Type I ~ IV sum of squares

Model: Y ~ 1 + A + B + C + A*B + A*C + B*C + A*B*C

Type I - A partitioning of the model sum of squares into component sums of squares due to each variable or interaction as it is added sequentially to the model in the order prescribed by the MODEL statement.

ex) Type I sum of square for A*C is SS(A*C : A, B, C, A*B)

Type II - the increase in the model sum of squares due to adding the particular variable or interaction to a model that already contains all the other variables and interactions in the MODEL statement which do not notationally contain the particular variable or interaction.

ex) Type II sum of square for A*C is SS(A*C : A, B, C, A*B, B*C)

Type III - the increase in the model sum of squares due to adding the particular variable or interaction to a model that contains all the other variables and interactions listed in the MODEL statement.

ex) Type III sum of square for A*C is SS(A*C : A, B, C, A*B, B*C, A*B*C)

Type IV - In case there are empty cell, Type IV sums of squares are recommended. If there is no empty cell, Type IV = Type III

Sample Size Requirement to detect Gene-Environment interaction

e <- 0.05 # the prevalence of exposure
g <- 0.1 # the prevalence of genotype
Re <- 1 # the relative risk for exposure alone
Rg <- 1 # the relative risk for genotype alone
Ri <- 2 # the effect of the gene-environment interaction
alpha <- 0.05
power <- 0.8
k <- 2 # how many time more control than case?

### Make a 2*2*2 table
table <- rep(0,8)
dim(table) <- c(2,2,2)
a <- rep(0,2)
exp <- table
table[1,1,1] <- g*e*Rg*Re*Ri
table[1,1,2] <- g*e
table[1,2,1] <- (1-e)*g*Rg
table[1,2,2] <- (1-e)*g
table[2,1,1] <- (1-g)*e*Re
table[2,1,2] <- (1-g)*e
table[2,2,1] <- (1-g)*(1-e)
table[2,2,2] <- (1-g)*(1-e)

table[,,1] <- table[,,1]/sum(table[,,1])
table[,,2] <- k*table[,,2]

### Mantel-Haenszel common odds ratio
Rmh <- (table[1,1,1]*table[1,2,2]/sum(table[1,,])
+ table[2,1,1]*table[2,2,2]/sum(table[2,,]))/
(table[1,1,2]*table[1,2,1]/sum(table[1,,])
+ table[2,1,2]*table[2,2,1]/sum(table[2,,]));

MH <- function(x) Rmh*(sum(table[i,1,])-x)*(sum(table[i,,1])-x)
- x*(sum(table[i,,])-sum(table[i,1,])-sum(table[i,,1])+x)

OR <- function(x) x[1,1]*x[2,2]/x[1,2]/x[2,1]


for(i in 1:2){
  a[i]<-uniroot(MH, c(0,min(sum(table[i,1,]),sum(table[i,,1]))),tol = 1e-10)$root
  exp[i,,] <- c(a[i], sum(table[i,,1])-a[i], sum(table[i,1,])-a[i], 
  sum(table[i,,])-sum(table[i,1,])-sum(table[i,,1])+a[i])
}

OR(exp[1,,])
OR(exp[2,,])

vn <- sum(1/exp)
va <- sum(1/table)

n <- (qnorm(1-alpha/2)*sqrt(vn) + qnorm(power)*sqrt(va))^2/log(Ri)^2

유의수준 (level of significance)

가설 검정시에 항상 나오는것이 유의수준(level of significance)입니다.

유의수준은 가설검정시
귀무가설이 맞을때 그것을 기각하는 오류를 얼마나 허용할지를 말해줍니다.
이러한 오류를 1종오류라고도 합니다.

보통 1종오류를 일정수준으로 고정시킨후 가설검정을 합니다.
가장 많이 쓰는 값은 물론 0.05이지요.

예를 들어보면,
나뭇잎 하나를 발견했다고하죠.
그런데 그 나뭇잎이 A라는 나무의 잎하고 닮았습니다.
그래서 그 잎의 길이를 측정해서, A나무의 잎인지 아닌지 결정을 내리기로했습니다.

측정했더니 12.5cm였습니다.

A라는 나무의 잎의 길이는 10cm이고 표준편차가 1인 정규분포를 따른다고 하지요.

A나무의 잎은 평균보다 훨씬 큰 14cm일수도 있고 훨씬 작은 6cm일수도 있습니다. 그러나 평균에서 멀어질수록 가능성은 적어지죠.

여기서 유의수준을 0.05로 한다는것은 좀 극단적으로 표현하면
A나무의 잎이라도 평균에 가까운 100*(1-0.05)=95%만을 정상으로 생각하고
평균과 멀리 떨어진 나머지 5%는 비정상으로 생각한다는겁니다.

A나뭇잎 길이의 95% 신뢰구간을 구해보면,
(10-1.96*1, 10+1.96*1) = (8.04, 11.96)입니다.
A나뭇잎이 이 구간안에 있을 확률은 95%인데
미지의 나뭇잎은 12.5여서 비정상으로 분류된 5%에 속하므로
A나뭇잎이 아니라는 결론을 내립니다.

다른 말로, 8.04와 11.96이 기각력이 되고 측정된 검정통계량 12.5는 기각력에 속하므로
A나무의 잎이 아니라는 결론을 내립니다.

이 경우 p-value를 계산해보면
Pr(X >12.5 | mu=10) = Pr{(X-10)/1 > (12.5-10)/1} = Pr( Z > 2.5 ) = 0.0062인데
양측검정이므로 2배인 0.0124입니다.

유의확률 (p-value)

통계패키지로 데이타를 분석하다보면 항상 나오는게 p-value입니다.

많은 분들이 데이타 분석시 아래 정도만 알고계실겁니다.
p-value <= 0.05, 귀무가설 기각
p-value > 0.05, 귀무가설을 기각할수 없음.

매번 이렇게 결론을 내리다보면 찜찜하지요.

그럼 이 찜찜함을 해소해드리도록 노력해보겠습니다.

첫째, p-value는 확률입니다. 그러므로 (0, 1)의 값을 갖습니다.

둘째, p-value를 쉽게 설명하자면, 실험을 했건 설문조사를 했건 거기서 관찰된 데이타 또는 그것의 summary인 검정통계량(test statistic)이 귀무가설(H0)을 지지하는 정도입니다. 이 해석에 의하면 p-value가 작을수록 관찰된 데이타가 귀무가설을 지지하는 정도가 약해지므로 귀무가설을 기각하겠지요.

복잡한걸 싫어하시는 분은 여기까지만 아시면 되고요. 좀 더 알고 싶으신 분은 계속 읽어보세요.

셋째, p-value의 정확한 정의를 말로 써보면, 귀무가설이 맞다고 가정했을때 얻어진 검정통계량보다 더 극단적인 결과가 나올 확률입니다. 여기서 극단적이라함은 대립가설에 유리하게 나오는것을 의미합니다. 좀 어렵죠... 그래서 예를 들어보죠.

1970년대에 한국 성인 남자의 평균키가 170cm이었는데,
2000년에 키의 평균이 증가했을거라고 주장하고 표본을 뽑아서 측정했더니 175cm였습니다.

그러면 가설이 어떻게 될까요.
H0: mu=170 vs Ha: mu > 170
이겠지요.

여기서 2000년에 측정된 표본평균 175가 검정통계량입니다.

검정통계량이 크면 클수록 귀무가설에 불리하고 대립가설에 유리하겠지요.
p-value가 검정톨계량이 관찰치보다 더 대립가설에 유리하게 나올 확률이라고했으니,

p-value= Pr(표본평균 > 175) 입니다.

그런데 이게 다가 아니죠. 귀무가설이 맞다는 가정했을때라는 단서가 있으니

p-value = Pr(표본평균 > 175 | mu=170) 입니다.

그런데 여기서 귀무가설이 맞다고 가정하고 구한 이유에 주목해야합니다.

표본평균의 표준오차를 1이라고 하면
위의 확률을 구할때 "Z=(표본평균 - mu)/1=(표본평균-mu)"로 표준화시키겠죠.

p-value = Pr{ (표본평균-mu)/1 > (175-170)/1 } = Pr(Z > 5) = 0
으로 p-value가 0입니다.

만약 관찰된 검정통계량이 175가 아니고, 귀무가설에 가까와서 170.5라고 하면
p-value = Pr( Z > 0.5) = 0.3085375 여서 꽤 큰 값이 나오지요.

짐작 되시겠지만
귀무가설이 맞다고 가정함으로써
귀무가설을 기준으로 삼고
관찰된 검정통계량이 거기서 얼마나 멀리 떨어져있나 보는것입니다.

멀리 떨어지면 p-value가 작아 대립가설을 지지하고,
가까우면 p-value가 커므로 귀무가설을 지지하겠지요.

참고) 양측검정의 경우 (Ha: mu is not 170)는 대립가설을 지지하는게 170으로부터 양쪽으로 멀어지는거죠. 그래서 175보다 큰 경우와 mu=170을 중심으로 정반대인 165보다 작은 경우도 동시에 고려해야합니다. p-value = Pr( 표본평균 > 175 or 표본평균 < 165 | mu=170) 입니다.

그래서 정규분포같이 대칭인 경우는 양측검정의 p-value는 단측검정의 p-value의 2배입니다.

Parametric vs Nonparametric

Independent t-test vs Mann-Whitney test, Wilcoxon rank-sum test

Paired t-test vs Wilcoxon signed rank-sum test

1-way ANOVA vs Kruskal-Wallis test

2-way ANOVA vs Friedman test

Pearson correlation coefficient vs Spearman R, Kendal Tau or Gamma coefficient

Standard Error in a finite population

N, n, sigma

Variance of sample mean is
sigma^2 / n * (N-n)/(N-1)

Statistics

Sunday, October 28, 2007