R语言系列11——非参数检验

nonpara.csv344 · 百度网盘

非参数检验 non-parametric test

1.one-sample

## eg1
# Sample used by the one-sample tests below (16 observations, one of them,
# 87, far above the rest). The assignment sits on its own line so that it is
# not swallowed by the leading `##` comment marker.
x1 <- c(36, 32, 31, 25, 28, 36, 40, 32, 41, 26, 35, 35, 32, 87, 33, 35)

(1)sign test

# One-sample t-test of H0: mean = 37, shown for comparison with the sign test.
t.test(x1, mu = 37)
##
##  One Sample t-test
##
## data:  x1
## t = -0.14123, df = 15, p-value = 0.8896
## alternative hypothesis: true mean is not equal to 37
## 95 percent confidence interval:
##  28.95415 44.04585
## sample estimates:
## mean of x
##      36.5

# Signed deviations from the hypothesised value 37 ...
x1 - 37
##  [1]  -1  -5  -6 -12  -9  -1   3  -5   4 -11  -2  -2  -5  50  -4  -2

# ... and which observations fall below it (13 TRUE, 3 FALSE).
(x1 < 37)
##  [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE
## [13]  TRUE FALSE  TRUE  TRUE

hist(x1)

# BSDA provides SIGN.test(), used in the following chunks.
library(BSDA)
## Loading required package: lattice
##
## Attaching package: 'BSDA'
## The following object is masked from 'package:datasets':
##
##     Orange

# Two-sided sign test of H0: median = 37 on x1 (SIGN.test comes from BSDA).
SIGN.test(x1, md = 37, alternative = "two.sided", conf.level = 0.95)
##
##  One-sample Sign-Test
##
## data:  x1
## s = 3, p-value = 0.02127
## alternative hypothesis: true median is not equal to 37
## 95 percent confidence interval:
##  31.51725 36.00000
## sample estimates:
## median of x
##          34
##
## Achieved and Interpolated Confidence Intervals:
##
##                   Conf.Level  L.E.pt U.E.pt
## Lower Achieved CI     0.9232 32.0000     36
## Interpolated CI       0.9500 31.5173     36
## Upper Achieved CI     0.9787 31.0000     36

# The same p-value by hand: s = 3 of the 16 observations exceed 37, and under
# H0 the count is Binomial(16, 0.5), so p = 2 * P(S <= 3) = 2 * P(S >= 13).
2 * (1 - pbinom(12, 16, 0.5))
## [1] 0.02127075
2 * pbinom(3, 16, 0.5)
## [1] 0.02127075

(2) Wilcoxon signed rank test

# Two-sided Wilcoxon signed rank test of H0: location = 37 on x1.
# x1 contains ties, so R falls back to the normal approximation with
# continuity correction (see the warning in the output below).
wilcox.test(x1,mu=37,alternative="two.sided")
## Warning in wilcox.test.default(x1, mu = 37, alternative = "two.sided"): cannot
## compute exact p-value with ties
##
##  Wilcoxon signed rank test with continuity correction
##
## data:  x1
## V = 29.5, p-value = 0.04904
## alternative hypothesis: true location is not equal to 37

2.two-sample

2.1 paired samples

# x2 is presumably the vector of within-pair differences (section heading:
# paired samples) -- confirm against the data source.
x2 <- c(1.2, -0.6, -0.3, 1.1, -0.2, -0.2, -0.8, 0.3, -0.2, -0.1)

# Two-sided sign test of H0: median difference = 0.
# The argument is spelled `alternative`; the original `altervative` was
# silently ignored, which only worked because "two.sided" is the default.
SIGN.test(x2, md = 0, alternative = "two.sided", conf.level = 0.95)
##
##  One-sample Sign-Test
##
## data:  x2
## s = 3, p-value = 0.3438
## alternative hypothesis: true median is not equal to 0
## 95 percent confidence interval:
##  -0.5027  0.8404
## (interval digits were lost in the source; values restored from the
##  "Interpolated CI" row below)
## sample estimates:
## median of x
##        -0.2
##
## Achieved and Interpolated Confidence Intervals:
##
##                   Conf.Level  L.E.pt U.E.pt
## Lower Achieved CI     0.8906 -0.3000 0.3000
## Interpolated CI       0.9500 -0.5027 0.8404
## Upper Achieved CI     0.9785 -0.6000 1.1000

# Two-sided Wilcoxon signed rank test of H0: location = 0 on x2.
# wilcox.test() takes `mu` (not `md`) and `alternative`; the original
# misspelled names were swallowed by `...` and the defaults (mu = 0,
# two-sided) happened to apply, so the result below is unchanged.
# `conf.level` is dropped because it has no effect unless conf.int = TRUE.
wilcox.test(x2, mu = 0, alternative = "two.sided")
## Warning in wilcox.test.default(x2, mu = 0, alternative = "two.sided"): cannot
## compute exact p-value with ties
##
##  Wilcoxon signed rank test with continuity correction
##
## data:  x2
## V = 24.5, p-value = 0.7982
## alternative hypothesis: true location is not equal to 0

2.2 two independent samples

# Two independent samples (10 and 9 observations, no ties between them).
x <- c(82, 64, 53, 61, 59, 83, 76, 55, 70, 73)
y <- c(80, 60, 65, 91, 86, 84, 77, 93, 75)

## Wilcoxon rank sum test
# With no ties and small samples R computes the exact p-value.
wilcox.test(x, y)
##
##  Wilcoxon rank sum exact test
##
## data:  x and y
## W = 19, p-value = 0.03499
## alternative hypothesis: true location shift is not equal to 0

2.3 multiple samples

Kruskal-Wallis test

# Four independent groups of unequal size (7, 7, 8 and 8 observations).
A <- c(60, 75, 62, 76, 73, 98, 86)
B <- c(72, 52, 68, 82, 74, 64, 87)
C <- c(61, 85, 78, 66, 70, 59, 69, 79)
D <- c(63, 58, 65, 71, 84, 77, 80, 89)

# Kruskal-Wallis rank sum test of H0: all groups share the same location.
kruskal.test(list(A, B, C, D))
##
##  Kruskal-Wallis rank sum test
##
## data:  list(A, B, C, D)
## Kruskal-Wallis chi-squared = 0.55369, df = 3, p-value = 0.9069

3. goodness of fit test

## eg1: observed counts against equal cell probabilities
# chisq.test() on a plain count vector tests against a uniform distribution.
x1 <- c(75, 125, 70, 80, 135, 115)
chisq.test(x1)
##
##  Chi-squared test for given probabilities
##
## data:  x1
## X-squared = 40, df = 5, p-value = 1.493e-07

## eg2: observed counts against a Poisson(lambda) distribution
# Cells are "<= 1", 2, 3, ..., 7 and ">= 8" (8 cells, matching length(f2)).
f2 <- c(7, 12, 18, 17, 20, 13, 6, 7)
lambda <- 4.33
prob <- c(ppois(1, lambda), dpois(2:7, lambda), 1 - ppois(7, lambda))
prob
## (8 cell probabilities; their digits were truncated in the source printout)
chisq.test(f2, p = prob)
##
##  Chi-squared test for given probabilities
##
## data:  f2
## X-squared = 1.3068, df = 7, p-value = 0.9883

# chisq.test() always uses df = cells - 1 = 7. The line below recomputes the
# p-value with df = 6, i.e. one more degree of freedom removed -- presumably
# because lambda was estimated from the same data.
1 - pchisq(1.3068, 6)
## [1] 0.9713  (approximate; the original printout was truncated)

## eg3: observed counts against a Normal(mu, sig) distribution
mu <- 126.37
sig <- 17.75
f3 <- c(5, 8, 22, 27, 17, 9, 12)
# Cell boundaries: (-Inf, 99.5], (99.5, 109.5], ..., (139.5, 149.5], and the
# remaining upper tail. pnorm(-Inf) is exactly 0, like the original -10000.
cc <- c(-Inf, seq(99.5, 149.5, 10))
# Successive differences of the CDF give the six bounded-cell probabilities;
# the last cell takes whatever mass is left so the vector sums to 1.
prob <- diff(pnorm(cc, mu, sig))
prob <- c(prob, 1 - sum(prob))
prob
## (7 cell probabilities; their digits were truncated in the source printout)
chisq.test(f3, p = prob)
##
##  Chi-squared test for given probabilities
##
## data:  f3
## X-squared = 5.5142, df = 6, p-value = 0.4797
# NOTE(review): if mu and sig were estimated from these data, the reference
# distribution should have df = 7 - 1 - 2 = 4 (as done by hand in eg2), e.g.
# pchisq(5.5142, df = 4, lower.tail = FALSE) -- confirm with the data source.

## test of independence
# 2 x 2 contingency table, Pearson chi-squared without continuity correction.
chisq.test(matrix(c(43, 13, 162, 121), 2, 2), correct = FALSE)
##
##  Pearson's Chi-squared test
##
## data:  matrix(c(43, 13, 162, 121), 2, 2)
## X-squared = 7.4688, df = 1, p-value = 0.006278

4. test of normality

# Read the sample from nonpara.csv (no header row) and flatten the whole
# table into one numeric vector. The file is addressed by its full path
# instead of calling setwd(), so the session's working directory is left
# untouched; adjust `data_dir` to wherever the CSV was saved.
data_dir <- "/Users/sifan/R/datasets"
x <- read.csv(file.path(data_dir, "nonpara.csv"), header = FALSE)
x <- as.vector(as.matrix(x))
hist(x)

length(x)
## [1] 84

(1)偏度 峰度

# Sample kurtosis and skewness of x with the corresponding tests from the
# `moments` package. The two scalar printouts were truncated in the source;
# they are restored to the precision shown in the test output beneath them.
library(moments)

kurtosis(x)
## [1] 3.3705
# Anscombe-Glynn test of H0: kurtosis = 3 (the value for a normal law).
anscombe.test(x)
##
##  Anscombe-Glynn kurtosis test
##
## data:  x
## kurt = 3.3705, z = 1.0064, p-value = 0.3142
## alternative hypothesis: kurtosis is not equal to 3

skewness(x)
## [1] -0.13613
# D'Agostino test of H0: skewness = 0.
agostino.test(x)
##
##  D'Agostino skewness test
##
## data:  x
## skew = -0.13613, z = -0.54550, p-value = 0.5854
## alternative hypothesis: data have a skewness

(2)KS分布检验one-sample

# Ten failure-free operating times of a piece of equipment.
X <- c(420, 500, 920, 1380, 1510, 1650, 1760, 2100, 2300, 2350)

# Sample mean. Despite its name this is not a rate: for an exponential
# distribution the rate is 1 / mean. It is printed for reference only and is
# not used by the test below.
lambda <- mean(X)
lambda
## [1] 1489

# One-sample Kolmogorov-Smirnov test of H0: X ~ Exponential(rate = 1/1000).
ks.test(X, "pexp", 1 / 1000)
##
##  One-sample Kolmogorov-Smirnov test
##
## data:  X
## D = 0.44842, p-value = 0.02363
## alternative hypothesis: two-sided

two-sample

# Two samples of 25 and 20 observations.
xx <- c(
  0.61, 0.29, 0.06, 0.59, -1.73, -0.74, 0.51, -0.56, 0.39, 1.64,
  0.05, -0.06, 0.64, -0.82, 0.37, 1.77, 1.09, -1.28, 2.36, 1.31,
  1.05, -0.32, -0.40, 1.06, -2.47
)
yy <- c(
  2.20, 1.66, 1.38, 0.20, 0.36, 0.00, 0.96, 1.56, 0.44, 1.50,
  -0.30, 0.66, 2.31, 3.29, -0.27, -0.37, 0.38, 0.70, 0.52, -0.71
)

# Two-sample Kolmogorov-Smirnov test of H0: both samples come from the same
# distribution.
ks.test(xx, yy)
##
##  Two-sample Kolmogorov-Smirnov test
##
## data:  xx and yy
## D = 0.23, p-value = 0.5286
## alternative hypothesis: two-sided

应用于例子

# KS test of x against a normal distribution whose mean and sd are taken from
# x itself. The two calls are equivalent: the second standardises x first and
# compares with N(0, 1), hence the identical p-value.
# NOTE: plugging in estimated parameters makes the nominal KS p-value too
# large; the Lilliefors test below corrects for this (compare its p-value).
# The digits of D were truncated in the source printout and are left as found.
ks.test(x, "pnorm", mean = mean(x), sd = sd(x))
## Warning in ks.test(x, "pnorm", mean = mean(x), sd = sd(x)): ties should not be
## present for the Kolmogorov-Smirnov test
##
##  One-sample Kolmogorov-Smirnov test
##
## data:  x
## D = 0., p-value = 0.5771
## alternative hypothesis: two-sided

ks.test(scale(x), "pnorm")
## Warning in ks.test(scale(x), "pnorm"): ties should not be present for the
## Kolmogorov-Smirnov test
##
##  One-sample Kolmogorov-Smirnov test
##
## data:  scale(x)
## D = 0., p-value = 0.5771
## alternative hypothesis: two-sided

Lilliefors test

# Lilliefors (Kolmogorov-Smirnov) normality test from the `nortest` package.
# The digits of D were truncated in the source printout and are left as found.
library(nortest)

lillie.test(x)
##
##  Lilliefors (Kolmogorov-Smirnov) normality test
##
## data:  x
## D = 0., p-value = 0.14

Shapiro-Wilk test

# Shapiro-Wilk normality test on x.
shapiro.test(x)
##
##  Shapiro-Wilk normality test
##
## data:  x
## W = 0.98901, p-value = 0.7023

D'Agostino Normality Test

# fBasics wrappers for the normality tests. Attaching fBasics pulls in
# timeDate, whose kurtosis() and skewness() mask the versions from `moments`
# (see the loading messages below).
library(fBasics)
## Loading required package: timeDate
##
## Attaching package: 'timeDate'
## The following objects are masked from 'package:moments':
##
##     kurtosis, skewness
## Loading required package: timeSeries

# Shapiro-Wilk test; same W and p-value as shapiro.test(x).
shapiroTest(x)
##
## Title:
##  Shapiro - Wilk Normality Test
##
## Test Results:
##   STATISTIC:
##     W: 0.989
##   P VALUE:
##     0.7023
##
## Description:
##  Sun Dec  6 22:42:01 2020 by user:

# D'Agostino test: the omnibus statistic together with its skewness (Z3) and
# kurtosis (Z4) components.
dagoTest(x)
##
## Title:
##  D'Agostino Normality Test
##
## Test Results:
##   STATISTIC:
##     Chi2 | Omnibus: 1.3104
##     Z3  | Skewness: -0.5455
##     Z4  | Kurtosis: 1.0064
##   P VALUE:
##     Omnibus  Test: 0.5193
##     Skewness Test: 0.5854
##     Kurtosis Test: 0.3142
##
## Description:
##  Sun Dec  6 22:42:01 2020 by user: