2 Nonparametric Statistics

2.1 Correlation

# Build the example data
# Two numeric vectors of paired observations, X and Y
X <- c(2, 1, 6, 11, 7, 11, 1, 12, 13, 13, 11)
Y <- c(9, 8, 16, 13, 11, 12, 7, 7, 13, 17, 10)

# Combine the vectors into a data frame; the column names are taken from
# the variable names, so the columns are called X and Y
dataku <- data.frame(X, Y)

# Display the data
dataku
#>     X  Y
#> 1   2  9
#> 2   1  8
#> 3   6 16
#> 4  11 13
#> 5   7 11
#> 6  11 12
#> 7   1  7
#> 8  12  7
#> 9  13 13
#> 10 13 17
#> 11 11 10

# Kendall's tau rank correlation between X and Y, computed with cor.test().
# The data contain tied values, so R cannot compute an exact p-value (see
# the warning below) and reports a z statistic instead.
cor.test(dataku$X, dataku$Y, method = "kendall")
#> Warning in cor.test.default(dataku$X, dataku$Y, method =
#> "kendall"): Cannot compute exact p-value with ties
#> 
#>  Kendall's rank correlation tau
#> 
#> data:  dataku$X and dataku$Y
#> z = 1.7529, p-value = 0.07962
#> alternative hypothesis: true tau is not equal to 0
#> sample estimates:
#>       tau 
#> 0.4273658

# Spearman's rank correlation (rho) between X and Y, computed with cor.test().
# As with Kendall's tau above, tied values prevent an exact p-value, which
# is what the warning below reports.
cor.test(dataku$X, dataku$Y, method = "spearman")
#> Warning in cor.test.default(dataku$X, dataku$Y, method =
#> "spearman"): Cannot compute exact p-value with ties
#> 
#>  Spearman's rank correlation rho
#> 
#> data:  dataku$X and dataku$Y
#> S = 108.98, p-value = 0.1134
#> alternative hypothesis: true rho is not equal to 0
#> sample estimates:
#>       rho 
#> 0.5046513

2.1.1 Chi-Square Test

# Build the example data
# Original data as a 2 x 3 cross-tabulation: employment status (rows) by
# productivity level (columns). matrix() fills column by column, so the
# first two counts belong to "Rendah", the next two to "Sedang", and the
# last two to "Tinggi".
frekuensi <- matrix(c(30, 21, 30, 19, 15, 35), nrow = 2)
rownames(frekuensi) <- c("Kontrak", "Tetap")
colnames(frekuensi) <- c("Rendah", "Sedang", "Tinggi")

# Expand the table into one observation per counted case.
# This is done with vectorized rep() calls instead of appending to vectors
# inside nested loops. The cells are laid out row by row (all columns of
# "Kontrak", then all columns of "Tetap"); t() followed by as.vector()
# returns the counts in exactly that order.
jumlah_per_sel <- as.vector(t(frekuensi))
Status_Pegawai <- rep(
  rep(rownames(frekuensi), each = ncol(frekuensi)),
  times = jumlah_per_sel
)
Tingkat_Produktivitas <- rep(
  rep(colnames(frekuensi), times = nrow(frekuensi)),
  times = jumlah_per_sel
)

# Create the data frame
dataku2 <- data.frame(Status_Pegawai,
                      Tingkat_Produktivitas)
# Show the first rows
head(dataku2)
#>   Status_Pegawai Tingkat_Produktivitas
#> 1        Kontrak                Rendah
#> 2        Kontrak                Rendah
#> 3        Kontrak                Rendah
#> 4        Kontrak                Rendah
#> 5        Kontrak                Rendah
#> 6        Kontrak                Rendah

# Convert both columns to factors
dataku2$Status_Pegawai <- as.factor(dataku2$Status_Pegawai)
dataku2$Tingkat_Produktivitas <- as.factor(dataku2$Tingkat_Produktivitas)

summary(dataku2)
#>  Status_Pegawai Tingkat_Produktivitas
#>  Kontrak:75     Rendah:51            
#>  Tetap  :75     Sedang:49            
#>                 Tinggi:50

# Build the contingency table: employment status (rows) by productivity
# level (columns)
dataku2_kt <- table(dataku2$Status_Pegawai, dataku2$Tingkat_Produktivitas)
dataku2_kt
#>          
#>           Rendah Sedang Tinggi
#>   Kontrak     30     30     15
#>   Tetap       21     19     35

# Run Pearson's chi-squared test on the contingency table
chisq.test(dataku2_kt)
#> 
#>  Pearson's Chi-squared test
#> 
#> data:  dataku2_kt
#> X-squared = 12.058, df = 2, p-value = 0.002408

2.2 Difference Test

2.2.1 Two sample test (Independent)

2.2.1.1 Mann-Whitney Test

# Build the example data
# Efficiency values for the large-scale and the small-scale group
# NOTE(review): 0.28 lies far below every other value in efisiensi_besar
# (1.25 to 1.35) -- confirm it is not a typo (for example for 1.28). The
# output shown below was produced with 0.28.
efisiensi_besar <- c(1.31, 1.25, 1.32, 1.3, 1.33, 1.31, 1.35, 1.34, 0.28, 1.34, 1.28)
efisiensi_kecil <- c(1.21, 1.28, 1.32, 1.25, 1.27, 1.31, 1.26, 1.31, 1.24, 1.22)

# Mann-Whitney test, run in R as the two-sample Wilcoxon rank sum test.
# Tied values prevent an exact p-value, which is what the warning reports.
wilcox.test(efisiensi_besar, efisiensi_kecil)
#> Warning in wilcox.test.default(efisiensi_besar,
#> efisiensi_kecil): cannot compute exact p-value with ties
#> 
#>  Wilcoxon rank sum test with continuity correction
#> 
#> data:  efisiensi_besar and efisiensi_kecil
#> W = 82.5, p-value = 0.05614
#> alternative hypothesis: true location shift is not equal to 0

2.2.1.2 Chi-Square Test

2.2.2 More than two sample test (Independent)

2.2.2.1 Kruskal-Wallis Test

# Build the example data
# Measurements for the three industries A, B and C
industri_A <- c(2.33, 2.79, 3.01, 2.33, 1.22, 2.79, 1.9, 1.65)
industri_B <- c(2.33, 2.33, 2.79, 3.01, 1.99, 2.45)
industri_C <- c(1.06, 1.37, 1.09, 1.65, 1.44, 1.11)

# Group label for every measurement: each label is repeated once per value
# in the corresponding group
industri <- rep(
  c("Industri A", "Industri B", "Industri C"),
  times = c(length(industri_A), length(industri_B), length(industri_C))
)

# All measurements in a single vector, in the same order as the labels
nilai <- c(industri_A, industri_B, industri_C)

# Assemble the data frame, storing the group label as a factor
dataku4 <- data.frame(industri = factor(industri), nilai)

# Display the data frame
dataku4
#>      industri nilai
#> 1  Industri A  2.33
#> 2  Industri A  2.79
#> 3  Industri A  3.01
#> 4  Industri A  2.33
#> 5  Industri A  1.22
#> 6  Industri A  2.79
#> 7  Industri A  1.90
#> 8  Industri A  1.65
#> 9  Industri B  2.33
#> 10 Industri B  2.33
#> 11 Industri B  2.79
#> 12 Industri B  3.01
#> 13 Industri B  1.99
#> 14 Industri B  2.45
#> 15 Industri C  1.06
#> 16 Industri C  1.37
#> 17 Industri C  1.09
#> 18 Industri C  1.65
#> 19 Industri C  1.44
#> 20 Industri C  1.11

# Kruskal-Wallis rank sum test: compares nilai across the three levels of
# industri
kruskal.test(nilai ~ industri, data = dataku4)
#> 
#>  Kruskal-Wallis rank sum test
#> 
#> data:  nilai by industri
#> Kruskal-Wallis chi-squared = 10.619, df = 2, p-value
#> = 0.004943

# Post hoc analysis after Kruskal-Wallis: Dunn's test from the FSA package
# install.packages("FSA")  # run once if the FSA package is not installed
library(FSA)
#> Warning: package 'FSA' was built under R version 4.2.3
#> ## FSA v0.9.5. See citation('FSA') if used in publication.
#> ## Run fishR() for related website and fishR('IFAR') for related book.
# Pairwise comparisons between the industries; as the output states, the
# p-values in P.adj are adjusted with the Holm method
dunnTest(nilai ~ industri, data = dataku4)
#> Dunn (1964) Kruskal-Wallis multiple comparison
#>   p-values adjusted with the Holm method.
#>                Comparison          Z     P.unadj      P.adj
#> 1 Industri A - Industri B -0.6428883 0.520296550 0.52029655
#> 2 Industri A - Industri C  2.6109139 0.009030062 0.01806012
#> 3 Industri B - Industri C  3.0436533 0.002337243 0.00701173

2.2.2.2 Chi-Square Test

2.2.3 Two sample test (Dependent)

2.2.3.1 Sign Test

# Build the example data
# Satisfaction scores given by the same 12 respondents to the old product
# and to the new product
produk_lama <- c(16, 15, 18, 16, 17, 18, 20, 15, 14, 16, 19, 17)
produk_baru <- c(18, 17, 16, 19, 17, 20, 18, 16, 15, 18, 20, 18)
# Respondent identifiers
responden <- 1:12
# Long-format data frame: one row per respondent and product
dataku5 <- data.frame(Responden = c(rep(responden, 2)),
                      Produk = factor(c(rep("Produk Lama", length(produk_lama)), 
                                        rep("Produk Baru", length(produk_baru)))),
                      Skor_Kepuasan = c(produk_lama, produk_baru))
# Display the data frame
dataku5
#>    Responden      Produk Skor_Kepuasan
#> 1          1 Produk Lama            16
#> 2          2 Produk Lama            15
#> 3          3 Produk Lama            18
#> 4          4 Produk Lama            16
#> 5          5 Produk Lama            17
#> 6          6 Produk Lama            18
#> 7          7 Produk Lama            20
#> 8          8 Produk Lama            15
#> 9          9 Produk Lama            14
#> 10        10 Produk Lama            16
#> 11        11 Produk Lama            19
#> 12        12 Produk Lama            17
#> 13         1 Produk Baru            18
#> 14         2 Produk Baru            17
#> 15         3 Produk Baru            16
#> 16         4 Produk Baru            19
#> 17         5 Produk Baru            17
#> 18         6 Produk Baru            20
#> 19         7 Produk Baru            18
#> 20         8 Produk Baru            16
#> 21         9 Produk Baru            15
#> 22        10 Produk Baru            18
#> 23        11 Produk Baru            20
#> 24        12 Produk Baru            18

# Paired differences (new minus old). Both subsets list the respondents in
# the same order, so element k of each subset belongs to respondent k.
# The name `diff` is kept for compatibility with the original script; calls
# to base diff() keep working because R skips non-function objects when it
# looks up a function.
diff <- dataku5[dataku5$Produk == "Produk Baru", ]$Skor_Kepuasan -
  dataku5[dataku5$Produk == "Produk Lama", ]$Skor_Kepuasan

# The sign test only uses pairs whose two scores differ. A zero difference
# carries no sign, so it must not be counted as a trial (counting it would
# treat the tie as a "failure").
n_efektif <- sum(diff != 0)

# Number of positive differences
jumlah_positif <- sum(diff > 0)

# Sign test: exact binomial test of the number of positive signs among the
# non-zero differences against a success probability of 0.5
hasil_uji_tanda <- binom.test(jumlah_positif, n_efektif,
                              p = 0.5,
                              alternative = "two.sided")
hasil_uji_tanda
# Expected for these data: 9 positive, 2 negative and 1 zero difference, so
# the test is binom.test(9, 11) with an exact two-sided p-value of
# 134 / 2048 (about 0.0654). Re-knit the document to refresh the printed
# output.

Interpretation: https://www.geeksforgeeks.org/sign-test-in-r/

2.2.4 More than two sample test (Dependent)

2.2.4.1 Friedman Test

# Build the example data: 7 persons (rows), each measured under 3 drugs
# (columns). The values are listed person by person, hence byrow = TRUE.
dataku6 <- matrix(
  c(1.24, 1.50, 1.62,
    1.71, 1.85, 2.05,
    1.37, 2.12, 1.68,
    2.53, 1.87, 2.62,
    1.23, 1.34, 1.51,
    1.94, 2.33, 2.86,
    1.72, 1.43, 2.86),
  ncol = 3,
  byrow = TRUE,
  dimnames = list(
    Person = as.character(seq_len(7)),
    Obat = c("Obat A", "Obat B", "Obat C")
  )
)
dataku6
#>       Obat
#> Person Obat A Obat B Obat C
#>      1   1.24   1.50   1.62
#>      2   1.71   1.85   2.05
#>      3   1.37   2.12   1.68
#>      4   2.53   1.87   2.62
#>      5   1.23   1.34   1.51
#>      6   1.94   2.33   2.86
#>      7   1.72   1.43   2.86

# Friedman rank sum test on the matrix: rows act as blocks (persons) and
# columns as groups (drugs)
friedman.test(dataku6)
#> 
#>  Friedman rank sum test
#> 
#> data:  dataku6
#> Friedman chi-squared = 8.8571, df = 2, p-value =
#> 0.01193

2.2.4.2 Cochran Test

# Build the example data
## Input: yes/no ("Ya"/"Tidak") answers of 8 respondents for three products
responden <- 1:8
produk_A <- c("Tidak", "Tidak", "Ya", "Ya", "Ya", "Tidak", "Tidak", "Tidak")
produk_B <- c("Tidak", "Ya", "Ya", "Ya", "Tidak", "Tidak", "Ya", "Tidak")
produk_C <- c("Ya", "Tidak", "Tidak", "Ya", "Tidak", "Ya", "Ya", "Tidak")

# stringsAsFactors = TRUE turns the three character columns into factors
# while leaving the integer responden column untouched
dataku7 <- data.frame(responden, produk_A, produk_B, produk_C,
                      stringsAsFactors = TRUE)
dataku7
#>   responden produk_A produk_B produk_C
#> 1         1    Tidak    Tidak       Ya
#> 2         2    Tidak       Ya    Tidak
#> 3         3       Ya       Ya    Tidak
#> 4         4       Ya       Ya       Ya
#> 5         5       Ya    Tidak    Tidak
#> 6         6    Tidak    Tidak       Ya
#> 7         7    Tidak       Ya       Ya
#> 8         8    Tidak    Tidak    Tidak

# Recode the answers: 1 for "Ya", 0 for everything else.
# The comparison is applied to every column, so the responden ids are
# compared with "Ya" as well and all become 0. dataku7 is overwritten with
# the recoded result; its first (responden) column is dropped with [, -1]
# before the test below.
dataku7 <-ifelse(dataku7=="Ya", 1,0)

# install.packages("nonpar")  # run once if the nonpar package is not installed
library(nonpar)
# Cochran's Q test on the three 0/1 product columns at the 5% significance
# level
cochrans.q(as.matrix(dataku7[,-1]), alpha = 0.05)
#> 
#>  Cochran's Q Test 
#>  
#>  H0: There is no difference in the effectiveness of treatments. 
#>  HA: There is a difference in the effectiveness of treatments. 
#>  
#>  Q = 0.333333333333333 
#>  
#>  Degrees of Freedom = 2 
#>  
#>  Significance Level = 0.05 
#>  The p-value is  0.846481724890614 
#>   
#>

1 Basics of R

3 Logistic Regression