3 Logistic Regression

3.1 Regresi Logistik Biner

3.1.1 Data

credit <- read.csv("Data/credit.csv")
head(credit[,1:5],10)
#>    creditability account.balance duration credit.amount
#> 1              1               1       18          1049
#> 2              1               1        9          2799
#> 3              1               2       12           841
#> 4              1               1       12          2122
#> 5              1               1       12          2171
#> 6              1               1       10          2241
#> 7              1               1        8          3398
#> 8              1               1        6          1361
#> 9              1               4       18          1098
#> 10             1               2       24          3758
#>    saving.balance
#> 1               1
#> 2               1
#> 3               2
#> 4               1
#> 5               1
#> 6               1
#> 7               1
#> 8               1
#> 9               1
#> 10              3
str(credit)
#> 'data.frame':    1000 obs. of  14 variables:
#>  $ creditability   : int  1 1 1 1 1 1 1 1 1 1 ...
#>  $ account.balance : int  1 1 2 1 1 1 1 1 4 2 ...
#>  $ duration        : int  18 9 12 12 12 10 8 6 18 24 ...
#>  $ credit.amount   : int  1049 2799 841 2122 2171 2241 3398 1361 1098 3758 ...
#>  $ saving.balance  : int  1 1 2 1 1 1 1 1 1 3 ...
#>  $ employment.year : int  2 3 4 3 3 2 4 2 1 1 ...
#>  $ installment.rate: int  4 2 2 3 4 1 1 2 4 1 ...
#>  $ marital.status  : int  2 3 2 3 3 3 3 3 2 2 ...
#>  $ duration.address: int  4 2 4 2 4 3 4 4 4 4 ...
#>  $ age             : int  21 36 23 39 38 48 39 40 65 23 ...
#>  $ dependents      : int  1 2 1 2 1 2 1 2 1 1 ...
#>  $ number.of.credit: int  1 2 1 2 2 2 2 1 2 1 ...
#>  $ occupation      : int  3 3 2 2 2 2 2 2 1 1 ...
#>  $ previous.credit : int  4 4 2 4 4 4 4 4 4 2 ...
library(dplyr)
#> Warning: package 'dplyr' was built under R version 4.2.3
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
credit <- credit %>% mutate(across(-c(duration,
                            credit.amount,
                            age),as.factor))
str(credit)
#> 'data.frame':    1000 obs. of  14 variables:
#>  $ creditability   : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
#>  $ account.balance : Factor w/ 4 levels "1","2","3","4": 1 1 2 1 1 1 1 1 4 2 ...
#>  $ duration        : int  18 9 12 12 12 10 8 6 18 24 ...
#>  $ credit.amount   : int  1049 2799 841 2122 2171 2241 3398 1361 1098 3758 ...
#>  $ saving.balance  : Factor w/ 5 levels "1","2","3","4",..: 1 1 2 1 1 1 1 1 1 3 ...
#>  $ employment.year : Factor w/ 5 levels "1","2","3","4",..: 2 3 4 3 3 2 4 2 1 1 ...
#>  $ installment.rate: Factor w/ 4 levels "1","2","3","4": 4 2 2 3 4 1 1 2 4 1 ...
#>  $ marital.status  : Factor w/ 4 levels "1","2","3","4": 2 3 2 3 3 3 3 3 2 2 ...
#>  $ duration.address: Factor w/ 4 levels "1","2","3","4": 4 2 4 2 4 3 4 4 4 4 ...
#>  $ age             : int  21 36 23 39 38 48 39 40 65 23 ...
#>  $ dependents      : Factor w/ 2 levels "1","2": 1 2 1 2 1 2 1 2 1 1 ...
#>  $ number.of.credit: Factor w/ 4 levels "1","2","3","4": 1 2 1 2 2 2 2 1 2 1 ...
#>  $ occupation      : Factor w/ 4 levels "1","2","3","4": 3 3 2 2 2 2 2 2 1 1 ...
#>  $ previous.credit : Factor w/ 5 levels "0","1","2","3",..: 5 5 3 5 5 5 5 5 5 3 ...

3.1.2 Pemodelan

logreg1 <- glm(creditability~.,data=credit,family = "binomial")
summary(logreg1)
#> 
#> Call:
#> glm(formula = creditability ~ ., family = "binomial", data = credit)
#> 
#> Deviance Residuals: 
#>     Min       1Q   Median       3Q      Max  
#> -2.5496  -0.7882   0.4288   0.7441   2.0738  
#> 
#> Coefficients:
#>                     Estimate Std. Error z value Pr(>|z|)
#> (Intercept)        2.990e-01  8.942e-01   0.334 0.738097
#> account.balance2   4.346e-01  2.013e-01   2.159 0.030852
#> account.balance3   9.490e-01  3.602e-01   2.635 0.008421
#> account.balance4   1.804e+00  2.222e-01   8.119 4.69e-16
#> duration          -2.705e-02  8.818e-03  -3.068 0.002156
#> credit.amount     -1.025e-04  4.161e-05  -2.465 0.013718
#> saving.balance2    1.293e-01  2.701e-01   0.479 0.632222
#> saving.balance3    4.144e-01  3.987e-01   1.039 0.298644
#> saving.balance4    1.241e+00  5.032e-01   2.467 0.013629
#> saving.balance5    8.811e-01  2.463e-01   3.577 0.000347
#> employment.year2  -1.432e-01  4.109e-01  -0.348 0.727561
#> employment.year3   2.530e-01  3.957e-01   0.639 0.522582
#> employment.year4   7.646e-01  4.258e-01   1.796 0.072572
#> employment.year5   2.386e-01  3.962e-01   0.602 0.547012
#> installment.rate2 -2.841e-01  2.953e-01  -0.962 0.336089
#> installment.rate3 -5.122e-01  3.217e-01  -1.592 0.111374
#> installment.rate4 -9.279e-01  2.872e-01  -3.230 0.001236
#> marital.status2    1.744e-01  3.742e-01   0.466 0.641255
#> marital.status3    7.482e-01  3.670e-01   2.039 0.041468
#> marital.status4    5.577e-01  4.371e-01   1.276 0.201928
#> duration.address2 -7.104e-01  2.832e-01  -2.509 0.012122
#> duration.address3 -5.443e-01  3.163e-01  -1.721 0.085314
#> duration.address4 -4.386e-01  2.762e-01  -1.588 0.112244
#> age                1.125e-02  8.468e-03   1.329 0.183990
#> dependents2       -2.607e-01  2.387e-01  -1.092 0.274669
#> number.of.credit2 -4.177e-01  2.315e-01  -1.805 0.071133
#> number.of.credit3 -4.131e-01  5.951e-01  -0.694 0.487625
#> number.of.credit4 -4.589e-01  9.908e-01  -0.463 0.643240
#> occupation2       -8.953e-02  6.276e-01  -0.143 0.886557
#> occupation3       -1.487e-01  6.048e-01  -0.246 0.805804
#> occupation4        1.276e-02  6.087e-01   0.021 0.983277
#> previous.credit1  -3.136e-01  5.178e-01  -0.606 0.544686
#> previous.credit2   6.063e-01  4.149e-01   1.461 0.143896
#> previous.credit3   8.090e-01  4.531e-01   1.785 0.074205
#> previous.credit4   1.511e+00  4.169e-01   3.625 0.000288
#>                      
#> (Intercept)          
#> account.balance2  *  
#> account.balance3  ** 
#> account.balance4  ***
#> duration          ** 
#> credit.amount     *  
#> saving.balance2      
#> saving.balance3      
#> saving.balance4   *  
#> saving.balance5   ***
#> employment.year2     
#> employment.year3     
#> employment.year4  .  
#> employment.year5     
#> installment.rate2    
#> installment.rate3    
#> installment.rate4 ** 
#> marital.status2      
#> marital.status3   *  
#> marital.status4      
#> duration.address2 *  
#> duration.address3 .  
#> duration.address4    
#> age                  
#> dependents2          
#> number.of.credit2 .  
#> number.of.credit3    
#> number.of.credit4    
#> occupation2          
#> occupation3          
#> occupation4          
#> previous.credit1     
#> previous.credit2     
#> previous.credit3  .  
#> previous.credit4  ***
#> ---
#> Signif. codes:  
#> 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> (Dispersion parameter for binomial family taken to be 1)
#> 
#>     Null deviance: 1221.7  on 999  degrees of freedom
#> Residual deviance:  956.0  on 965  degrees of freedom
#> AIC: 1026
#> 
#> Number of Fisher Scoring iterations: 5

3.1.3 Odds Ratio

beta = round(coef(logreg1),2)
OR = round(exp(beta),2)
cbind(beta, OR)
#>                    beta   OR
#> (Intercept)        0.30 1.35
#> account.balance2   0.43 1.54
#> account.balance3   0.95 2.59
#> account.balance4   1.80 6.05
#> duration          -0.03 0.97
#> credit.amount      0.00 1.00
#> saving.balance2    0.13 1.14
#> saving.balance3    0.41 1.51
#> saving.balance4    1.24 3.46
#> saving.balance5    0.88 2.41
#> employment.year2  -0.14 0.87
#> employment.year3   0.25 1.28
#> employment.year4   0.76 2.14
#> employment.year5   0.24 1.27
#> installment.rate2 -0.28 0.76
#> installment.rate3 -0.51 0.60
#> installment.rate4 -0.93 0.39
#> marital.status2    0.17 1.19
#> marital.status3    0.75 2.12
#> marital.status4    0.56 1.75
#> duration.address2 -0.71 0.49
#> duration.address3 -0.54 0.58
#> duration.address4 -0.44 0.64
#> age                0.01 1.01
#> dependents2       -0.26 0.77
#> number.of.credit2 -0.42 0.66
#> number.of.credit3 -0.41 0.66
#> number.of.credit4 -0.46 0.63
#> occupation2       -0.09 0.91
#> occupation3       -0.15 0.86
#> occupation4        0.01 1.01
#> previous.credit1  -0.31 0.73
#> previous.credit2   0.61 1.84
#> previous.credit3   0.81 2.25
#> previous.credit4   1.51 4.53

3.1.4 Multikolineratitas

library(car)
#> Warning: package 'car' was built under R version 4.2.3
#> Loading required package: carData
#> 
#> Attaching package: 'car'
#> The following object is masked from 'package:dplyr':
#> 
#>     recode
vif(logreg1)
#>                      GVIF Df GVIF^(1/(2*Df))
#> account.balance  1.283532  3        1.042480
#> duration         1.828834  1        1.352344
#> credit.amount    2.284117  1        1.511330
#> saving.balance   1.286469  4        1.031989
#> employment.year  2.406179  4        1.116005
#> installment.rate 1.443706  3        1.063114
#> marital.status   1.439516  3        1.062599
#> duration.address 1.502426  3        1.070201
#> age              1.365556  1        1.168570
#> dependents       1.177252  1        1.085012
#> number.of.credit 2.060162  3        1.128020
#> occupation       1.893863  3        1.112307
#> previous.credit  2.136438  4        1.099541

3.1.5 Akurasi

pred_clas <- ifelse(logreg1$fitted.values > 0.5, 1, 0)
conf_matrix <- table(credit$creditability, pred_clas)
conf_matrix
#>    pred_clas
#>       0   1
#>   0 145 155
#>   1  68 632
paste0("Akurasi Model:")
#> [1] "Akurasi Model:"
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
accuracy
#> [1] 0.777

3.1.6 Kebaikan Model

#install.packages("performance")
library(performance)
#> Warning: package 'performance' was built under R version
#> 4.2.3
#Outliers
performance::check_outliers(logreg1)
#> OK: No outliers detected.
#> - Based on the following method and threshold: cook (0.8).
#> - For variable: (Whole model)
#Metrik
performance(logreg1)
#> # Indices of model performance
#> 
#> AIC      |     AICc |      BIC | Tjur's R2 |  RMSE | Sigma | Log_loss | Score_log | Score_spherical |   PCP
#> -----------------------------------------------------------------------------------------------------------
#> 1025.995 | 1028.609 | 1197.767 |     0.254 | 0.395 | 0.995 |    0.478 |      -Inf |           0.001 | 0.687
#Goodness Of Fit
performance_hosmer(logreg1)
#> # Hosmer-Lemeshow Goodness-of-Fit Test
#> 
#>   Chi-squared: 8.472
#>            df: 8    
#>       p-value: 0.389
#> Summary: model seems to fit well.

3.2 Regresi Logistik Nominal atau Multinominal

3.2.1 Data

library(readxl)
#> Warning: package 'readxl' was built under R version 4.2.3
students <- read_excel("Data/students.xlsx")
head(students,10)
#> # A tibble: 10 × 6
#>    gender ses    prog      read write  math
#>    <chr>  <chr>  <chr>    <dbl> <dbl> <dbl>
#>  1 female low    vocation    34    35    41
#>  2 male   middle general     34    33    41
#>  3 male   high   vocation    39    39    44
#>  4 male   low    vocation    37    37    42
#>  5 male   middle vocation    39    31    40
#>  6 female high   general     42    36    42
#>  7 male   middle vocation    31    36    46
#>  8 male   middle vocation    50    31    40
#>  9 female middle vocation    39    41    33
#> 10 male   middle vocation    34    37    46
str(students)
#> tibble [200 × 6] (S3: tbl_df/tbl/data.frame)
#>  $ gender: chr [1:200] "female" "male" "male" "male" ...
#>  $ ses   : chr [1:200] "low" "middle" "high" "low" ...
#>  $ prog  : chr [1:200] "vocation" "general" "vocation" "vocation" ...
#>  $ read  : num [1:200] 34 34 39 37 39 42 31 50 39 34 ...
#>  $ write : num [1:200] 35 33 39 37 31 36 36 31 41 37 ...
#>  $ math  : num [1:200] 41 41 44 42 40 42 46 40 33 46 ...

3.2.2 Ubah jadi faktor

library(dplyr)
students <- students %>% mutate(across(-c(read,write,math),as.factor))
students$prog2 <- relevel(students$prog, ref = "academic")
str(students)
#> tibble [200 × 7] (S3: tbl_df/tbl/data.frame)
#>  $ gender: Factor w/ 2 levels "female","male": 1 2 2 2 2 1 2 2 1 2 ...
#>  $ ses   : Factor w/ 3 levels "high","low","middle": 2 3 1 2 3 1 3 3 3 3 ...
#>  $ prog  : Factor w/ 3 levels "academic","general",..: 3 2 3 3 3 2 3 3 3 3 ...
#>  $ read  : num [1:200] 34 34 39 37 39 42 31 50 39 34 ...
#>  $ write : num [1:200] 35 33 39 37 31 36 36 31 41 37 ...
#>  $ math  : num [1:200] 41 41 44 42 40 42 46 40 33 46 ...
#>  $ prog2 : Factor w/ 3 levels "academic","general",..: 3 2 3 3 3 2 3 3 3 3 ...
table(students$ses, students$prog)
#>         
#>          academic general vocation
#>   high         42       9        7
#>   low          19      16       12
#>   middle       44      20       31
table(students$gender, students$prog)
#>         
#>          academic general vocation
#>   female       58      24       27
#>   male         47      21       23

3.2.3 Pemodelan

#install.packages("nnet")
library(nnet)
#> Warning: package 'nnet' was built under R version 4.2.3
logmultinom <- multinom(prog2 ~ ses + gender + write + read, data = students)
#> # weights:  21 (12 variable)
#> initial  value 219.722458 
#> iter  10 value 176.754587
#> final  value 174.725397 
#> converged
summary(logmultinom)
#> Call:
#> multinom(formula = prog2 ~ ses + gender + write + read, data = students)
#> 
#> Coefficients:
#>          (Intercept)    seslow sesmiddle gendermale
#> general     2.621831 1.0038426 0.5651588  0.1273914
#> vocation    6.505182 0.6239396 1.1539447 -0.3105237
#>                write        read
#> general  -0.02860308 -0.04730781
#> vocation -0.08243508 -0.07108839
#> 
#> Std. Errors:
#>          (Intercept)    seslow sesmiddle gendermale
#> general     1.434514 0.5323398 0.4713812  0.4137756
#> vocation    1.524572 0.6200276 0.5231819  0.4414783
#>               write       read
#> general  0.02686316 0.02480868
#> vocation 0.02793343 0.02752520
#> 
#> Residual Deviance: 349.4508 
#> AIC: 373.4508
z <- summary(logmultinom)$coefficients/summary(logmultinom)$standard.errors
# 2-tailed z test
p <- (1 - pnorm(abs(z), 0, 1)) * 2
p
#>           (Intercept)     seslow  sesmiddle gendermale
#> general  6.759775e-02 0.05933302 0.23055043  0.7581770
#> vocation 1.982164e-05 0.31426675 0.02741006  0.4818237
#>                write        read
#> general  0.286980200 0.056532815
#> vocation 0.003166173 0.009804037

3.2.4 Odds Ratio

exp(coef(logmultinom))
#>          (Intercept)   seslow sesmiddle gendermale
#> general      13.7609 2.728747  1.759727   1.135862
#> vocation    668.5973 1.866266  3.170676   0.733063
#>              write      read
#> general  0.9718021 0.9537938
#> vocation 0.9208712 0.9313796

3.2.5 Multikolineratitas

library(car)
vif(logmultinom)
#> Warning in vif.default(logmultinom): No intercept: vifs may
#> not be sensible.
#>             GVIF Df GVIF^(1/(2*Df))
#> ses     6.640420  2        1.605273
#> gender  2.650955  1        1.628175
#> write  66.396002  1        8.148374
#> read   53.940932  1        7.344449

3.2.6 Akurasi

df <- students[,c("ses","gender","write","read")]
#install.packages("caret")
library(caret)
#> Warning: package 'caret' was built under R version 4.2.3
#> Loading required package: ggplot2
#> Warning: package 'ggplot2' was built under R version 4.2.3
#> Loading required package: lattice
#> Warning: package 'lattice' was built under R version 4.2.3
prediksi <- predict(logmultinom, df, type = "class")
confusionMatrix(as.factor(prediksi), 
                students$prog2)
#> Confusion Matrix and Statistics
#> 
#>           Reference
#> Prediction academic general vocation
#>   academic       90      25       21
#>   general         3       7        4
#>   vocation       12      13       25
#> 
#> Overall Statistics
#>                                          
#>                Accuracy : 0.61           
#>                  95% CI : (0.5387, 0.678)
#>     No Information Rate : 0.525          
#>     P-Value [Acc > NIR] : 0.009485       
#>                                          
#>                   Kappa : 0.3094         
#>                                          
#>  Mcnemar's Test P-Value : 1.959e-05      
#> 
#> Statistics by Class:
#> 
#>                      Class: academic Class: general
#> Sensitivity                   0.8571         0.1556
#> Specificity                   0.5158         0.9548
#> Pos Pred Value                0.6618         0.5000
#> Neg Pred Value                0.7656         0.7957
#> Prevalence                    0.5250         0.2250
#> Detection Rate                0.4500         0.0350
#> Detection Prevalence          0.6800         0.0700
#> Balanced Accuracy             0.6865         0.5552
#>                      Class: vocation
#> Sensitivity                   0.5000
#> Specificity                   0.8333
#> Pos Pred Value                0.5000
#> Neg Pred Value                0.8333
#> Prevalence                    0.2500
#> Detection Rate                0.1250
#> Detection Prevalence          0.2500
#> Balanced Accuracy             0.6667

3.2.7 Kebaikan Model

logmultinom0 <- multinom(prog2 ~ 1, data = students)
#> # weights:  6 (2 variable)
#> initial  value 219.722458 
#> final  value 204.096674 
#> converged
#install.packages("lmtest")
library(lmtest)
#> Loading required package: zoo
#> Warning: package 'zoo' was built under R version 4.2.3
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric
lrtest(logmultinom0,logmultinom)
#> Likelihood ratio test
#> 
#> Model 1: prog2 ~ 1
#> Model 2: prog2 ~ ses + gender + write + read
#>   #Df  LogLik Df  Chisq Pr(>Chisq)    
#> 1   2 -204.10                         
#> 2  12 -174.72 10 58.743  6.263e-09 ***
#> ---
#> Signif. codes:  
#> 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

3.3 Regresi Logistik Ordinal

3.3.1 Data

crash <- read.csv("Data/crash.csv")
head(crash,10)
#>    Gender Location SeatBelt Respon
#> 1  Female    Urban      Yes      1
#> 2    Male    Urban      Yes      1
#> 3    Male    Urban       No      1
#> 4  Female    Urban       No      1
#> 5    Male    Rural      Yes      1
#> 6  Female    Rural      Yes      1
#> 7    Male    Rural       No      1
#> 8  Female    Rural       No      1
#> 9  Female    Urban       No      3
#> 10 Female    Rural       No      3
library(dplyr)
crash <- crash %>% mutate(across(-c(Respon),as.factor))
str(crash)
#> 'data.frame':    80 obs. of  4 variables:
#>  $ Gender  : Factor w/ 2 levels "Female","Male": 1 2 2 1 2 1 2 1 1 1 ...
#>  $ Location: Factor w/ 2 levels "Rural","Urban": 2 2 2 2 1 1 1 1 2 1 ...
#>  $ SeatBelt: Factor w/ 2 levels "No","Yes": 2 2 1 1 2 2 1 1 1 1 ...
#>  $ Respon  : int  1 1 1 1 1 1 1 1 3 3 ...
crash$Respon <- ordered(crash$Respon, levels=c("1","2","3","4","5"))
str(crash)
#> 'data.frame':    80 obs. of  4 variables:
#>  $ Gender  : Factor w/ 2 levels "Female","Male": 1 2 2 1 2 1 2 1 1 1 ...
#>  $ Location: Factor w/ 2 levels "Rural","Urban": 2 2 2 2 1 1 1 1 2 1 ...
#>  $ SeatBelt: Factor w/ 2 levels "No","Yes": 2 2 1 1 2 2 1 1 1 1 ...
#>  $ Respon  : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 1 1 1 1 1 1 1 1 3 3 ...

3.3.2 Pemodelan

#install.packages("MASS")
library(MASS)
#> Warning: package 'MASS' was built under R version 4.2.3
#> 
#> Attaching package: 'MASS'
#> The following object is masked from 'package:dplyr':
#> 
#>     select
orderlog <- polr(Respon~., method='logistic',data=crash)
summary(orderlog)
#> 
#> Re-fitting to get Hessian
#> Call:
#> polr(formula = Respon ~ ., data = crash, method = "logistic")
#> 
#> Coefficients:
#>                  Value Std. Error t value
#> GenderMale    -0.05369     0.3974 -0.1351
#> LocationUrban  0.05661     0.3958  0.1430
#> SeatBeltYes   -0.31102     0.3974 -0.7827
#> 
#> Intercepts:
#>     Value   Std. Error t value
#> 1|2 -1.5425  0.4450    -3.4664
#> 2|3 -0.5523  0.4060    -1.3603
#> 3|4  0.2649  0.3966     0.6678
#> 4|5  1.2472  0.4264     2.9249
#> 
#> Residual Deviance: 256.8444 
#> AIC: 270.8444

3.3.3 Odds Ratio

koefisien<-coef(summary(orderlog)) 
#> 
#> Re-fitting to get Hessian
exp(koefisien[,1])
#>    GenderMale LocationUrban   SeatBeltYes           1|2 
#>     0.9477303     1.0582414     0.7327004     0.2138542 
#>           2|3           3|4           4|5 
#>     0.5756362     1.3032362     3.4805710
# menghitung pvalue
p <- pnorm(abs(koefisien[,"t value"]), lower.tail = FALSE)*2
(ctabel<-cbind(round(koefisien,2), "pvalue"=round(p,3))) 
#>               Value Std. Error t value pvalue
#> GenderMale    -0.05       0.40   -0.14  0.893
#> LocationUrban  0.06       0.40    0.14  0.886
#> SeatBeltYes   -0.31       0.40   -0.78  0.434
#> 1|2           -1.54       0.44   -3.47  0.001
#> 2|3           -0.55       0.41   -1.36  0.174
#> 3|4            0.26       0.40    0.67  0.504
#> 4|5            1.25       0.43    2.92  0.003

3.3.4 Multikolineratitas

library(car)
vif(orderlog)
#> 
#> Re-fitting to get Hessian
#>   Gender Location SeatBelt 
#> 1.002035 1.001265 1.001814

3.3.5 Akurasi

df <- crash[,1:3]
prediksi <- predict(orderlog, df, type = "class")
confusionMatrix(as.factor(prediksi), 
                crash$Respon)
#> Confusion Matrix and Statistics
#> 
#>           Reference
#> Prediction  1  2  3  4  5
#>          1 10  8  7  7  8
#>          2  0  0  0  0  0
#>          3  0  0  0  0  0
#>          4  0  0  0  0  0
#>          5  6  8  9  9  8
#> 
#> Overall Statistics
#>                                           
#>                Accuracy : 0.225           
#>                  95% CI : (0.1391, 0.3321)
#>     No Information Rate : 0.2             
#>     P-Value [Acc > NIR] : 0.3292          
#>                                           
#>                   Kappa : 0.0312          
#>                                           
#>  Mcnemar's Test P-Value : NA              
#> 
#> Statistics by Class:
#> 
#>                      Class: 1 Class: 2 Class: 3 Class: 4
#> Sensitivity            0.6250      0.0      0.0      0.0
#> Specificity            0.5312      1.0      1.0      1.0
#> Pos Pred Value         0.2500      NaN      NaN      NaN
#> Neg Pred Value         0.8500      0.8      0.8      0.8
#> Prevalence             0.2000      0.2      0.2      0.2
#> Detection Rate         0.1250      0.0      0.0      0.0
#> Detection Prevalence   0.5000      0.0      0.0      0.0
#> Balanced Accuracy      0.5781      0.5      0.5      0.5
#>                      Class: 5
#> Sensitivity               0.5
#> Specificity               0.5
#> Pos Pred Value            0.2
#> Neg Pred Value            0.8
#> Prevalence                0.2
#> Detection Rate            0.1
#> Detection Prevalence      0.5
#> Balanced Accuracy         0.5

3.3.6 Kebaikan Model

orderlog0 <-polr(Respon~1, method = "logistic", data = crash)
#install.packages("lmtest")
library(lmtest)
lrtest(orderlog0,orderlog)
#> Likelihood ratio test
#> 
#> Model 1: Respon ~ 1
#> Model 2: Respon ~ Gender + Location + SeatBelt
#>   #Df  LogLik Df  Chisq Pr(>Chisq)
#> 1   4 -128.75                     
#> 2   7 -128.42  3 0.6657     0.8813