Your solution
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-3
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Fix the RNG state before any random step so that the partition below and
# the cross-validation run afterwards are reproducible.
set.seed(123)

# Hold out roughly 20% of the rows of cancer_df, partitioning on `disease`.
idx <- createDataPartition(cancer_df$disease, p = 0.8, list = FALSE)
train <- cancer_df[idx, ]
test <- cancer_df[-idx, ]

# Predictor matrix: every training column except `sample` and the outcome
# `disease` (presumably `sample` is a row identifier; verify against the data).
X <- as.matrix(select(train, -sample, -disease))
# Outcome values for the training rows.
Y <- train$disease

# Cross-validated binomial elastic net with mixing parameter alpha = 0.9.
cv <- cv.glmnet(X, Y, family = "binomial", alpha = 0.9)
print(cv)
##
## Call: cv.glmnet(x = X, y = Y, family = "binomial", alpha = 0.9)
##
## Measure: Binomial Deviance
##
## Lambda Index Measure SE Nonzero
## min 0.01534 72 0.3969 0.06508 70
## 1se 0.05140 46 0.4561 0.05503 40
# Held-out predictor matrix: drop `sample` and `disease`, exactly as was done
# for the training matrix.
new <- as.matrix(select(test, -sample, -disease))
# Predicted class labels at the "lambda.1se" penalty (the larger of the two
# lambdas reported by the cross-validation summary).
pred <- predict(cv, newx = new, s = "lambda.1se", type = "class")
# Accuracy and Kappa of the predictions against the observed labels.
postResample(pred = pred, obs = test$disease)
## Accuracy Kappa
## 0.9361702 0.8721668
# Cross-tabulate the predictions against the held-out labels.
# predict(type = "class") yields character labels, so as.factor() would derive
# its levels from whichever labels happen to be predicted. Pinning the levels
# to those of the reference keeps both factors aligned even when a class is
# never predicted or the reference levels are not in alphabetical order.
# NOTE(review): the positive class defaults to the first reference level
# ("normal" in the recorded output) - pass positive = "tumor" if sensitivity
# should describe tumor detection.
confusionMatrix(
  factor(pred, levels = levels(test$disease)),
  test$disease
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction normal tumor
## normal 21 1
## tumor 2 23
##
## Accuracy : 0.9362
## 95% CI : (0.8246, 0.9866)
## No Information Rate : 0.5106
## P-Value [Acc > NIR] : 2.926e-10
##
## Kappa : 0.8722
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9130
## Specificity : 0.9583
## Pos Pred Value : 0.9545
## Neg Pred Value : 0.9200
## Prevalence : 0.4894
## Detection Rate : 0.4468
## Detection Prevalence : 0.4681
## Balanced Accuracy : 0.9357
##
## 'Positive' Class : normal
##