# Gerko Vink
# 

Load the data

# Restore the R objects stored in the .RData file into the current environment.
# NOTE(review): presumably this is where `cancer_df` (used below) comes from;
# confirm against the contents of the file.
load("challenge/challenge_data.RData")

Your solution

library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-3
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Fix the RNG state so the partition (and the CV folds drawn later) reproduce.
set.seed(123)

# Stratified 80/20 split on the outcome: `idx` holds the training row numbers.
idx <- createDataPartition(
  cancer_df[["disease"]],
  p = 0.8,
  list = FALSE
)

# Rows selected by `idx` form the training set; the remainder is held out.
train <- cancer_df[idx, ]
test <- cancer_df[-idx, ]

# Predictor matrix: every column except the sample identifier and the outcome.
X <- as.matrix(select(train, -sample, -disease))

# Outcome vector for the training rows.
Y <- train[["disease"]]

# Cross-validated elastic-net logistic regression; alpha = 0.9 puts most of the
# penalty weight on the lasso term and the remainder on the ridge term.
cv <- cv.glmnet(x = X, y = Y, family = "binomial", alpha = 0.9)

# Print the cross-validation summary (lambda.min / lambda.1se).
cv
## 
## Call:  cv.glmnet(x = X, y = Y, family = "binomial", alpha = 0.9) 
## 
## Measure: Binomial Deviance 
## 
##      Lambda Index Measure      SE Nonzero
## min 0.01534    72  0.3969 0.06508      70
## 1se 0.05140    46  0.4561 0.05503      40
# Held-out predictors, built with the same column selection as the training
# matrix.
new <- as.matrix(select(test, -sample, -disease))

# Class labels predicted at the more strongly regularised `lambda.1se` value.
pred <- predict(cv, newx = new, s = "lambda.1se", type = "class")

# Accuracy and Cohen's kappa on the held-out set.
postResample(pred = pred, obs = test$disease)
##  Accuracy     Kappa 
## 0.9361702 0.8721668
# Confusion matrix of predicted vs. observed class on the held-out set.
# The predicted labels are converted to a factor that carries the reference
# levels explicitly: `as.factor(pred)` would only create levels for the labels
# that actually occur in `pred`, so a model that predicts a single class would
# yield a factor whose levels no longer match `test$disease`.
# NOTE(review): caret takes the first factor level as the positive class
# ("normal" in the output below); pass `positive = "tumor"` if sensitivity
# should refer to tumor detection.
confusionMatrix(
  data = factor(pred, levels = levels(test$disease)),
  reference = test$disease
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction normal tumor
##     normal     21     1
##     tumor       2    23
##                                           
##                Accuracy : 0.9362          
##                  95% CI : (0.8246, 0.9866)
##     No Information Rate : 0.5106          
##     P-Value [Acc > NIR] : 2.926e-10       
##                                           
##                   Kappa : 0.8722          
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.9130          
##             Specificity : 0.9583          
##          Pos Pred Value : 0.9545          
##          Neg Pred Value : 0.9200          
##              Prevalence : 0.4894          
##          Detection Rate : 0.4468          
##    Detection Prevalence : 0.4681          
##       Balanced Accuracy : 0.9357          
##                                           
##        'Positive' Class : normal          
##