Code for K-Fold Cross Validation using Scikit-Learn in Python Tutorial


View on Github

kfold_cross_validation.py

# -*- coding: utf-8 -*-
"""CrossValidation-ScikitLearn_PythonCodeTutorial.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/15FFmKBlvdAFCP4-Ka2SoFsWC93PjdxJH
"""

# Load libraries
from sklearn import datasets
from sklearn import metrics
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# digits dataset loading
digits = datasets.load_digits()
# Create features matrix
features = digits.data
# Create target vector
target = digits.target

# standardization
standard_scaler = StandardScaler()
# logistic regression creation
logit = LogisticRegression()

# pipeline creation for standardization and performing logistic regression
pipeline = make_pipeline(standard_scaler, logit)
# perform k-Fold cross-validation
kf = KFold(n_splits=11, shuffle=True, random_state=2)
# k-fold cross-validation conduction
cv_results = cross_val_score(pipeline, # Pipeline
                             features, # Feature matrix
                             target, # Target vector
                             cv=kf, # Cross-validation technique
                             scoring="accuracy", # Loss function
                             n_jobs=-1) # Use all CPU cores
# View score for all 11 folds
cv_results

# Calculate mean
cv_results.mean()