# kfold_cross_validation.py
# -*- coding: utf-8 -*-
"""CrossValidation-ScikitLearn_PythonCodeTutorial.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/15FFmKBlvdAFCP4-Ka2SoFsWC93PjdxJH
"""
# Load libraries
from sklearn import datasets
from sklearn import metrics
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
# Evaluate a standardize-then-classify pipeline on the scikit-learn digits
# dataset with shuffled 11-fold cross-validation, then report the accuracy of
# each fold and the mean accuracy.

# Load the digits dataset bundled with scikit-learn
digits = datasets.load_digits()
# Feature matrix
features = digits.data
# Target vector
target = digits.target

# Standardization step
standard_scaler = StandardScaler()
# Logistic regression classifier (library default settings)
logit = LogisticRegression()
# Chain both steps into one estimator so that cross_val_score re-fits the
# scaler together with the classifier on every training split, rather than
# scaling once on the full data set
pipeline = make_pipeline(standard_scaler, logit)

# Define the splitter: 11 shuffled folds; the fixed seed keeps the fold
# assignment reproducible between runs
kf = KFold(n_splits=11, shuffle=True, random_state=2)

# Run the cross-validation
cv_results = cross_val_score(
    pipeline,            # Estimator to evaluate
    features,            # Feature matrix
    target,              # Target vector
    cv=kf,               # Cross-validation splitter
    scoring="accuracy",  # Evaluation metric (a score: higher is better)
    n_jobs=-1,           # Use all CPU cores
)

# Report the results explicitly: a bare expression is only echoed by a
# notebook cell and is silently discarded when this file runs as a script
print("Accuracy per fold:", cv_results)
print("Mean accuracy:", cv_results.mean())