Code for Clustering Algorithms in Machine Learning with Python Tutorial

kmeans_clustering.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import KMeans
from matplotlib import pyplot

# 2 features, 2 informative, 0 redundant, 1 cluster per class
X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10) 

# 2 clusters
m = KMeans(n_clusters=2) 
# fit the model
m.fit(X)
# predict the cluster for each data point
p = m.predict(X) 
# unique clusters
cl = np.unique(p)
# plot the data points and cluster centers
for c in cl:
    r = np.where(c == p)
    pyplot.title('K-means (No. of Clusters = 3)')
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()

minibatch_kmeans_clustering.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import MiniBatchKMeans
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# 3 clusters
m = MiniBatchKMeans(n_clusters=3) 
# fit the model
m.fit(X)
# predict the cluster for each data point
p = m.predict(X) 
# unique clusters
cl = np.unique(p)
# plot the data points and cluster centers
for c in cl:
    r = np.where(c == p)
    pyplot.title('Mini Batch K-means')
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()

time_diff_minibatch_and_kmeans.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import MiniBatchKMeans
from sklearn.cluster import KMeans
from matplotlib import pyplot
import timeit

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# start timer for Mini Batch K-Means
t1_mkm = timeit.default_timer() 
m = MiniBatchKMeans(n_clusters=2)
m.fit(X)
p = m.predict(X)
# stop timer for Mini Batch K-Means
t2_mkm = timeit.default_timer()
# start timer for K-Means
t1_km = timeit.default_timer()
m = KMeans(n_clusters=2)
m.fit(X)
p = m.predict(X)
# stop timer for K-Means
t2_km = timeit.default_timer()
# print time difference
print("Time difference between Mini Batch K-Means and K-Means = ",
      (t2_km-t1_km)-(t2_mkm-t1_mkm))

affinity_propagation.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import AffinityPropagation
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)

# initialize the model
m = AffinityPropagation(damping=0.9)
# fit the model
m.fit(X)
# predict the cluster for each data point
p = m.predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points and cluster centers
for c in cl:
    r = np.where(c == p)
    pyplot.title('Affinity Propagation Clustering')
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()

dbscan_clustering.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import DBSCAN
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model
m = DBSCAN(eps=0.05, min_samples=10)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X) 
# unique clusters
cl = np.unique(p)
# plot the data points and cluster centers
for c in cl:
    r = np.where(c == p)
    pyplot.title('DBSCAN Clustering')
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()

optics.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import OPTICS
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)

# init the model
m = OPTICS(eps=0.5, min_samples=10)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points and cluster centers
for c in cl:
    r = np.where(c == p)
    pyplot.title('OPTICS Clustering')
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()

birch.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import Birch
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model with 2 clusters
m = Birch(threshold=0.05, n_clusters=2)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X) 
# unique clusters
cl = np.unique(p)
# plot the data points and cluster centers
for c in cl:
    r = np.where(c == p)
    pyplot.title('Birch Clustering')
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()

agglomerative_clustering.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import AgglomerativeClustering
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model with 3 clusters
m = AgglomerativeClustering(n_clusters=3)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X) 
# unique clusters
cl = np.unique(p)
# plot the data points and cluster centers
for c in cl:
    r = np.where(c == p)
    pyplot.title('Agglomerative Clustering')
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()

meanshift_clustering.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import MeanShift
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model
m = MeanShift()
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points and cluster centers
for c in cl:
    r = np.where(c == p)
    pyplot.title('Mean Shift Clustering')
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()

spectral_clustering.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.cluster import SpectralClustering
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model with 3 clusters
m = SpectralClustering(n_clusters=3)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points and cluster centers
for c in cl:
    r = np.where(c == p)
    pyplot.title('Spectral Clustering')
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()

gmm.py

import numpy as np
from sklearn.datasets import make_classification
from sklearn.mixture import GaussianMixture
from matplotlib import pyplot

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1, random_state=10)
# init the model with 2 components
m = GaussianMixture(n_components=2)
# predict the cluster for each data point after fitting the model
p = m.fit_predict(X)
# unique clusters
cl = np.unique(p)
# plot the data points and cluster centers
for c in cl:
    r = np.where(c == p)
    pyplot.title('Gaussian Mixture Clustering')
    pyplot.scatter(X[r, 0], X[r, 1])
# show the plot
pyplot.show()

metrics.py

from sklearn import metrics

y_true = [5, 3, 5, 4, 4, 5]
y_pred = [3, 5, 5, 4, 3, 4]
# homogeneity: each cluster contains only members of a single class.
print(metrics.homogeneity_score(y_true, y_pred))
# completeness: all members of a given class are assigned to the same cluster.
print(metrics.completeness_score(y_true, y_pred))
# v-measure: harmonic mean of homogeneity and completeness
print(metrics.v_measure_score(y_true, y_pred))

Ethical Hacking with Python EBook - Topic - Top

New Tutorials

Building a Full-Stack RAG Chatbot with FastAPI, OpenAI, and Streamlit

How to Recover Deleted Files with Python

How to Use Python to Track Google Search Results and Reviews Over Time

YouTube Video Transcription Summarization with Python

Getting Started with Python for SaaS Applications

Code for Clustering Algorithms in Machine Learning with Python Tutorial

Tags

New Tutorials

Popular Tutorials