Project: \(\epsilon\)-Insensitive Linkage

Project: \( \epsilon \)-Insensitive Linkage

This project was started by Mr. Saeedi in 1403 as part of the PR course. Contact: Telegram: @amirahs2000

Saeedi_1403

This project is worth 2 points out of 20

Complete this project with full details and corrections, written in fluent English. Additionally, include any innovations needed to improve it.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from scipy.cluster.hierarchy import linkage, dendrogram
from scipy.spatial.distance import pdist
# Custom epsilon-insensitive distance: per-component differences that do not
# exceed epsilon are treated as zero.
def epsilon_insensitive_distance(u, v, epsilon):
    """Return the element-wise epsilon-insensitive absolute difference.

    Each component of the result is ``max(0, |u_i - v_i| - epsilon)``, so
    differences of at most ``epsilon`` contribute nothing. The result is NOT
    reduced to a scalar; callers that need a single distance value should
    aggregate it themselves (for example with ``.sum()``).

    Args:
        u: Array-like of numbers (NumPy array, list, tuple or scalar).
        v: Array-like of numbers, broadcastable against ``u``.
        epsilon: Non-negative tolerance subtracted from each absolute
            difference.

    Returns:
        The per-component epsilon-insensitive differences, with the
        broadcast shape of ``u`` and ``v``.

    Raises:
        ValueError: If ``epsilon`` is negative. A negative tolerance would
            give a point a non-zero distance to itself.
    """
    if np.any(np.asarray(epsilon) < 0):
        raise ValueError("epsilon must be non-negative")
    # Coerce to arrays so plain lists/tuples work; `list - list` is a TypeError.
    abs_diff = np.abs(np.asarray(u) - np.asarray(v))
    return np.maximum(0, abs_diff - epsilon)
# Synthetic dataset: 30 samples drawn around 3 blob centres (fixed seed),
# followed by two hand-placed points that act as outliers
X, _ = make_blobs(n_samples=30, centers=3, cluster_std=1.0, random_state=42)
outliers = np.array([[10, 10], [12, 12]])
X = np.vstack((X, outliers))

# Tolerance used later by the epsilon-insensitive distance
epsilon = 1

# Scatter plot of all samples (blobs and outliers together)
plt.figure(figsize=(9, 7))
plt.scatter(*X.T, s=50)

# Label every point with its row index in X so it can be matched against
# the leaf labels of the dendrograms
label_style = dict(textcoords='offset points', xytext=(5, 5), ha='center')
for i, (x, y) in enumerate(X):
    plt.annotate(i, (x, y), **label_style)

plt.title('Data with Outliers')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.grid(True)
plt.show()
../../_images/6a8492b3ee0b5a5996873e47a222eb27d88c03f4e699a5f22f576393c3bc075e.png
# Condensed pairwise distance vector under the epsilon-insensitive metric:
# the helper thresholds each feature difference by epsilon, and the per-feature
# values are summed into one scalar distance per pair of samples.
custom_distance = pdist(
    X, metric=lambda u, v: epsilon_insensitive_distance(u, v, epsilon).sum()
)
# Single-linkage hierarchical clustering on the precomputed distances
custom_linkage = linkage(custom_distance, method='single')
# Plot the dendrogram in its own labelled figure so it can be told apart from
# other dendrograms; leaves are labelled with the row index of each sample in X
plt.figure(figsize=(9, 7))
dendrogram(custom_linkage)
plt.title(f'Single-Linkage Dendrogram (epsilon-insensitive distance, epsilon = {epsilon})')
plt.xlabel('Sample index')
plt.ylabel('Merge distance')
plt.show()
../../_images/c874f16bc6cba707b0898ad3d0bc4cf06240ba91eb2d75e84d197ff299dd72f2.png
# Baseline for comparison: ordinary Euclidean pairwise distances, computed by
# SciPy's built-in metric instead of a per-pair Python callback
euclidean_distance = pdist(X, metric='euclidean')
# Single-linkage hierarchical clustering on the Euclidean distances
common_linkage = linkage(euclidean_distance, method='single')
# Plot the dendrogram in its own labelled figure; leaves are labelled with the
# row index of each sample in X
plt.figure(figsize=(9, 7))
dendrogram(common_linkage)
plt.title('Single-Linkage Dendrogram (Euclidean distance)')
plt.xlabel('Sample index')
plt.ylabel('Merge distance')
plt.show()
../../_images/18fa465f2c51e99a02cd84d7bd848f1cbd547959347220edbbd02f45724081ef.png