Sign up to take part
Registered users can ask their own questions, contribute to discussions, and be part of the Community!
Registered users can ask their own questions, contribute to discussions, and be part of the Community!
Hello,
I'm actually trying to calculate the silhouette score for each of my clusters, any ideas?
I don't know if my logic is correct or whether there is a faster way to do it, since I'm pretty new to DSS and my solution is more coding-oriented. One way I can think of is to export a CSV with the results [x1, x2, ..., xn, cluster] and then create a Python recipe or notebook to calculate the silhouette score. In the code below, points should be a 2-D array of doubles holding your features (one row per sample), clusters should be a list with the cluster label of each row (0, 1, 2, ...), and you should also calculate the centroid of each cluster.
from typing import List
import numpy as np
def centroids(points: np.array, clusters: np.array)->List:
    """Compute the mean point (centroid) of every cluster.

    The number of clusters is derived from the labels (``max label + 1``)
    instead of being fixed, so the result has one entry per label value
    ``0 .. max label``.

    Args:
        points: Feature matrix with one row per sample. Lists are accepted.
        clusters: Integer cluster label of each row of ``points``; labels
            are expected to be ``0, 1, 2, ...``.

    Returns:
        A list whose ``k``-th item is the centroid of cluster ``k`` as a
        NumPy array. A label value with no members gets a NaN-filled
        centroid so that list positions keep matching label values.
        Empty input yields an empty list.
    """
    points = np.asarray(points, dtype=float)
    clusters = np.asarray(clusters)

    if clusters.size == 0:
        return []

    # int() guards against labels that were loaded as floats (e.g. from CSV).
    n_clusters = int(clusters.max()) + 1

    result = []
    for label in range(n_clusters):
        members = points[clusters == label]
        if len(members) == 0:
            # Avoid NumPy's "mean of empty slice" warning while still
            # signalling that this centroid is undefined.
            result.append(np.full(points.shape[1:], np.nan))
        else:
            result.append(members.mean(axis=0))
    return result
def euclidean_dist(x1: np.array, x2: np.array) -> float:
    """Return the Euclidean (L2) distance between two vectors.

    Args:
        x1: First vector. Any array-like (list, tuple, NumPy array).
        x2: Second vector, same shape as ``x1``.

    Returns:
        The distance as a NumPy float scalar for 1-D inputs. For inputs
        with more dimensions the squared differences are summed along the
        first axis only.
    """
    # Converting to float lets plain lists through and prevents wrap-around
    # when the inputs are unsigned-integer arrays.
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(diff ** 2, axis=0))
def silhouette_score(points:List, clusters:List, centroids:List) -> List:
    """Compute a centroid-based silhouette score for every cluster.

    This is an approximation of the classic silhouette coefficient that
    uses centroids instead of all pairwise distances. For cluster ``i``:

    * ``a_i`` is the mean distance from centroid ``i`` to the points of
      cluster ``i``;
    * ``b_i`` is the mean distance from centroid ``i`` to the points of
      the nearest other non-empty cluster (nearest by centroid distance);
    * ``s_i = (b_i - a_i) / max(a_i, b_i)``.

    Args:
        points: Feature matrix, one row per sample (list or NumPy array).
        clusters: Integer cluster label of each row; labels are expected
            to be ``0, 1, 2, ...``.
        centroids: One centroid per label value, indexed by label.

    Returns:
        A list with one float per label value ``0 .. max label``. The score
        is ``nan`` when it is undefined (the cluster has no members, or
        there is no other non-empty cluster to compare with) and ``0.0``
        when both ``a_i`` and ``b_i`` are zero. Empty input yields an empty
        list.
    """
    # Convert unconditionally: np.asarray is a no-op for arrays, and this
    # also covers tuples or a mix of lists and arrays.
    points = np.asarray(points, dtype=float)
    clusters = np.asarray(clusters)
    centroids = np.asarray(centroids, dtype=float)

    if clusters.size == 0:
        return []

    # int() guards against labels that were loaded as floats (e.g. from CSV).
    no_clusters = int(clusters.max()) + 1
    members = [points[clusters == label] for label in range(no_clusters)]

    silhouette_scores = []
    for i in range(no_clusters):
        own_points = members[i]
        # Only non-empty clusters other than i can act as the neighbour;
        # this excludes i explicitly instead of relying on sort order.
        candidates = [
            k for k in range(no_clusters) if k != i and len(members[k]) > 0
        ]
        if len(own_points) == 0 or not candidates:
            silhouette_scores.append(float("nan"))
            continue

        centre = centroids[i]

        # Calculate a(i): cohesion of the cluster around its own centroid.
        a_i = np.linalg.norm(own_points - centre, axis=1).mean()

        # Calculate b(i): separation from the closest neighbouring cluster.
        gaps = np.linalg.norm(centroids[candidates] - centre, axis=1)
        nearest = candidates[int(np.argmin(gaps))]
        b_i = np.linalg.norm(members[nearest] - centre, axis=1).mean()

        # Silhouette score of a single cluster.
        denom = max(a_i, b_i)
        # All involved points coincide with the centroid: a == b == 0.
        s_i = 0.0 if denom == 0 else (b_i - a_i) / denom
        silhouette_scores.append(float(s_i))
    return silhouette_scores
.