Skip to content

Add Quantum k-Means Clustering Implementation #11664

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

111 changes: 111 additions & 0 deletions quantum/quantum_kmeans_clustering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import cirq
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.preprocessing import MinMaxScaler


def generate_data(n_samples=100, n_features=2, n_clusters=2, random_state=42):
    """
    Generate a synthetic blob dataset and rescale it with ``MinMaxScaler``.

    :param n_samples: Number of points to generate.
    :param n_features: Number of features (columns) per point.
    :param n_clusters: Number of blob centers requested from ``make_blobs``.
    :param random_state: Seed forwarded to ``make_blobs``. The default of 42
        reproduces the value that used to be hard-coded here.
    :return: Tuple ``(scaled_data, labels)`` where ``scaled_data`` is the
        output of ``MinMaxScaler().fit_transform`` on the generated points and
        ``labels`` are the blob memberships returned by ``make_blobs``.
    """
    data, labels = make_blobs(
        n_samples=n_samples,
        centers=n_clusters,
        n_features=n_features,
        random_state=random_state,
    )
    # Rescale with the scaler's default settings before handing the data on.
    return MinMaxScaler().fit_transform(data), labels


def quantum_distance(
    point1: np.ndarray, point2: np.ndarray, repetitions: int = 1000
) -> float:
    """
    Estimates a distance-like score between two points with a one-qubit circuit.

    The Euclidean distance between the points is clipped to ``[0, 1]`` and
    encoded as the rotation angle ``theta = 2 * arcsin(distance)`` of an ``ry``
    gate. Measuring the qubit then yields 1 with probability
    ``sin(theta / 2) ** 2``, i.e. the clipped distance squared. The function
    returns the observed fraction of 1-outcomes, so the value is a sampled
    estimate and may differ slightly between calls.

    Because of the clipping, all pairs of points farther apart than 1 produce
    the same (saturated) score.

    :param point1: First point as a numpy array.
    :param point2: Second point as a numpy array.
    :param repetitions: Number of times the circuit is sampled. Defaults to
        1000, the value previously hard-coded in this function.
    :return: Fraction of measurements that returned 1, in ``[0, 1]``.
    :raises ValueError: If ``repetitions`` is smaller than 1.

    >>> point_a = np.array([1.0, 2.0])
    >>> point_b = np.array([1.5, 2.5])
    >>> result = quantum_distance(point_a, point_b)
    >>> assert isinstance(result, float)
    """
    if repetitions < 1:
        raise ValueError("repetitions must be at least 1")

    qubit = cirq.LineQubit(0)
    # arcsin is only defined on [-1, 1]; the norm is non-negative, so clip to [0, 1].
    diff = np.clip(np.linalg.norm(point1 - point2), 0, 1)
    theta = 2 * np.arcsin(diff)

    circuit = cirq.Circuit(cirq.ry(theta)(qubit), cirq.measure(qubit, key="result"))

    result = cirq.Simulator().run(circuit, repetitions=repetitions)
    # Use the same shot count for sampling and normalisation so they cannot drift.
    return result.histogram(key="result").get(1, 0) / repetitions


def initialize_centroids(data: np.ndarray, k: int) -> np.ndarray:
    """
    Initializes centroids for k-means clustering.

    Picks ``k`` distinct rows of ``data`` uniformly at random (sampling without
    replacement), so no two initial centroids come from the same row index.

    :param data: The dataset from which to initialize centroids.
    :param k: The number of centroids to initialize.
    :return: An array of initialized centroids (``k`` rows taken from ``data``).
    :raises ValueError: If ``k`` is larger than the number of rows in ``data``
        (raised by the underlying sampling call).

    >>> data = np.array([[1, 2], [3, 4], [5, 6]])
    >>> centroids = initialize_centroids(data, 2)
    >>> assert centroids.shape == (2, 2)
    """
    # Use the Generator API instead of the legacy global-state np.random.choice
    # (ruff NPY002).
    rng = np.random.default_rng()
    return data[rng.choice(len(data), size=k, replace=False)]

Check failure on line 50 in quantum/quantum_kmeans_clustering.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (NPY002)

quantum/quantum_kmeans_clustering.py:50:17: NPY002 Replace legacy `np.random.choice` call with `np.random.Generator`


def assign_clusters(data, centroids):
    """
    Group every point of ``data`` under its nearest centroid.

    Nearness is measured with ``quantum_distance``; when several centroids
    share the smallest score, the one with the lowest index wins.

    :param data: Iterable of points to assign.
    :param centroids: Sequence of current centroids.
    :return: A list with one entry per centroid; entry ``i`` is the list of
        points assigned to ``centroids[i]`` (possibly empty).
    """
    groups = [[] for _ in centroids]
    for sample in data:
        # One score per centroid, evaluated in centroid order.
        scores = [quantum_distance(sample, centroid) for centroid in centroids]
        nearest = scores.index(min(scores))
        groups[nearest].append(sample)
    return groups


def recompute_centroids(clusters):
    """
    Compute the mean point of every non-empty cluster.

    Empty clusters are skipped, so the result can have fewer rows than
    ``clusters`` has entries; rows keep the relative order of the non-empty
    clusters.

    :param clusters: Sequence of clusters, each a list or array of points.
    :return: Array with one centroid (the per-feature mean) per non-empty
        cluster.
    """
    # Test emptiness with len(): plain truthiness is ambiguous for ndarray
    # clusters, while len() works for lists and arrays alike.
    return np.array(
        [np.mean(cluster, axis=0) for cluster in clusters if len(cluster) > 0]
    )


def quantum_kmeans(data, k, max_iters=10):
    """
    Run k-means clustering using ``quantum_distance`` for the assignments.

    Starting from ``k`` randomly chosen points, the loop alternates between
    assigning points to their nearest centroid and recomputing the centroids,
    stopping early once the centroids no longer move.

    Clusters that end up empty are discarded together with their centroid, so
    fewer than ``k`` clusters may be returned. ``clusters[i]`` always
    corresponds to ``centroids[i]``.

    :param data: Array of points to cluster (one point per row).
    :param k: Number of centroids to start with.
    :param max_iters: Maximum number of assign/recompute rounds (at least 1).
    :return: Tuple ``(centroids, clusters)`` where ``centroids`` is an array of
        cluster centers and ``clusters`` is a list of lists of points.
    :raises ValueError: If ``max_iters`` is smaller than 1.
    """
    if max_iters < 1:
        # Without at least one round there would be no clusters to return.
        raise ValueError("max_iters must be at least 1")

    centroids = initialize_centroids(data, k)
    clusters = []

    for _ in range(max_iters):
        # Drop empty clusters so that clusters[i] lines up with row i of the
        # recomputed centroids, which skips empty clusters as well.
        clusters = [
            cluster
            for cluster in assign_clusters(data, centroids)
            if len(cluster) > 0
        ]
        new_centroids = recompute_centroids(clusters)
        # A shape change means a centroid was dropped: not converged, and
        # comparing would either raise or silently broadcast.
        if new_centroids.shape == centroids.shape and np.allclose(
            new_centroids, centroids
        ):
            break
        centroids = new_centroids

    return centroids, clusters


# Main execution
# Guarded so that importing this module (for example to run its doctests) does
# not generate data, run the clustering, or open a plot window.
if __name__ == "__main__":
    n_samples, n_clusters = 10, 2
    data, labels = generate_data(n_samples, n_clusters=n_clusters)

    plt.figure(figsize=(12, 5))

    # Left panel: the generated points coloured by their blob label.
    plt.subplot(121)
    plt.scatter(data[:, 0], data[:, 1], c=labels)
    plt.title("Generated Data")

    final_centroids, final_clusters = quantum_kmeans(data, n_clusters)

    # Right panel: the clusters found by the algorithm plus their centroids.
    plt.subplot(122)
    for i, cluster in enumerate(final_clusters):
        if len(cluster) == 0:
            # An empty cluster becomes a 1-D array and cannot be column-indexed.
            continue
        cluster_points = np.array(cluster)
        plt.scatter(
            cluster_points[:, 0], cluster_points[:, 1], label=f"Cluster {i+1}"
        )
    plt.scatter(
        final_centroids[:, 0],
        final_centroids[:, 1],
        color="red",
        marker="x",
        s=200,
        linewidths=3,
        label="Centroids",
    )
    plt.title("Quantum k-Means Clustering with Cirq")
    plt.legend()

    plt.tight_layout()
    plt.show()

    print(f"Final Centroids:\n{final_centroids}")
Loading