# common.py
# 848 Bytes
"""
Common utilities for testing clustering.
"""
import numpy as np
###############################################################################
# Generate sample data
def generate_clustered_data(seed=0, n_clusters=3, n_features=2,
                            n_samples_per_cluster=20, std=.4):
    """Generate reproducible Gaussian clusters around fixed centers.

    Each cluster is drawn from an isotropic normal distribution centered on
    one row of a hard-coded table of means (4 centers, 4 coordinates each).
    Clusters are stacked in order, so rows
    ``[i * n_samples_per_cluster:(i + 1) * n_samples_per_cluster]`` belong
    to cluster ``i``.

    Parameters
    ----------
    seed : int or array-like, optional
        Seed passed to ``np.random.RandomState``.
    n_clusters : int, optional
        Number of clusters to generate; at most the number of rows in the
        means table (4).
    n_features : int, optional
        Number of coordinates per sample; at most the number of columns in
        the means table (4).
    n_samples_per_cluster : int, optional
        Number of samples drawn for each cluster.
    std : float, optional
        Standard deviation of the noise added around each center.

    Returns
    -------
    X : ndarray of shape (n_clusters * n_samples_per_cluster, n_features)
        The generated samples.

    Raises
    ------
    ValueError
        If ``n_clusters`` or ``n_features`` exceeds what the means table
        provides.
    """
    prng = np.random.RandomState(seed)

    # The data is deliberately shifted away from zero to check clustering
    # algorithm robustness with regard to non-centered data.
    means = np.array([[1, 1, 1, 0],
                      [-1, -1, 0, 1],
                      [1, -1, 1, 1],
                      [-1, 1, 1, 0],
                      ]) + 10

    # Fail early with a clear message instead of an opaque IndexError or
    # broadcasting error from deep inside the generation loop.
    max_clusters, max_features = means.shape
    if n_clusters > max_clusters:
        raise ValueError(
            "n_clusters=%r exceeds the %d predefined cluster centers"
            % (n_clusters, max_clusters))
    if n_features > max_features:
        raise ValueError(
            "n_features=%r exceeds the %d predefined coordinates per center"
            % (n_features, max_features))

    # Draw one block per cluster, in cluster order, so the random stream is
    # consumed exactly as before; concatenate once at the end rather than
    # re-copying the growing array on every iteration.
    blocks = [means[i, :n_features]
              + std * prng.randn(n_samples_per_cluster, n_features)
              for i in range(n_clusters)]
    if not blocks:
        # np.concatenate rejects an empty list; keep the empty-result shape.
        return np.empty((0, n_features))
    return np.concatenate(blocks, axis=0)