-
-
Notifications
You must be signed in to change notification settings - Fork 46.9k
Add quantum/breast_cancer.py #2983
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
3b09d12
9743b92
fccec22
5475279
3a2aa88
1a5ae93
02fac3f
3e15438
2f75396
577843f
58dc11d
fb24067
38b4fe4
7607eae
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
import numpy as np | ||
from sklearn import datasets | ||
from sklearn.decomposition import PCA | ||
from sklearn.model_selection import train_test_split | ||
from sklearn.preprocessing import MinMaxScaler, StandardScaler | ||
|
||
|
||
def breast_cancer(
    training_size: int, test_size: int, n: int, plot_data: bool = False
) -> tuple:
    """Return a PCA-reduced, rescaled subset of the breast cancer dataset.

    The scikit-learn breast cancer dataset is split into train and test
    partitions (fixed 70/30 split, ``random_state=12``), standardized with a
    scaler fitted on the train partition, projected onto ``n`` principal
    components and finally scaled to the range (-1, +1).

    Wikipedia reference: https://en.wikipedia.org/wiki/Breast_cancer

    Args:
        training_size: maximum number of training samples kept per class.
        test_size: maximum number of test samples kept per class.  This is
            NOT the train/test split ratio, which is fixed at 0.3.
        n: number of principal components (features) to keep.
        plot_data: when True, scatter-plot the first two components of the
            selected training samples with matplotlib.  This indexes
            components 0 and 1, so it needs ``n >= 2``.  If matplotlib is
            not installed a warning is issued and plotting is skipped.

    Returns:
        A 4-tuple ``(sample_train, training_input, test_input, class_labels)``:
        ``sample_train`` is the full transformed training partition,
        ``training_input`` and ``test_input`` map each class label to an
        array of at most ``training_size`` / ``test_size`` samples, and
        ``class_labels`` is ``["A", "B"]``.

    >>> sample_train, training_input, test_input, class_labels = breast_cancer(
    ...     10, 4, 7
    ... )
    >>> class_labels
    ['A', 'B']
    >>> training_input["A"].shape
    (10, 7)
    >>> test_input["B"].shape
    (4, 7)
    >>> sample_train.shape
    (398, 7)
    """
    class_labels = ["A", "B"]
    data, target = datasets.load_breast_cancer(return_X_y=True)
    sample_train, sample_test, label_train, label_test = train_test_split(
        data, target, test_size=0.3, random_state=12
    )

    # Standardize for gaussian around 0 with unit variance; the scaler is
    # fitted on the training partition only and then applied to both.
    std_scale = StandardScaler().fit(sample_train)
    sample_train = std_scale.transform(sample_train)
    sample_test = std_scale.transform(sample_test)

    # Reduce the number of features to the number of qubits
    pca = PCA(n_components=n).fit(sample_train)
    sample_train = pca.transform(sample_train)
    sample_test = pca.transform(sample_test)

    # Scale to the range (-1, +1); the min/max scaler is fitted on the train
    # and test samples combined so both partitions land inside the range.
    samples = np.append(sample_train, sample_test, axis=0)
    minmax_scale = MinMaxScaler((-1, 1)).fit(samples)
    sample_train = minmax_scale.transform(sample_train)
    sample_test = minmax_scale.transform(sample_test)

    # Pick at most training_size / test_size samples from each class
    training_input = {
        key: (sample_train[label_train == k, :])[:training_size]
        for k, key in enumerate(class_labels)
    }
    test_input = {
        key: (sample_test[label_test == k, :])[:test_size]
        for k, key in enumerate(class_labels)
    }

    if plot_data:
        try:
            import matplotlib.pyplot as plt
        except ImportError as e:
            # Plotting is best-effort: report the problem and still return
            # the data instead of failing later on an unbound ``plt``.
            import warnings

            warnings.warn(f"plot_data ignored: {e}", stacklevel=2)
        else:
            for k in range(len(class_labels)):
                plt.scatter(
                    sample_train[label_train == k, 0][:training_size],
                    sample_train[label_train == k, 1][:training_size],
                )

            plt.title("PCA dim. reduced Breast cancer dataset")
            plt.show()

    return sample_train, training_input, test_input, class_labels
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Needs type hints and doctests as discussed in CONTRIBUTING.md.