relax skewness assumption (scikit-learn#7573)

Romain Brault · amueller · commit 15b7d47a2ff3 · 2017-05-18T14:17:10.000-04:00
relax skewedness assumption

relax skewedness assumption

relax skewedness assumption

relax skewedness assumption

relax skewedness assumption

last corrections

whats_new

merge

complying whats_new

removed unnecessary _assert_X

increased coverage

relax skewness assumption

relax skewedness assumption

relax skewedness assumption

relax skewedness assumption

relax skewedness assumption

relax skewedness assumption

last corrections

whats_new

merge

complying whats_new

removed unnecessary _assert_X

increased coverage

remove cythonize.dat and merge

simplify tests

simplify tests
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
@@ -87,6 +87,13 @@ Enhancements
      by :user:`Alyssa Batula <abatula>`, :user:`Dylan Werner-Meier <unautre>`,
      and :user:`Stephen Hoover <stephen-hoover>`.
 
+   - Relax assumption on the data for the ``SkewedChi2Sampler``. Since the
+     Skewed-Chi2 kernel is defined on the open interval :math:`(-skewedness;
+     +\infty)^d`, the transform function should not check whether X < 0 but
+     whether ``X < -self.skewedness``. (`#7573
+     <https://github.com/scikit-learn/scikit-learn/pull/7573>`_) by `Romain
+     Brault`_.
+
    - The ``min_weight_fraction_leaf`` constraint in tree construction is now
      more efficient, taking a fast path to declare a node a leaf if its weight
      is less than 2 * the minimum. Note that the constructed tree will be
diff --git a/sklearn/kernel_approximation.py b/sklearn/kernel_approximation.py
@@ -185,7 +185,8 @@ def transform(self, X, y=None):
         ----------
         X : array-like, shape (n_samples, n_features)
             New data, where n_samples in the number of samples
-            and n_features is the number of features.
+            and n_features is the number of features. All values of X must be
+            strictly greater than "-skewedness".
 
         Returns
         -------
@@ -195,8 +196,9 @@ def transform(self, X, y=None):
 
         X = as_float_array(X, copy=True)
         X = check_array(X, copy=False)
-        if (X < 0).any():
-            raise ValueError("X may not contain entries smaller than zero.")
+        if (X <= -self.skewedness).any():
+            raise ValueError("X may not contain entries smaller than"
+                             " -skewedness.")
 
         X += self.skewedness
         np.log(X, X)
diff --git a/sklearn/tests/test_kernel_approximation.py b/sklearn/tests/test_kernel_approximation.py
@@ -84,6 +84,11 @@ def test_skewed_chi2_sampler():
 
     # compute exact kernel
     c = 0.03
+    # set one negative component but greater than -c to ensure that the kernel
+    # approximation is valid on the group (-c; +\infty) endowed with the skewed
+    # multiplication.
+    Y[0, 0] = -c / 2.
+
     # abbreviations for easier formula
     X_c = (X + c)[:, np.newaxis, :]
     Y_c = (Y + c)[np.newaxis, :, :]
@@ -103,10 +108,14 @@ def test_skewed_chi2_sampler():
 
     kernel_approx = np.dot(X_trans, Y_trans.T)
     assert_array_almost_equal(kernel, kernel_approx, 1)
+    assert_true(np.isfinite(kernel).all(),
+                'NaNs found in the Gram matrix')
+    assert_true(np.isfinite(kernel_approx).all(),
+                'NaNs found in the approximate Gram matrix')
 
-    # test error is raised on negative input
+    # test error is raised when inputs contain values smaller than -c
     Y_neg = Y.copy()
-    Y_neg[0, 0] = -1
+    Y_neg[0, 0] = -c * 2.
     assert_raises(ValueError, transform.transform, Y_neg)