Add tests and documentation for column feature

dukebody · dukebody · commit 79a79f1e033d · 2017-04-08T17:29:29.000+02:00
diff --git a/README.rst b/README.rst
@@ -103,6 +103,18 @@ Now that the transformation is trained, we confirm that it works on new data::
     array([[ 1.  ,  0.  ,  0.  ,  1.04]])
 
 
+Output feature names
+********************
+
+In certain cases, like when studying the feature importances for some model,
+we want to be able to associate the original features with the ones generated by
+the dataframe mapper. We can do so by inspecting the automatically generated
+``transformed_names_`` attribute of the mapper after transformation::
+
+    >>> mapper.transformed_names_
+    ['pet_cat', 'pet_dog', 'pet_fish', 'children']
+
+
 Outputting a dataframe
 **********************
 
@@ -123,6 +135,9 @@ By default the output of the dataframe mapper is a numpy array. This is so becau
     6      1.0      0.0       0.0      1.04
     7      0.0      0.0       1.0      0.21
 
+The names for the columns are the same ones present in the ``transformed_names_``
+attribute.
+
 Note this does not work together with the ``default=True`` or ``sparse=True`` arguments to the mapper.
 
 Transform Multiple Columns
@@ -252,6 +267,11 @@ Sklearn-pandas' ``cross_val_score`` function provides exactly the same interface
 Changelog
 ---------
 
+Development
+***********
+* Capture the generated output column names in the ``transformed_names_`` attribute (#78).
+
+
 1.3.0 (2017-01-21)
 ******************
 
@@ -308,5 +328,6 @@ Other contributors:
 * Jeremy Howard
 * Olivier Grisel
 * Paul Butler
+* Ritesh Agrawal
 * Vitaley Zaretskey
 * Zac Stewart
diff --git a/tests/test_dataframe_mapper.py b/tests/test_dataframe_mapper.py
@@ -96,6 +96,28 @@ def complex_dataframe():
                          'feat2': [1, 2, 3, 2, 3, 4]})
 
 
+def test_transformed_names_simple(simple_dataframe):
+    """
+    A column mapped with no transformer (``None``) is reported under its
+    own name in the `transformed_names_` attribute after fit_transform.
+    """
+    mapper = DataFrameMapper([('a', None)])
+    mapper.fit_transform(simple_dataframe)
+    expected_names = ['a']
+    assert mapper.transformed_names_ == expected_names
+
+
+def test_transformed_names_binarizer(complex_dataframe):
+    """
+    Get transformed names of features in `transformed_names_` attribute
+    for a transformation that multiplies the number of columns
+    """
+    df = complex_dataframe
+    mapper = DataFrameMapper([('target', LabelBinarizer())])
+    mapper.fit_transform(df)
+    # The comparison has to be asserted: as a bare expression its result is
+    # evaluated and thrown away, so the test could never fail.
+    # NOTE(review): confirm the expected names against the fixture's
+    # ``target`` values -- LabelBinarizer emits a single column when the
+    # target has only two classes, which may not yield per-class names.
+    assert mapper.transformed_names_ == ['target_a', 'target_b']
+
+
 def test_simple_df(simple_dataframe):
     """
     Get a dataframe from a simple mapped dataframe