pandas-dev
diff --git a/‎doc/source/io.rst
+8-6 b/‎doc/source/io.rst
+8-6
diff --git a/‎doc/source/release.rst
+2 b/‎doc/source/release.rst
+2
diff --git a/‎doc/source/v0.13.0.txt
+3 b/‎doc/source/v0.13.0.txt
+3
diff --git a/‎pandas/io/tests/test_parsers.py
+114-37 b/‎pandas/io/tests/test_parsers.py
+114-37
@@ -118,9 +118,11 @@ They can take a number of arguments:
     date_converters.py
   - ``dayfirst``: if True then uses the DD/MM international/European date format
     (This is False by default)
-  - ``thousands``: sepcifies the thousands separator. If not None, then parser
-    will try to look for it in the output and parse relevant data to integers.
-    Because it has to essentially scan through the data again, this causes a
+  - ``thousands``: specifies the thousands separator. If not None, this character will
+    be stripped from numeric dtypes. However, if it is the first character in a field,
+    that column will be imported as a string. In the PythonParser, if not None,
+    then parser will try to look for it in the output and parse relevant data to numeric
+    dtypes. Because it has to essentially scan through the data again, this causes a
     significant performance hit so only use if necessary.
   - ``lineterminator`` : string (length 1), default ``None``, Character to break file into lines. Only valid with C parser
   - ``quotechar`` : string, The character to used to denote the start and end of a quoted item.
@@ -506,8 +508,8 @@ DD/MM/YYYY instead. For convenience, a ``dayfirst`` keyword is provided:
 
 Thousand Separators
 ~~~~~~~~~~~~~~~~~~~
-For large integers that have been written with a thousands separator, you can
-set the ``thousands`` keyword to ``True`` so that integers will be parsed
+For large numbers that have been written with a thousands separator, you can
+set the ``thousands`` keyword to a string of length 1 so that integers will be parsed
 correctly:
 
 .. ipython:: python
@@ -521,7 +523,7 @@ correctly:
    with open('tmp.csv', 'w') as fh:
        fh.write(data)
 
-By default, integers with a thousands separator will be parsed as strings
+By default, numbers with a thousands separator will be parsed as strings
 
 .. ipython:: python
 
 
@@ -273,6 +273,8 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
   - Fix selection with ``ix/loc`` and non_unique selectors (:issue:`4619`)
   - Fix assignment with iloc/loc involving a dtype change in an existing column (:issue:`4312`)
     have internal setitem_with_indexer in core/indexing to use Block.setitem
+  - Fixed bug where thousands operator was not handled correctly for floating point numbers
+    in csv_import (:issue:`4322`)
 
 pandas 0.12
 ===========
 
@@ -286,6 +286,9 @@ Bug Fixes
 
   - Suppressed DeprecationWarning associated with internal calls issued by repr() (:issue:`4391`)
 
+  - Fixed bug where thousands operator was not handled correctly for floating point numbers
+    in csv_import (:issue:`4322`)
+
 See the :ref:`full release notes
 <release>` or issue tracker
 on GitHub for a complete list.
@@ -13,17 +13,13 @@
 
 from pandas import DataFrame, Series, Index, MultiIndex, DatetimeIndex
 from pandas.compat import(
-    StringIO, BytesIO, PY3, range, long, lrange, lmap, u, map, StringIO
+    StringIO, BytesIO, PY3, range, long, lrange, lmap, u
 )
 from pandas.io.common import urlopen, URLError
 import pandas.io.parsers as parsers
 from pandas.io.parsers import (read_csv, read_table, read_fwf,
                                TextFileReader, TextParser)
-from pandas.util.testing import (assert_almost_equal,
-                                 assert_series_equal,
-                                 makeCustomDataframe as mkdf,
-                                 network,
-                                 ensure_clean)
+
 import pandas.util.testing as tm
 import pandas as pd
 
@@ -67,6 +63,35 @@ def setUp(self):
         self.csv2 = os.path.join(self.dirpath, 'test2.csv')
         self.xls1 = os.path.join(self.dirpath, 'test.xls')
 
+    def test_multi_character_decimal_marker(self):
+        data = """A|B|C
+1|2,334|5
+10|13|10.
+"""
+        self.assertRaises(ValueError, read_csv, StringIO(data), decimal=',,')
+
+    def test_empty_decimal_marker(self):
+        data = """A|B|C
+1|2,334|5
+10|13|10.
+"""
+        self.assertRaises(ValueError, read_csv, StringIO(data), decimal='')
+
+    def test_empty_thousands_marker(self):
+        data = """A|B|C
+1|2,334|5
+10|13|10.
+"""
+        self.assertRaises(ValueError, read_csv, StringIO(data), thousands='')
+
+
+    def test_multi_character_decimal_marker(self):
+        data = """A|B|C
+1|2,334|5
+10|13|10.
+"""
+        self.assertRaises(ValueError, read_csv, StringIO(data), thousands=',,')
+
     def test_empty_string(self):
         data = """\
 One,Two,Three
@@ -164,14 +189,48 @@ def test_1000_sep(self):
 1|2,334|5
 10|13|10.
 """
-        expected = [[1, 2334., 5],
-                    [10, 13, 10]]
+        expected = DataFrame({
+            'A': [1, 10],
+            'B': [2334, 13],
+            'C': [5, 10.]
+        })
 
         df = self.read_csv(StringIO(data), sep='|', thousands=',')
-        assert_almost_equal(df.values, expected)
+        tm.assert_frame_equal(df, expected)
 
         df = self.read_table(StringIO(data), sep='|', thousands=',')
-        assert_almost_equal(df.values, expected)
+        tm.assert_frame_equal(df, expected)
+
+    def test_1000_sep_with_decimal(self):
+        data = """A|B|C
+1|2,334.01|5
+10|13|10.
+"""
+        expected = DataFrame({
+            'A': [1, 10],
+            'B': [2334.01, 13],
+            'C': [5, 10.]
+        })
+
+        tm.assert_equal(expected.A.dtype, 'int64')
+        tm.assert_equal(expected.B.dtype, 'float')
+        tm.assert_equal(expected.C.dtype, 'float')
+
+        df = self.read_csv(StringIO(data), sep='|', thousands=',', decimal='.')
+        tm.assert_frame_equal(df, expected)
+
+        df = self.read_table(StringIO(data), sep='|', thousands=',', decimal='.')
+        tm.assert_frame_equal(df, expected)
+
+        data_with_odd_sep = """A|B|C
+1|2.334,01|5
+10|13|10,
+"""
+        df = self.read_csv(StringIO(data_with_odd_sep), sep='|', thousands='.', decimal=',')
+        tm.assert_frame_equal(df, expected)
+
+        df = self.read_table(StringIO(data_with_odd_sep), sep='|', thousands='.', decimal=',')
+        tm.assert_frame_equal(df, expected)
 
     def test_squeeze(self):
         data = """\
@@ -183,7 +242,7 @@ def test_squeeze(self):
         result = self.read_table(StringIO(data), sep=',', index_col=0,
                                  header=None, squeeze=True)
         tm.assert_isinstance(result, Series)
-        assert_series_equal(result, expected)
+        tm.assert_series_equal(result, expected)
 
     def test_inf_parsing(self):
         data = """\
@@ -201,9 +260,9 @@ def test_inf_parsing(self):
         inf = float('inf')
         expected = Series([inf, -inf] * 5)
         df = read_csv(StringIO(data), index_col=0)
-        assert_almost_equal(df['A'].values, expected.values)
+        tm.assert_almost_equal(df['A'].values, expected.values)
         df = read_csv(StringIO(data), index_col=0, na_filter=False)
-        assert_almost_equal(df['A'].values, expected.values)
+        tm.assert_almost_equal(df['A'].values, expected.values)
 
     def test_multiple_date_col(self):
         # Can use multiple date parsers
@@ -524,7 +583,7 @@ def test_passing_dtype(self):
 
         df = DataFrame(np.random.rand(5,2),columns=list('AB'),index=['1A','1B','1C','1D','1E'])
 
-        with ensure_clean('__passing_str_as_dtype__.csv') as path:
+        with tm.ensure_clean('__passing_str_as_dtype__.csv') as path:
             df.to_csv(path)
 
             # GH 3795
@@ -566,7 +625,7 @@ def test_quoting(self):
 
     def test_non_string_na_values(self):
         # GH3611, na_values that are not a string are an issue
-        with ensure_clean('__non_string_na_values__.csv') as path:
+        with tm.ensure_clean('__non_string_na_values__.csv') as path:
             df = DataFrame({'A' : [-999, 2, 3], 'B' : [1.2, -999, 4.5]})
             df.to_csv(path, sep=' ', index=False)
             result1 = read_csv(path, sep= ' ', header=0, na_values=['-999.0','-999'])
@@ -617,15 +676,15 @@ def test_custom_na_values(self):
                     [7, 8, nan]]
 
         df = self.read_csv(StringIO(data), na_values=['baz'], skiprows=[1])
-        assert_almost_equal(df.values, expected)
+        tm.assert_almost_equal(df.values, expected)
 
         df2 = self.read_table(StringIO(data), sep=',', na_values=['baz'],
                               skiprows=[1])
-        assert_almost_equal(df2.values, expected)
+        tm.assert_almost_equal(df2.values, expected)
 
         df3 = self.read_table(StringIO(data), sep=',', na_values='baz',
                               skiprows=[1])
-        assert_almost_equal(df3.values, expected)
+        tm.assert_almost_equal(df3.values, expected)
 
     def test_nat_parse(self):
 
@@ -635,7 +694,7 @@ def test_nat_parse(self):
                     'B' : pd.Timestamp('20010101') }))
         df.iloc[3:6,:] = np.nan
 
-        with ensure_clean('__nat_parse_.csv') as path:
+        with tm.ensure_clean('__nat_parse_.csv') as path:
             df.to_csv(path)
             result = read_csv(path,index_col=0,parse_dates=['B'])
             tm.assert_frame_equal(result,df)
@@ -686,7 +745,7 @@ def test_detect_string_na(self):
                     [nan, nan]]
 
         df = self.read_csv(StringIO(data))
-        assert_almost_equal(df.values, expected)
+        tm.assert_almost_equal(df.values, expected)
 
     def test_unnamed_columns(self):
         data = """A,B,C,,
@@ -698,7 +757,7 @@ def test_unnamed_columns(self):
                     [6, 7, 8, 9, 10],
                     [11, 12, 13, 14, 15]]
         df = self.read_table(StringIO(data), sep=',')
-        assert_almost_equal(df.values, expected)
+        tm.assert_almost_equal(df.values, expected)
         self.assert_(np.array_equal(df.columns,
                                     ['A', 'B', 'C', 'Unnamed: 3',
                                      'Unnamed: 4']))
@@ -849,8 +908,8 @@ def test_no_header(self):
         expected = [[1, 2, 3, 4, 5.],
                     [6, 7, 8, 9, 10],
                     [11, 12, 13, 14, 15]]
-        assert_almost_equal(df.values, expected)
-        assert_almost_equal(df.values, df2.values)
+        tm.assert_almost_equal(df.values, expected)
+        tm.assert_almost_equal(df.values, df2.values)
 
         self.assert_(np.array_equal(df_pref.columns,
                                     ['X0', 'X1', 'X2', 'X3', 'X4']))
@@ -1113,7 +1172,7 @@ def test_header_not_first_line(self):
         tm.assert_frame_equal(df, expected)
 
     def test_header_multi_index(self):
-        expected = mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4)
+        expected = tm.makeCustomDataframe(5,3,r_idx_nlevels=2,c_idx_nlevels=4)
 
         data = """\
 C0,,C_l0_g0,C_l0_g1,C_l0_g2
@@ -1413,7 +1472,7 @@ def test_na_value_dict(self):
         tm.assert_frame_equal(df, xp)
 
     @slow
-    @network
+    @tm.network
     def test_url(self):
         try:
             # HTTP(S)
@@ -1428,7 +1487,7 @@ def test_url(self):
 
         except URLError:
             try:
-                with closing(urlopen('http://www.google.com')) as resp:
+                with tm.closing(urlopen('http://www.google.com')) as resp:
                     pass
             except URLError:
                 raise nose.SkipTest
@@ -1533,11 +1592,11 @@ def test_comment(self):
         expected = [[1., 2., 4.],
                     [5., np.nan, 10.]]
         df = self.read_csv(StringIO(data), comment='#')
-        assert_almost_equal(df.values, expected)
+        tm.assert_almost_equal(df.values, expected)
 
         df = self.read_table(StringIO(data), sep=',', comment='#',
                              na_values=['NaN'])
-        assert_almost_equal(df.values, expected)
+        tm.assert_almost_equal(df.values, expected)
 
     def test_bool_na_values(self):
         data = """A,B,C
@@ -1595,7 +1654,7 @@ def test_utf16_bom_skiprows(self):
 
         path = '__%s__.csv' % tm.rands(10)
 
-        with ensure_clean(path) as path:
+        with tm.ensure_clean(path) as path:
             for sep, dat in [('\t', data), (',', data2)]:
                 for enc in ['utf-16', 'utf-16le', 'utf-16be']:
                     bytes = dat.encode(enc)
@@ -1860,7 +1919,25 @@ def test_1000_fwf(self):
                     [10, 13, 10]]
         df = read_fwf(StringIO(data), colspecs=[(0, 3), (3, 11), (12, 16)],
                       thousands=',')
-        assert_almost_equal(df.values, expected)
+        tm.assert_almost_equal(df.values, expected)
+
+    def test_1000_sep_with_decimal(self):
+        data = """A|B|C
+1|2,334.01|5
+10|13|10.
+"""
+
+        expected = DataFrame({
+            'A': [1, 10],
+            'B': [2334.01, 13],
+            'C': [5, 10.]
+        })
+
+        df = self.read_csv(StringIO(data), sep='|', thousands=',')
+        tm.assert_frame_equal(df, expected)
+
+        df = self.read_table(StringIO(data), sep='|', thousands=',')
+        tm.assert_frame_equal(df, expected)
 
     def test_comment_fwf(self):
         data = """
@@ -1871,7 +1948,7 @@ def test_comment_fwf(self):
                     [5, np.nan, 10.]]
         df = read_fwf(StringIO(data), colspecs=[(0, 3), (4, 9), (9, 25)],
                       comment='#')
-        assert_almost_equal(df.values, expected)
+        tm.assert_almost_equal(df.values, expected)
 
     def test_fwf(self):
         data_expected = """\
@@ -1993,7 +2070,7 @@ def test_iteration_open_handle(self):
         if PY3:
             raise nose.SkipTest
 
-        with ensure_clean() as path:
+        with tm.ensure_clean() as path:
             with open(path, 'wb') as f:
                 f.write('AAA\nBBB\nCCC\nDDD\nEEE\nFFF\nGGG')
 
@@ -2212,7 +2289,7 @@ def test_decompression(self):
         data = open(self.csv1, 'rb').read()
         expected = self.read_csv(self.csv1)
 
-        with ensure_clean() as path:
+        with tm.ensure_clean() as path:
             tmp = gzip.GzipFile(path, mode='wb')
             tmp.write(data)
             tmp.close()
@@ -2223,7 +2300,7 @@ def test_decompression(self):
             result = self.read_csv(open(path, 'rb'), compression='gzip')
             tm.assert_frame_equal(result, expected)
 
-        with ensure_clean() as path:
+        with tm.ensure_clean() as path:
             tmp = bz2.BZ2File(path, mode='wb')
             tmp.write(data)
             tmp.close()
@@ -2248,15 +2325,15 @@ def test_decompression_regex_sep(self):
         data = data.replace(b',', b'::')
         expected = self.read_csv(self.csv1)
 
-        with ensure_clean() as path:
+        with tm.ensure_clean() as path:
             tmp = gzip.GzipFile(path, mode='wb')
             tmp.write(data)
             tmp.close()
 
             result = self.read_csv(path, sep='::', compression='gzip')
             tm.assert_frame_equal(result, expected)
 
-        with ensure_clean() as path:
+        with tm.ensure_clean() as path:
             tmp = bz2.BZ2File(path, mode='wb')
             tmp.write(data)
             tmp.close()
@@ -2470,7 +2547,7 @@ def test_convert_sql_column_decimals(self):
 
 def assert_same_values_and_dtype(res, exp):
     assert(res.dtype == exp.dtype)
-    assert_almost_equal(res, exp)
+    tm.assert_almost_equal(res, exp)
 
 
 if __name__ == '__main__':