pandas-dev · max-sixty · Dec 23, 2016 · Dec 23, 2016 · Dec 24, 2016 · Jan 2, 2017
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -585,6 +585,7 @@ Bug Fixes
 - Bug in ``DataFrame.sort_values()`` when sorting by multiple columns where one column is of type ``int64`` and contains ``NaT`` (:issue:`14922`)
 - Bug in ``DataFrame.reindex()`` in which ``method`` was ignored when passing ``columns`` (:issue:`14992`)
 - Bug in ``pd.to_numeric()`` in which float and unsigned integer elements were being improperly casted (:issue:`14941`, :issue:`15005`)
+- Cleaned up ``PeriodIndex`` constructor, including raising on floats more consistently (:issue:`13277`)
 - Bug in ``pd.read_csv()`` in which the ``dialect`` parameter was not being verified before processing (:issue:`14898`)
 - Bug in ``pd.read_fwf`` where the skiprows parameter was not being respected during column width inference (:issue:`11256`)
 - Bug in ``pd.read_csv()`` in which missing data was being improperly handled with ``usecols`` (:issue:`6710`)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -471,8 +471,8 @@ def _value_counts_arraylike(values, dropna=True):
         # dtype handling
         if is_datetimetz_type:
             keys = DatetimeIndex._simple_new(keys, tz=orig.dtype.tz)
-        if is_period_type:
-            keys = PeriodIndex._simple_new(keys, freq=freq)
+        elif is_period_type:
+            keys = PeriodIndex._from_ordinals(keys, freq=freq)
 
     elif is_signed_integer_dtype(dtype):
         values = _ensure_int64(values)

diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py
@@ -88,6 +88,11 @@ def _new_Index(cls, d):
     """ This is called upon unpickling, rather than the default which doesn't
     have arguments and breaks __new__
     """
+    # required for backward compat, because PI can't be instantiated with
+    # ordinals through __new__ GH #13277
+    if issubclass(cls, ABCPeriodIndex):
+        from pandas.tseries.period import _new_PeriodIndex
+        return _new_PeriodIndex(cls, **d)
     return cls.__new__(cls, **d)
 
 

diff --git a/pandas/io/packers.py b/pandas/io/packers.py
@@ -573,7 +573,7 @@ def decode(obj):
     elif typ == u'period_index':
         data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
         d = dict(name=obj[u'name'], freq=obj[u'freq'])
-        return globals()[obj[u'klass']](data, **d)
+        return globals()[obj[u'klass']]._from_ordinals(data, **d)
     elif typ == u'datetime_index':
         data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
         d = dict(name=obj[u'name'], freq=obj[u'freq'], verify_integrity=False)

diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py
@@ -120,7 +120,7 @@ def test_constructor_fromarraylike(self):
 
         self.assertRaises(ValueError, PeriodIndex, idx._values)
         self.assertRaises(ValueError, PeriodIndex, list(idx._values))
-        self.assertRaises(ValueError, PeriodIndex,
+        self.assertRaises(TypeError, PeriodIndex,
                           data=Period('2007', freq='A'))
 
         result = PeriodIndex(iter(idx))
@@ -285,12 +285,15 @@ def test_constructor_simple_new_empty(self):
         result = idx._simple_new(idx, name='p', freq='M')
         tm.assert_index_equal(result, idx)
 
-    def test_constructor_simple_new_floats(self):
+    def test_constructor_floats(self):
         # GH13079
-        for floats in [[1.1], np.array([1.1])]:
+        for floats in [[1.1, 2.1], np.array([1.1, 2.1])]:
             with self.assertRaises(TypeError):
                 pd.PeriodIndex._simple_new(floats, freq='M')
 
+            with self.assertRaises(TypeError):
+                pd.PeriodIndex(floats, freq='M')
+
     def test_constructor_nat(self):
         self.assertRaises(ValueError, period_range, start='NaT',
                           end='2011-01-01', freq='M')

diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py
@@ -17,7 +17,6 @@
                                  is_period_dtype,
                                  is_bool_dtype,
                                  pandas_dtype,
-                                 _ensure_int64,
                                  _ensure_object)
 from pandas.types.dtypes import PeriodDtype
 from pandas.types.generic import ABCSeries
@@ -114,6 +113,13 @@ def wrapper(self, other):
     return wrapper
 
 
+def _new_PeriodIndex(cls, **d):
+    # GH13277 for unpickling
+    if d['data'].dtype == 'int64':
+        values = d.pop('data')
+    return cls._from_ordinals(values=values, **d)
+
+
 class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index):
     """
     Immutable ndarray holding ordinal values indicating regular periods in
@@ -209,17 +215,56 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
                 msg = 'specified freq and dtype are different'
                 raise IncompatibleFrequency(msg)
 
+        # coerce freq to freq object, otherwise it can be coerced elementwise
+        # which is slow
+        if freq:
+            freq = Period._maybe_convert_freq(freq)
+
         if data is None:
             if ordinal is not None:
                 data = np.asarray(ordinal, dtype=np.int64)
             else:
                 data, freq = cls._generate_range(start, end, periods,
                                                  freq, kwargs)
-        else:
-            ordinal, freq = cls._from_arraylike(data, freq, tz)
-            data = np.array(ordinal, dtype=np.int64, copy=copy)
+            return cls._from_ordinals(data, name=name, freq=freq)
 
-        return cls._simple_new(data, name=name, freq=freq)
+        if isinstance(data, PeriodIndex):
+            if freq is None or freq == data.freq:  # no freq change
+                freq = data.freq
+                data = data._values
+            else:
+                base1, _ = _gfc(data.freq)
+                base2, _ = _gfc(freq)
+                data = period.period_asfreq_arr(data._values,
+                                                base1, base2, 1)
+            return cls._simple_new(data, name=name, freq=freq)
+
+        # not array / index
+        if not isinstance(data, (np.ndarray, PeriodIndex,
+                                 DatetimeIndex, Int64Index)):
+            if is_scalar(data) or isinstance(data, Period):
+                cls._scalar_data_error(data)
+
+            # other iterable of some kind
+            if not isinstance(data, (list, tuple)):
+                data = list(data)
+
+            data = np.asarray(data)
+
+        # datetime other than period
+        if is_datetime64_dtype(data.dtype):
+            data = dt64arr_to_periodarr(data, freq, tz)
+            return cls._from_ordinals(data, name=name, freq=freq)
+
+        # check not floats
+        if infer_dtype(data) == 'floating' and len(data) > 0:
+            raise TypeError("PeriodIndex can't take floats")
+
+        # anything else, likely an array of strings or periods
+        data = _ensure_object(data)
+        freq = freq or period.extract_freq(data)
+        data = period.extract_ordinals(data, freq)
+        return cls._from_ordinals(data, name=name, freq=freq)
 
     @classmethod
     def _generate_range(cls, start, end, periods, freq, fields):
@@ -240,85 +285,34 @@ def _generate_range(cls, start, end, periods, freq, fields):
 
         return subarr, freq
 
-    @classmethod
-    def _from_arraylike(cls, data, freq, tz):
-        if freq is not None:
-            freq = Period._maybe_convert_freq(freq)
-
-        if not isinstance(data, (np.ndarray, PeriodIndex,
-                                 DatetimeIndex, Int64Index)):
-            if is_scalar(data) or isinstance(data, Period):
-                raise ValueError('PeriodIndex() must be called with a '
-                                 'collection of some kind, %s was passed'
-                                 % repr(data))
-
-            # other iterable of some kind
-            if not isinstance(data, (list, tuple)):
-                data = list(data)
-
-            try:
-                data = _ensure_int64(data)
-                if freq is None:
-                    raise ValueError('freq not specified')
-                data = np.array([Period(x, freq=freq) for x in data],
-                                dtype=np.int64)
-            except (TypeError, ValueError):
-                data = _ensure_object(data)
-
-                if freq is None:
-                    freq = period.extract_freq(data)
-                data = period.extract_ordinals(data, freq)
-        else:
-            if isinstance(data, PeriodIndex):
-                if freq is None or freq == data.freq:
-                    freq = data.freq
-                    data = data._values
-                else:
-                    base1, _ = _gfc(data.freq)
-                    base2, _ = _gfc(freq)
-                    data = period.period_asfreq_arr(data._values,
-                                                    base1, base2, 1)
-            else:
-                if is_object_dtype(data):
-                    inferred = infer_dtype(data)
-                    if inferred == 'integer':
-                        data = data.astype(np.int64)
-
-                if freq is None and is_object_dtype(data):
-                    # must contain Period instance and thus extract ordinals
-                    freq = period.extract_freq(data)
-                    data = period.extract_ordinals(data, freq)
-
-                if freq is None:
-                    msg = 'freq not specified and cannot be inferred'
-                    raise ValueError(msg)
-
-                if data.dtype != np.int64:
-                    if np.issubdtype(data.dtype, np.datetime64):
-                        data = dt64arr_to_periodarr(data, freq, tz)
-                    else:
-                        data = _ensure_object(data)
-                        data = period.extract_ordinals(data, freq)
-
-        return data, freq
-
     @classmethod
     def _simple_new(cls, values, name=None, freq=None, **kwargs):
-
+        """
+        Values can be any type that can be coerced to Periods.
+        Ordinals in an ndarray are fastpath-ed to `_from_ordinals`
+        """
         if not is_integer_dtype(values):
             values = np.array(values, copy=False)
-            if (len(values) > 0 and is_float_dtype(values)):
+            if len(values) > 0 and is_float_dtype(values):
                 raise TypeError("PeriodIndex can't take floats")
-            else:
-                return cls(values, name=name, freq=freq, **kwargs)
+            return cls(values, name=name, freq=freq, **kwargs)
+
+        return cls._from_ordinals(values, name, freq, **kwargs)
+
+    @classmethod
+    def _from_ordinals(cls, values, name=None, freq=None, **kwargs):
+        """
+        Values should be int ordinals
+        `__new__` & `_simple_new` cooerce to ordinals and call this method
+        """
 
         values = np.array(values, dtype='int64', copy=False)
 
         result = object.__new__(cls)
         result._data = values
         result.name = name
         if freq is None:
-            raise ValueError('freq is not specified')
+            raise ValueError('freq is not specified and cannot be inferred')
         result.freq = Period._maybe_convert_freq(freq)
         result._reset_identity()
         return result
@@ -327,13 +321,13 @@ def _shallow_copy_with_infer(self, values=None, **kwargs):
         """ we always want to return a PeriodIndex """
         return self._shallow_copy(values=values, **kwargs)
 
-    def _shallow_copy(self, values=None, **kwargs):
-        if kwargs.get('freq') is None:
-            # freq must be provided
-            kwargs['freq'] = self.freq
+    def _shallow_copy(self, values=None, freq=None, **kwargs):
+        if freq is None:
+            freq = self.freq
         if values is None:
             values = self._values
-        return super(PeriodIndex, self)._shallow_copy(values=values, **kwargs)
+        return super(PeriodIndex, self)._shallow_copy(values=values,
+                                                      freq=freq, **kwargs)
 
     def _coerce_scalar_to_index(self, item):
         """
@@ -413,7 +407,7 @@ def __array_wrap__(self, result, context=None):
             return result
         # the result is object dtype array of Period
         # cannot pass _simple_new as it is
-        return PeriodIndex(result, freq=self.freq, name=self.name)
+        return self._shallow_copy(result, freq=self.freq, name=self.name)
 
     @property
     def _box_func(self):
@@ -708,7 +702,7 @@ def shift(self, n):
         values = self._values + n * self.freq.n
         if self.hasnans:
             values[self._isnan] = tslib.iNaT
-        return PeriodIndex(data=values, name=self.name, freq=self.freq)
+        return self._shallow_copy(values=values)
 
     @cache_readonly
     def dtype(self):
@@ -945,7 +939,8 @@ def _wrap_union_result(self, other, result):
 
     def _apply_meta(self, rawarr):
         if not isinstance(rawarr, PeriodIndex):
-            rawarr = PeriodIndex(rawarr, freq=self.freq)
+            rawarr = PeriodIndex._from_ordinals(rawarr, freq=self.freq,
+                                                name=self.name)
         return rawarr
 
     def _format_native_types(self, na_rep=u('NaT'), date_format=None,

diff --git a/setup.cfg b/setup.cfg
@@ -13,6 +13,7 @@ parentdir_prefix = pandas-
 
 [flake8]
 ignore = E731,E402
+max-line-length = 79
 
 [yapf]
 based_on_style = pep8