Skip to content

Commit 8562adc

Browse files
committed
Merge commit 'v0.8.0b2-68-g7240b87' into debian-0.8
* commit 'v0.8.0b2-68-g7240b87': (68 commits)
  TST: additional coverage and cruft removal for ts plotting pandas-dev#1245
  BUG: test coverage, groupby bug fixes
  BUG: fix NumPy 1.7 argmin workaround, test coverage
  BUG: out of bounds on buffer access if time doesn't exist in TimeSeries.at_time
  BUG: revert mpl hackaround
  TST: resample test coverage etc. pandas-dev#1245
  BUG: test coverage and misc bug fixes, cruft deletion in period.py pandas-dev#1245
  TST: finish test coverage of pandas.tseries.index pandas-dev#1245
  BUG: fix closed='left' resample bug. test coverage pandas-dev#1245
  TST: test coverage pandas-dev#1245
  BUG: raise exception in DataFrame.fillna when axis=1 and pass dict/Series. close pandas-dev#1485
  BUG: fillna called with Series should be analogous to with dict close pandas-dev#1486
  BUG: fix MS/BMS range generation / onOffset bugs causing pandas-dev#1483
  ENH: at_time/between_time work with tz-localized time series. refactoring and cleanup close pandas-dev#1481
  BUG: label slicing with duplicate values, close pandas-dev#1480
  TST: remove rogue print statement
  BUG: fixed broken imports
  BUG: do not convert bday freq in ts plots pandas-dev#1482
  BUG: mask NaNs in non-ts plots
  TST: test case for tseries plots with data gaps
  ...
2 parents a1d7688 + 7240b87 commit 8562adc

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+2425
-1605
lines changed

RELEASE.rst

+4
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,9 @@ pandas 0.8.0
139139
- Series.append and DataFrame.append no longer check for duplicate indexes
140140
by default, add verify_integrity parameter (#1394)
141141
- Refactor Factor class, old constructor moved to Factor.from_array
142+
- Modified internals of MultiIndex to use less memory (no longer represented
143+
as array of tuples) internally, speed up construction time and many methods
144+
which construct intermediate hierarchical indexes (#1467)
142145

143146
**Bug fixes**
144147

@@ -186,6 +189,7 @@ pandas 0.8.0
186189
- Reset index mapping when grouping Series in Cython (#1423)
187190
- Fix outer/inner DataFrame.join with non-unique indexes (#1421)
188191
- Fix MultiIndex groupby bugs with empty lower levels (#1401)
192+
- Calling fillna with a Series will have same behavior as with dict (#1486)
189193

190194
pandas 0.7.3
191195
============

doc/make.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def latex():
7979

8080
os.chdir('../..')
8181
else:
82-
print 'latex build has not been tested on windows'
82+
print('latex build has not been tested on windows')
8383

8484
def check_build():
8585
build_dirs = [

doc/source/visualization.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ The ``plot`` method on Series and DataFrame is just a simple wrapper around
4444
@savefig series_plot_basic.png width=4.5in
4545
ts.plot()
4646
47-
If the index consists of dates, it calls ``gca().autofmt_xdate()`` to try to
47+
If the index consists of dates, it calls ``gcf().autofmt_xdate()`` to try to
4848
format the x-axis nicely as per above. The method takes a number of arguments
4949
for controlling the look of the plot:
5050

pandas/core/algorithms.py

+13-5
Original file line numberDiff line numberDiff line change
@@ -78,21 +78,21 @@ def _count_generic(values, table_type, type_caster):
7878
from pandas.core.series import Series
7979

8080
values = type_caster(values)
81-
table = table_type(len(values))
81+
table = table_type(min(len(values), 1000000))
8282
uniques, labels, counts = table.factorize(values)
8383

8484
return Series(counts, index=uniques)
8585

8686
def _match_generic(values, index, table_type, type_caster):
8787
values = type_caster(values)
8888
index = type_caster(index)
89-
table = table_type(len(index))
89+
table = table_type(min(len(index), 1000000))
9090
table.map_locations(index)
9191
return table.lookup(values)
9292

9393
def _unique_generic(values, table_type, type_caster):
9494
values = type_caster(values)
95-
table = table_type(len(values))
95+
table = table_type(min(len(values), 1000000))
9696
uniques = table.unique(values)
9797
return type_caster(uniques)
9898

@@ -223,17 +223,25 @@ def quantile(x, q, interpolation_method='fraction'):
223223
score : float
224224
Score at percentile.
225225
226-
Examples
226+
Examples
227227
--------
228228
>>> from scipy import stats
229229
>>> a = np.arange(100)
230230
>>> stats.scoreatpercentile(a, 50)
231231
49.5
232232
233233
"""
234-
values = np.sort(np.asarray(x))
234+
x = np.asarray(x)
235+
mask = com.isnull(x)
236+
237+
x = x[-mask]
238+
239+
values = np.sort(x)
235240

236241
def _get_score(at):
242+
if len(values) == 0:
243+
return np.nan
244+
237245
idx = at * (len(values) - 1)
238246
if (idx % 1 == 0):
239247
score = values[idx]

pandas/core/api.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from pandas.core.algorithms import factorize, match, unique, value_counts
77

88
from pandas.core.common import isnull, notnull, save, load
9-
from pandas.core.factor import Factor
9+
from pandas.core.categorical import Categorical, Factor
1010
from pandas.core.format import (set_printoptions, reset_printoptions,
1111
set_eng_float_format)
1212
from pandas.core.index import Index, Int64Index, MultiIndex
@@ -15,17 +15,16 @@
1515
from pandas.core.frame import DataFrame
1616
from pandas.core.panel import Panel
1717
from pandas.core.groupby import groupby
18-
from pandas.core.reshape import pivot_simple as pivot
18+
from pandas.core.reshape import pivot_simple as pivot, get_dummies
1919

2020
WidePanel = Panel
2121

22-
from pandas.core.daterange import DateRange # deprecated
23-
2422
from pandas.tseries.offsets import DateOffset
2523
from pandas.tseries.tools import to_datetime
2624
from pandas.tseries.index import (DatetimeIndex, Timestamp,
2725
date_range, bdate_range)
2826
from pandas.tseries.period import Period, PeriodIndex
2927

3028
# legacy
29+
from pandas.core.daterange import DateRange # deprecated
3130
import pandas.core.datetools as datetools

pandas/core/factor.py renamed to pandas/core/categorical.py

+41-24
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
import pandas.core.common as com
77

88

9-
def _factor_compare_op(op):
9+
def _cat_compare_op(op):
1010
def f(self, other):
11-
if isinstance(other, (Factor, np.ndarray)):
11+
if isinstance(other, (Categorical, np.ndarray)):
1212
values = np.asarray(self)
1313
f = getattr(values, op)
1414
return f(np.asarray(other))
@@ -23,7 +23,7 @@ def f(self, other):
2323

2424
return f
2525

26-
class Factor(object):
26+
class Categorical(object):
2727
"""
2828
Represents a categorical variable in classic R / S-plus fashion
2929
@@ -41,12 +41,6 @@ class Factor(object):
4141
* levels : ndarray
4242
"""
4343
def __init__(self, labels, levels, name=None):
44-
from pandas.core.index import _ensure_index
45-
46-
levels = _ensure_index(levels)
47-
if not levels.is_unique:
48-
raise ValueError('Factor levels must be unique')
49-
5044
self.labels = labels
5145
self.levels = levels
5246
self.name = name
@@ -58,28 +52,49 @@ def from_array(cls, data):
5852
except TypeError:
5953
labels, levels, _ = factorize(data, sort=False)
6054

61-
return Factor(labels, levels)
55+
return Categorical(labels, levels,
56+
name=getattr(data, 'name', None))
57+
58+
_levels = None
59+
def _set_levels(self, levels):
60+
from pandas.core.index import _ensure_index
61+
62+
levels = _ensure_index(levels)
63+
if not levels.is_unique:
64+
raise ValueError('Categorical levels must be unique')
65+
self._levels = levels
66+
67+
def _get_levels(self):
68+
return self._levels
6269

63-
levels = None
70+
levels = property(fget=_get_levels, fset=_set_levels)
6471

65-
__eq__ = _factor_compare_op('__eq__')
66-
__ne__ = _factor_compare_op('__ne__')
67-
__lt__ = _factor_compare_op('__lt__')
68-
__gt__ = _factor_compare_op('__gt__')
69-
__le__ = _factor_compare_op('__le__')
70-
__ge__ = _factor_compare_op('__ge__')
72+
__eq__ = _cat_compare_op('__eq__')
73+
__ne__ = _cat_compare_op('__ne__')
74+
__lt__ = _cat_compare_op('__lt__')
75+
__gt__ = _cat_compare_op('__gt__')
76+
__le__ = _cat_compare_op('__le__')
77+
__ge__ = _cat_compare_op('__ge__')
7178

7279
def __array__(self, dtype=None):
73-
return com.take_1d(self.levels, self.labels)
80+
return com.take_1d(self.levels.values, self.labels)
7481

7582
def __len__(self):
7683
return len(self.labels)
7784

7885
def __repr__(self):
79-
temp = 'Factor:%s\n%s\nLevels (%d): %s'
86+
temp = 'Categorical: %s\n%s\n%s'
8087
values = np.asarray(self)
88+
levheader = 'Levels (%d): ' % len(self.levels)
89+
levstring = np.array_repr(self.levels,
90+
max_line_width=60)
91+
92+
indent = ' ' * (levstring.find('[') + len(levheader) + 1)
93+
lines = levstring.split('\n')
94+
levstring = '\n'.join([lines[0]] + [indent + x.lstrip() for x in lines[1:]])
95+
8196
return temp % ('' if self.name is None else self.name,
82-
repr(values), len(self.levels), self.levels)
97+
repr(values), levheader + levstring)
8398

8499
def __getitem__(self, key):
85100
if isinstance(key, (int, np.integer)):
@@ -89,22 +104,24 @@ def __getitem__(self, key):
89104
else:
90105
return self.levels[i]
91106
else:
92-
return Factor(self.labels[key], self.levels)
107+
return Categorical(self.labels[key], self.levels)
93108

94109
def equals(self, other):
95110
"""
96-
Returns True if factors are equal
111+
Returns True if categorical arrays are equal
97112
98113
Parameters
99114
----------
100-
other : Factor
115+
other : Categorical
101116
102117
Returns
103118
-------
104119
are_equal : boolean
105120
"""
106-
if not isinstance(other, Factor):
121+
if not isinstance(other, Categorical):
107122
return False
108123

109124
return (self.levels.equals(other.levels) and
110125
np.array_equal(self.labels, other.labels))
126+
127+
Factor = Categorical

pandas/core/common.py

+25-16
Original file line numberDiff line numberDiff line change
@@ -56,29 +56,36 @@ def isnull(obj):
5656
return lib.checknull(obj)
5757

5858
from pandas.core.generic import PandasObject
59-
from pandas import Series
6059
if isinstance(obj, np.ndarray):
61-
if obj.dtype.kind in ('O', 'S'):
62-
# Working around NumPy ticket 1542
63-
shape = obj.shape
64-
result = np.empty(shape, dtype=bool)
65-
vec = lib.isnullobj(obj.ravel())
66-
result[:] = vec.reshape(shape)
67-
68-
if isinstance(obj, Series):
69-
result = Series(result, index=obj.index, copy=False)
70-
elif obj.dtype == np.dtype('M8[ns]'):
71-
# this is the NaT pattern
72-
result = np.array(obj).view('i8') == lib.iNaT
73-
else:
74-
result = -np.isfinite(obj)
75-
return result
60+
return _isnull_ndarraylike(obj)
7661
elif isinstance(obj, PandasObject):
7762
# TODO: optimize for DataFrame, etc.
7863
return obj.apply(isnull)
64+
elif hasattr(obj, '__array__'):
65+
return _isnull_ndarraylike(obj)
7966
else:
8067
return obj is None
8168

69+
def _isnull_ndarraylike(obj):
70+
from pandas import Series
71+
values = np.asarray(obj)
72+
73+
if values.dtype.kind in ('O', 'S'):
74+
# Working around NumPy ticket 1542
75+
shape = values.shape
76+
result = np.empty(shape, dtype=bool)
77+
vec = lib.isnullobj(values.ravel())
78+
result[:] = vec.reshape(shape)
79+
80+
if isinstance(obj, Series):
81+
result = Series(result, index=obj.index, copy=False)
82+
elif values.dtype == np.dtype('M8[ns]'):
83+
# this is the NaT pattern
84+
result = values.view('i8') == lib.iNaT
85+
else:
86+
result = -np.isfinite(obj)
87+
return result
88+
8289
def notnull(obj):
8390
'''
8491
Replacement for numpy.isfinite / -numpy.isnan which is suitable
@@ -482,6 +489,8 @@ def _possibly_cast_item(obj, item, dtype):
482489

483490
def _is_bool_indexer(key):
484491
if isinstance(key, np.ndarray) and key.dtype == np.object_:
492+
key = np.asarray(key)
493+
485494
if not lib.is_bool_array(key):
486495
if isnull(key).any():
487496
raise ValueError('cannot index with vector containing '

pandas/core/format.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -672,8 +672,8 @@ def _has_names(index):
672672
# Global formatting options
673673

674674
def set_printoptions(precision=None, column_space=None, max_rows=None,
675-
max_columns=None, colheader_justify='right',
676-
max_colwidth=50, notebook_repr_html=None,
675+
max_columns=None, colheader_justify=None,
676+
max_colwidth=None, notebook_repr_html=None,
677677
date_dayfirst=None, date_yearfirst=None):
678678
"""
679679
Alter default behavior of DataFrame.toString

0 commit comments

Comments
 (0)