diff --git a/ci/install.sh b/ci/install.sh index 660b05932a5ec..cb0dff6e5c7d9 100755 --- a/ci/install.sh +++ b/ci/install.sh @@ -37,9 +37,9 @@ pip install wheel # comment this line to disable the fetching of wheel files base_url=http://cache27diy-cpycloud.rhcloud.com wheel_box=${TRAVIS_PYTHON_VERSION}${JOB_TAG} -PIP_ARGS+=" -I --use-wheel --find-links=$base_url/$wheel_box/" +PIP_ARGS+=" -I --use-wheel --find-links=$base_url/$wheel_box/ --allow-external --allow-insecure" -# Force virtualenv to accpet system_site_packages +# Force virtualenv to accept system_site_packages rm -f $VIRTUAL_ENV/lib/python$TRAVIS_PYTHON_VERSION/no-global-site-packages.txt @@ -49,11 +49,12 @@ if [ -n "$LOCALE_OVERRIDE" ]; then time sudo locale-gen "$LOCALE_OVERRIDE" fi -time pip install $PIP_ARGS -r ci/requirements-${wheel_box}.txt # we need these for numpy time sudo apt-get $APT_ARGS install libatlas-base-dev gfortran +time pip install $PIP_ARGS -r ci/requirements-${wheel_box}.txt + # Need to enable for locale testing. The location of the locale file(s) is # distro specific. For example, on Arch Linux all of the locales are in a diff --git a/pandas/core/series.py b/pandas/core/series.py index 918043dcacd37..a5459bfb2ae8c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1349,11 +1349,7 @@ def describe(self, percentile_width=50): ------- desc : Series """ - try: - from collections import Counter - except ImportError: # pragma: no cover - # For Python < 2.7, we include a local copy of this: - from pandas.util.counter import Counter + from pandas.compat import Counter if self.dtype == object: names = ['count', 'unique'] diff --git a/pandas/util/counter.py b/pandas/util/counter.py deleted file mode 100644 index 75f7b214ce6a5..0000000000000 --- a/pandas/util/counter.py +++ /dev/null @@ -1,296 +0,0 @@ -# This is copied from collections in Python 2.7, for compatibility with older -# versions of Python. It can be dropped when we depend on Python 2.7/3.1 - -from pandas import compat -import heapq as _heapq -from itertools import repeat as _repeat, chain as _chain, starmap as _starmap -from operator import itemgetter as _itemgetter -from pandas.compat import map - -try: - from collections import Mapping -except: - # ABCs were only introduced in Python 2.6, so this is a hack for Python 2.5 - Mapping = dict - - -class Counter(dict): - '''Dict subclass for counting hashable items. Sometimes called a bag - or multiset. Elements are stored as dictionary keys and their counts - are stored as dictionary values. - - >>> c = Counter('abcdeabcdabcaba') # count elements from a string - - >>> c.most_common(3) # three most common elements - [('a', 5), ('b', 4), ('c', 3)] - >>> sorted(c) # list all unique elements - ['a', 'b', 'c', 'd', 'e'] - >>> ''.join(sorted(c.elements())) # list elements with repetitions - 'aaaaabbbbcccdde' - >>> sum(c.values()) # total of all counts - 15 - - >>> c['a'] # count of letter 'a' - 5 - >>> for elem in 'shazam': # update counts from an iterable - ... c[elem] += 1 # by adding 1 to each element's count - >>> c['a'] # now there are seven 'a' - 7 - >>> del c['b'] # remove all 'b' - >>> c['b'] # now there are zero 'b' - 0 - - >>> d = Counter('simsalabim') # make another counter - >>> c.update(d) # add in the second counter - >>> c['a'] # now there are nine 'a' - 9 - - >>> c.clear() # empty the counter - >>> c - Counter() - - Note: If a count is set to zero or reduced to zero, it will remain - in the counter until the entry is deleted or the counter is cleared: - - >>> c = Counter('aaabbc') - >>> c['b'] -= 2 # reduce the count of 'b' by two - >>> c.most_common() # 'b' is still in, but its count is zero - [('a', 3), ('c', 1), ('b', 0)] - - ''' - # References: - # http://en.wikipedia.org/wiki/Multiset - # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html - # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm - # http://code.activestate.com/recipes/259174/ - # Knuth, TAOCP Vol. II section 4.6.3 - - def __init__(self, iterable=None, **kwds): - '''Create a new, empty Counter object. And if given, count elements - from an input iterable. Or, initialize the count from another mapping - of elements to their counts. - - >>> c = Counter() # a new, empty counter - >>> c = Counter('gallahad') # a new counter from an iterable - >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping - >>> c = Counter(a=4, b=2) # a new counter from keyword args - - ''' - super(Counter, self).__init__() - self.update(iterable, **kwds) - - def __missing__(self, key): - 'The count of elements not in the Counter is zero.' - # Needed so that self[missing_item] does not raise KeyError - return 0 - - def most_common(self, n=None): - '''List the n most common elements and their counts from the most - common to the least. If n is None, then list all element counts. - - >>> Counter('abcdeabcdabcaba').most_common(3) - [('a', 5), ('b', 4), ('c', 3)] - - ''' - # Emulate Bag.sortedByCount from Smalltalk - if n is None: - return sorted(compat.iteritems(self), key=_itemgetter(1), reverse=True) - return _heapq.nlargest(n, compat.iteritems(self), key=_itemgetter(1)) - - def elements(self): - '''Iterator over elements repeating each as many times as its count. - - >>> c = Counter('ABCABC') - >>> sorted(c.elements()) - ['A', 'A', 'B', 'B', 'C', 'C'] - - # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1 - >>> prime_factors = Counter({2: 2, 3: 3, 17: 1}) - >>> product = 1 - >>> for factor in prime_factors.elements(): # loop over factors - ... product *= factor # and multiply them - >>> product - 1836 - - Note, if an element's count has been set to zero or is a negative - number, elements() will ignore it. - - ''' - # Emulate Bag.do from Smalltalk and Multiset.begin from C++. - return _chain.from_iterable(_starmap(_repeat, compat.iteritems(self))) - - # Override dict methods where necessary - - @classmethod - def fromkeys(cls, iterable, v=None): - # There is no equivalent method for counters because setting v=1 - # means that no element can have a count greater than one. - raise NotImplementedError( - 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.') - - def update(self, iterable=None, **kwds): - '''Like dict.update() but add counts instead of replacing them. - - Source can be an iterable, a dictionary, or another Counter instance. - - >>> c = Counter('which') - >>> c.update('witch') # add elements from another iterable - >>> d = Counter('watch') - >>> c.update(d) # add elements from another counter - >>> c['h'] # four 'h' in which, witch, and watch - 4 - - ''' - # The regular dict.update() operation makes no sense here because the - # replace behavior results in the some of original untouched counts - # being mixed-in with all of the other counts for a mismash that - # doesn't have a straight-forward interpretation in most counting - # contexts. Instead, we implement straight-addition. Both the inputs - # and outputs are allowed to contain zero and negative counts. - - if iterable is not None: - if isinstance(iterable, Mapping): - if self: - self_get = self.get - for elem, count in compat.iteritems(iterable): - self[elem] = self_get(elem, 0) + count - else: - # fast path when counter is empty - super(Counter, self).update(iterable) - else: - self_get = self.get - for elem in iterable: - self[elem] = self_get(elem, 0) + 1 - if kwds: - self.update(kwds) - - def subtract(self, iterable=None, **kwds): - '''Like dict.update() but subtracts counts instead of replacing them. - Counts can be reduced below zero. Both the inputs and outputs are - allowed to contain zero and negative counts. - - Source can be an iterable, a dictionary, or another Counter instance. - - >>> c = Counter('which') - >>> c.subtract('witch') # subtract elements from another iterable - >>> c.subtract(Counter('watch')) # subtract elements from another counter - >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch - 0 - >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch - -1 - - ''' - if iterable is not None: - self_get = self.get - if isinstance(iterable, Mapping): - for elem, count in iterable.items(): - self[elem] = self_get(elem, 0) - count - else: - for elem in iterable: - self[elem] = self_get(elem, 0) - 1 - if kwds: - self.subtract(kwds) - - def copy(self): - 'Return a shallow copy.' - return self.__class__(self) - - def __reduce__(self): - return self.__class__, (dict(self),) - - def __delitem__(self, elem): - """ - Like dict.__delitem__() but does not raise KeyError for missing values. - """ - if elem in self: - super(Counter, self).__delitem__(elem) - - def __repr__(self): - if not self: - return '%s()' % self.__class__.__name__ - items = ', '.join(map('%r: %r'.__mod__, self.most_common())) - return '%s({%s})' % (self.__class__.__name__, items) - - # Multiset-style mathematical operations discussed in: - # Knuth TAOCP Volume II section 4.6.3 exercise 19 - # and at http://en.wikipedia.org/wiki/Multiset - # - # Outputs guaranteed to only include positive counts. - # - # To strip negative and zero counts, add-in an empty counter: - # c += Counter() - - def __add__(self, other): - '''Add counts from two counters. - - >>> Counter('abbb') + Counter('bcc') - Counter({'b': 4, 'c': 2, 'a': 1}) - - ''' - if not isinstance(other, Counter): - return NotImplemented - result = Counter() - for elem, count in self.items(): - newcount = count + other[elem] - if newcount > 0: - result[elem] = newcount - for elem, count in other.items(): - if elem not in self and count > 0: - result[elem] = count - return result - - def __sub__(self, other): - ''' Subtract count, but keep only results with positive counts. - - >>> Counter('abbbc') - Counter('bccd') - Counter({'b': 2, 'a': 1}) - - ''' - if not isinstance(other, Counter): - return NotImplemented - result = Counter() - for elem, count in self.items(): - newcount = count - other[elem] - if newcount > 0: - result[elem] = newcount - for elem, count in other.items(): - if elem not in self and count < 0: - result[elem] = 0 - count - return result - - def __or__(self, other): - '''Union is the maximum of value in either of the input counters. - - >>> Counter('abbb') | Counter('bcc') - Counter({'b': 3, 'c': 2, 'a': 1}) - - ''' - if not isinstance(other, Counter): - return NotImplemented - result = Counter() - for elem, count in self.items(): - other_count = other[elem] - newcount = other_count if count < other_count else count - if newcount > 0: - result[elem] = newcount - for elem, count in other.items(): - if elem not in self and count > 0: - result[elem] = count - return result - - def __and__(self, other): - ''' Intersection is the minimum of corresponding counts. - - >>> Counter('abbb') & Counter('bcc') - Counter({'b': 1}) - - ''' - if not isinstance(other, Counter): - return NotImplemented - result = Counter() - for elem, count in self.items(): - other_count = other[elem] - newcount = count if count < other_count else other_count - if newcount > 0: - result[elem] = newcount - return result