Skip to content

Cut enhance for issue #14714 #14735

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 29 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
fb922d6
changes for GH 13747
Nov 7, 2016
c83d000
added test case for unicode round trip
Nov 8, 2016
edb8553
refactored the new unicode test to be in sync with the rest of the file
Nov 8, 2016
66d8ebf
removed the disabled tag for clipboard test so that we can check if t…
Nov 8, 2016
14d94a0
changed the pandas util clipboard file to return unicode if the pytho…
aileronajay Nov 11, 2016
d565b1f
updated pyperclip to the latest version
Nov 12, 2016
f708c2e
Merge branch 'master' of https://github.com/aileronajay/pandas
Nov 12, 2016
c5a87d8
Merge branch 'test_branch' of https://github.com/aileronajay/pandas i…
Nov 12, 2016
825bbe2
all files related to pyperclip are under pandas.util.clipboard
Nov 12, 2016
02f87b0
removed duplicate files
Nov 12, 2016
71d58d0
testing encoding in kwargs to to_clipboard and test case for the same
Nov 12, 2016
dd57ae3
code review changes and read clipboard invalid encoding test
Nov 12, 2016
d202fd0
added test for valid encoding, modified setup.py so that pandas/util/…
Nov 12, 2016
0665fd4
fixed linting and test case as per code review
Nov 16, 2016
ed1375f
Merge branch 'test_branch'
Nov 16, 2016
9946fb7
Merge branch 'master' of https://github.com/pandas-dev/pandas into te…
Nov 16, 2016
c0aafd7
Merge branch 'test_branch'
Nov 16, 2016
b03ed56
changed whatsnew file
Nov 16, 2016
ac8ae60
skip clipboard test if clipboard primitives are absent
Nov 17, 2016
7af95da
merging latest changes
Nov 17, 2016
cedb690
merge conflict in whats new file
Nov 17, 2016
98b61e8
merge conflict
Nov 17, 2016
1dca292
conflict resolution
Nov 17, 2016
9db42d8
whatsnew conflict
Nov 17, 2016
b74fbc1
ignore lint test for pyperclip files
Nov 17, 2016
2aafb66
moved comment inside test and added github issue labels to test
Nov 17, 2016
c7c9e12
Merge branch 'master' of https://github.com/pandas-dev/pandas
Nov 23, 2016
5d8abab
initial iteration for adding cut feature to time data
Nov 25, 2016
b0341d7
Merge remote-tracking branch 'intial/master' into cut_enhance
Nov 25, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/source/whatsnew/v0.19.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,7 @@ Bug Fixes


- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`)
- Bug in clipboard functions on Linux with Python 2 when handling unicode and a separator (:issue:`13747`)
- Bug in clipboard functions on Windows 10 with Python 3.5 (:issue:`14362`)
- BUG: test_clipboard fails (:issue:`12807`)
- Bug where ``to_clipboard`` output was no longer Excel compatible (:issue:`12529`)
39 changes: 28 additions & 11 deletions pandas/tools/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@
from pandas.types.missing import isnull
from pandas.types.common import (is_float, is_integer,
is_scalar)

from pandas.tseries.timedeltas import to_timedelta
from pandas.core.api import Series
from pandas.core.categorical import Categorical
import pandas.core.algorithms as algos
import pandas.core.nanops as nanops
from pandas.compat import zip

from pandas import lib
import numpy as np
from pandas.types.common import (needs_i8_conversion)


def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
Expand Down Expand Up @@ -81,6 +82,13 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
array([1, 1, 1, 1, 1], dtype=int64)
"""
# NOTE: this binning code is changed a bit from histogram for var(x) == 0
# for handling the cut for datetime and timedelta objects
if needs_i8_conversion(x):
x = x.values.view('i8')
time_data = True
else:
time_data = False

if not np.iterable(bins):
if is_scalar(bins) and bins < 1:
raise ValueError("`bins` should be a positive integer.")
Expand Down Expand Up @@ -116,7 +124,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,

return _bins_to_cuts(x, bins, right=right, labels=labels,
retbins=retbins, precision=precision,
include_lowest=include_lowest)
include_lowest=include_lowest, time_data=time_data)


def qcut(x, q, labels=None, retbins=False, precision=3):
Expand Down Expand Up @@ -176,7 +184,8 @@ def qcut(x, q, labels=None, retbins=False, precision=3):


def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,
precision=3, name=None, include_lowest=False):
precision=3, name=None, include_lowest=False,
time_data=False):
x_is_series = isinstance(x, Series)
series_index = None

Expand Down Expand Up @@ -205,7 +214,8 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,
while True:
try:
levels = _format_levels(bins, precision, right=right,
include_lowest=include_lowest)
include_lowest=include_lowest,
time_data=time_data)
except ValueError:
increases += 1
precision += 1
Expand Down Expand Up @@ -239,7 +249,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,


def _format_levels(bins, prec, right=True,
include_lowest=False):
include_lowest=False, time_data=False):
fmt = lambda v: _format_label(v, precision=prec)
if right:
levels = []
Expand All @@ -248,16 +258,23 @@ def _format_levels(bins, prec, right=True,

if a != b and fa == fb:
raise ValueError('precision too low')

formatted = '(%s, %s]' % (fa, fb)

if time_data:
formatted = '(%s, %s]' % (to_timedelta(float(fa), unit='ns'),
to_timedelta(float(fb), unit='ns'))
else:
formatted = '(%s, %s]' % (fa, fb)
levels.append(formatted)

if include_lowest:
levels[0] = '[' + levels[0][1:]
else:
levels = ['[%s, %s)' % (fmt(a), fmt(b))
for a, b in zip(bins, bins[1:])]
if time_data:
levels = ['[%s, %s)' % (to_timedelta(float(fmt(fa)), unit='ns'),
to_timedelta(float(fmt(b)), unit='ns'))
for a, b in zip(bins, bins[1:])]
else:
levels = ['[%s, %s)' % (fmt(a), fmt(b))
for a, b in zip(bins, bins[1:])]

return levels

Expand Down