Skip to content

Commit 133aef9

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into add-nrows-to-read-json
merge conflicts
2 parents 2ce74db + 035e1fe commit 133aef9

File tree

13 files changed

+361
-102
lines changed

13 files changed

+361
-102
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -950,6 +950,7 @@ I/O
950950
- Bug in :meth:`~DataFrame.read_feather` was raising an `ArrowIOError` when reading an s3 or http file path (:issue:`29055`)
951951
- Bug in :meth:`~DataFrame.to_excel` could not handle the column name `render` and was raising a ``KeyError`` (:issue:`34331`)
952952
- Bug in :meth:`~SQLDatabase.execute` was raising a ``ProgrammingError`` for some DB-API drivers when the SQL statement contained the `%` character and no parameters were present (:issue:`34211`)
953+
- Bug in :meth:`~pandas.io.stata.StataReader` which resulted in categorical variables with different dtypes when reading data using an iterator. (:issue:`31544`)
953954

954955
Plotting
955956
^^^^^^^^

pandas/_libs/tslibs/dtypes.pxd

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
2+
cdef enum PeriodDtypeCode:
    # Integer codes for period frequencies.  The thousands digit selects the
    # frequency family; the remainder selects the variant within that family
    # (year-end month for annual/quarterly, end-of-week day for weekly).

    # ---- Annual, by fiscal year end ----
    # e.g. 2005 for A_FEB runs Mar 1, 2004 to Feb 28, 2005
    A = 1000        # Default alias (same value as A_DEC)
    A_DEC = 1000    # Annual - December year end
    A_JAN = 1001    # Annual - January year end
    A_FEB = 1002    # Annual - February year end
    A_MAR = 1003    # Annual - March year end
    A_APR = 1004    # Annual - April year end
    A_MAY = 1005    # Annual - May year end
    A_JUN = 1006    # Annual - June year end
    A_JUL = 1007    # Annual - July year end
    A_AUG = 1008    # Annual - August year end
    A_SEP = 1009    # Annual - September year end
    A_OCT = 1010    # Annual - October year end
    A_NOV = 1011    # Annual - November year end

    # ---- Quarterly, by fiscal year end ----
    # e.g. Q42005 for Q_OCT runs Aug 1, 2005 to Oct 31, 2005
    Q_DEC = 2000    # Quarterly - December year end
    Q_JAN = 2001    # Quarterly - January year end
    Q_FEB = 2002    # Quarterly - February year end
    Q_MAR = 2003    # Quarterly - March year end
    Q_APR = 2004    # Quarterly - April year end
    Q_MAY = 2005    # Quarterly - May year end
    Q_JUN = 2006    # Quarterly - June year end
    Q_JUL = 2007    # Quarterly - July year end
    Q_AUG = 2008    # Quarterly - August year end
    Q_SEP = 2009    # Quarterly - September year end
    Q_OCT = 2010    # Quarterly - October year end
    Q_NOV = 2011    # Quarterly - November year end

    # ---- Monthly ----
    M = 3000        # Monthly

    # ---- Weekly, by end-of-week day ----
    W_SUN = 4000    # Weekly - Sunday end of week
    W_MON = 4001    # Weekly - Monday end of week
    W_TUE = 4002    # Weekly - Tuesday end of week
    W_WED = 4003    # Weekly - Wednesday end of week
    W_THU = 4004    # Weekly - Thursday end of week
    W_FRI = 4005    # Weekly - Friday end of week
    W_SAT = 4006    # Weekly - Saturday end of week

    # ---- Daily and finer ----
    B = 5000        # Business days
    D = 6000        # Daily
    H = 7000        # Hourly
    T = 8000        # Minutely
    S = 9000        # Secondly
    L = 10000       # Millisecondly
    U = 11000       # Microsecondly
    N = 12000       # Nanosecondly
52+
53+
54+
cdef class PeriodPseudoDtype:
    # Frequency code carried by the instance; readable (but not writable)
    # from Python code.
    cdef readonly PeriodDtypeCode dtype_code

pandas/_libs/tslibs/dtypes.pyx

+108
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# period frequency constants corresponding to scikits timeseries
2+
# originals
3+
4+
5+
cdef class PeriodPseudoDtype:
    """
    Similar to an actual dtype, this contains all of the information
    describing a PeriodDtype in an integer code.

    Two instances compare equal, and hash equal, exactly when their
    ``dtype_code`` values match.
    """
    # The attribute is declared in the accompanying dtypes.pxd:
    # cdef readonly:
    #     PeriodDtypeCode dtype_code

    def __cinit__(self, PeriodDtypeCode code):
        # Store the frequency code this pseudo-dtype wraps.
        self.dtype_code = code

    def __eq__(self, other):
        """
        Return True only when both operands are PeriodPseudoDtype instances
        carrying the same ``dtype_code``; any other operand compares unequal.
        """
        if not isinstance(other, PeriodPseudoDtype):
            return False
        if not isinstance(self, PeriodPseudoDtype):
            # cython semantics, this is a reversed op
            return False
        return self.dtype_code == other.dtype_code

    def __hash__(self):
        """
        Hash on ``dtype_code`` so hashing stays consistent with ``__eq__``.
        """
        return hash(self.dtype_code)

    @property
    def date_offset(self):
        """
        Corresponding DateOffset object.

        This mapping is mainly for backward-compatibility.
        """
        # Imported here rather than at module level.
        # NOTE(review): presumably to avoid a circular import with
        # offsets.pyx, which cimports from this module -- confirm.
        from .offsets import to_offset

        # ``.get`` yields None when the code has no canonical alias.
        freqstr = _reverse_period_code_map.get(self.dtype_code)
        # equiv: freqstr = libfrequencies.get_freq_str(self.dtype_code)

        return to_offset(freqstr)

    @classmethod
    def from_date_offset(cls, offset):
        """
        Alternate constructor: build an instance from ``offset`` by reading
        its ``_period_dtype_code`` attribute.
        """
        code = offset._period_dtype_code
        return cls(code)
42+
43+
44+
# Frequency alias -> integer period code.  The literal below holds only the
# canonical aliases; extra aliases are merged in further down, after the
# reverse map has been built.
_period_code_map = {
    # Annual freqs with various fiscal year ends.
    # eg, 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005
    "A-DEC": 1000,  # Annual - December year end
    "A-JAN": 1001,  # Annual - January year end
    "A-FEB": 1002,  # Annual - February year end
    "A-MAR": 1003,  # Annual - March year end
    "A-APR": 1004,  # Annual - April year end
    "A-MAY": 1005,  # Annual - May year end
    "A-JUN": 1006,  # Annual - June year end
    "A-JUL": 1007,  # Annual - July year end
    "A-AUG": 1008,  # Annual - August year end
    "A-SEP": 1009,  # Annual - September year end
    "A-OCT": 1010,  # Annual - October year end
    "A-NOV": 1011,  # Annual - November year end

    # Quarterly frequencies with various fiscal year ends.
    # eg, Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005
    "Q-DEC": 2000,  # Quarterly - December year end
    "Q-JAN": 2001,  # Quarterly - January year end
    "Q-FEB": 2002,  # Quarterly - February year end
    "Q-MAR": 2003,  # Quarterly - March year end
    "Q-APR": 2004,  # Quarterly - April year end
    "Q-MAY": 2005,  # Quarterly - May year end
    "Q-JUN": 2006,  # Quarterly - June year end
    "Q-JUL": 2007,  # Quarterly - July year end
    "Q-AUG": 2008,  # Quarterly - August year end
    "Q-SEP": 2009,  # Quarterly - September year end
    "Q-OCT": 2010,  # Quarterly - October year end
    "Q-NOV": 2011,  # Quarterly - November year end

    "M": 3000,  # Monthly

    "W-SUN": 4000,  # Weekly - Sunday end of week
    "W-MON": 4001,  # Weekly - Monday end of week
    "W-TUE": 4002,  # Weekly - Tuesday end of week
    "W-WED": 4003,  # Weekly - Wednesday end of week
    "W-THU": 4004,  # Weekly - Thursday end of week
    "W-FRI": 4005,  # Weekly - Friday end of week
    "W-SAT": 4006,  # Weekly - Saturday end of week

    "B": 5000,  # Business days
    "D": 6000,  # Daily
    "H": 7000,  # Hourly
    "T": 8000,  # Minutely
    "S": 9000,  # Secondly
    "L": 10000,  # Millisecondly
    "U": 11000,  # Microsecondly
    "N": 12000,  # Nanosecondly
}

# Integer period code -> canonical alias.  Built here, before any extra
# aliases are merged in, so each code maps back to a single canonical string.
_reverse_period_code_map = {
    code: alias for alias, code in _period_code_map.items()
}

# Yearly aliases; careful not to put these in _reverse_period_code_map
# ("A-DEC" -> "Y-DEC", etc.).  The comprehension is fully evaluated before
# ``update`` mutates the dict.
_period_code_map.update({
    "Y" + alias[1:]: code
    for alias, code in _period_code_map.items()
    if alias.startswith("A-")
})

# Short aliases that reuse an existing code.
_period_code_map.update({
    "Q": 2000,  # Quarterly - December year end (default quarterly)
    "A": 1000,  # Annual
    "W": 4000,  # Weekly
    "C": 5000,  # Custom Business Day
})

pandas/_libs/tslibs/frequencies.pyx

+2-67
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ from pandas._libs.tslibs.offsets import (
1212
opattern,
1313
)
1414

15+
from .dtypes import _period_code_map, _reverse_period_code_map
16+
1517
# ---------------------------------------------------------------------
1618
# Period codes
1719

@@ -31,73 +33,6 @@ class FreqGroup:
3133
FR_NS = 12000
3234

3335

34-
# period frequency constants corresponding to scikits timeseries
35-
# originals
36-
_period_code_map = {
37-
# Annual freqs with various fiscal year ends.
38-
# eg, 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005
39-
"A-DEC": 1000, # Annual - December year end
40-
"A-JAN": 1001, # Annual - January year end
41-
"A-FEB": 1002, # Annual - February year end
42-
"A-MAR": 1003, # Annual - March year end
43-
"A-APR": 1004, # Annual - April year end
44-
"A-MAY": 1005, # Annual - May year end
45-
"A-JUN": 1006, # Annual - June year end
46-
"A-JUL": 1007, # Annual - July year end
47-
"A-AUG": 1008, # Annual - August year end
48-
"A-SEP": 1009, # Annual - September year end
49-
"A-OCT": 1010, # Annual - October year end
50-
"A-NOV": 1011, # Annual - November year end
51-
52-
# Quarterly frequencies with various fiscal year ends.
53-
# eg, Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005
54-
"Q-DEC": 2000, # Quarterly - December year end
55-
"Q-JAN": 2001, # Quarterly - January year end
56-
"Q-FEB": 2002, # Quarterly - February year end
57-
"Q-MAR": 2003, # Quarterly - March year end
58-
"Q-APR": 2004, # Quarterly - April year end
59-
"Q-MAY": 2005, # Quarterly - May year end
60-
"Q-JUN": 2006, # Quarterly - June year end
61-
"Q-JUL": 2007, # Quarterly - July year end
62-
"Q-AUG": 2008, # Quarterly - August year end
63-
"Q-SEP": 2009, # Quarterly - September year end
64-
"Q-OCT": 2010, # Quarterly - October year end
65-
"Q-NOV": 2011, # Quarterly - November year end
66-
67-
"M": 3000, # Monthly
68-
69-
"W-SUN": 4000, # Weekly - Sunday end of week
70-
"W-MON": 4001, # Weekly - Monday end of week
71-
"W-TUE": 4002, # Weekly - Tuesday end of week
72-
"W-WED": 4003, # Weekly - Wednesday end of week
73-
"W-THU": 4004, # Weekly - Thursday end of week
74-
"W-FRI": 4005, # Weekly - Friday end of week
75-
"W-SAT": 4006, # Weekly - Saturday end of week
76-
77-
"B": 5000, # Business days
78-
"D": 6000, # Daily
79-
"H": 7000, # Hourly
80-
"T": 8000, # Minutely
81-
"S": 9000, # Secondly
82-
"L": 10000, # Millisecondly
83-
"U": 11000, # Microsecondly
84-
"N": 12000} # Nanosecondly
85-
86-
87-
_reverse_period_code_map = {
88-
_period_code_map[key]: key for key in _period_code_map}
89-
90-
# Yearly aliases; careful not to put these in _reverse_period_code_map
91-
_period_code_map.update({'Y' + key[1:]: _period_code_map[key]
92-
for key in _period_code_map
93-
if key.startswith('A-')})
94-
95-
_period_code_map.update({
96-
"Q": 2000, # Quarterly - December year end (default quarterly)
97-
"A": 1000, # Annual
98-
"W": 4000, # Weekly
99-
"C": 5000}) # Custom Business Day
100-
10136
# Map attribute-name resolutions to resolution abbreviations
10237
_attrname_to_abbrevs = {
10338
"year": "A",

pandas/_libs/tslibs/nattype.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,7 @@ class NaTType(_NaT):
397397
398398
Parameters
399399
----------
400-
locale : string, default None (English locale)
400+
locale : str, default None (English locale)
401401
Locale determining the language in which to return the month name.
402402
403403
Returns
@@ -414,7 +414,7 @@ class NaTType(_NaT):
414414
415415
Parameters
416416
----------
417-
locale : string, default None (English locale)
417+
locale : str, default None (English locale)
418418
Locale determining the language in which to return the day name.
419419
420420
Returns

pandas/_libs/tslibs/offsets.pyx

+30-1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ from pandas._libs.tslibs.np_datetime cimport (
4848
from pandas._libs.tslibs.timezones cimport utc_pytz as UTC
4949
from pandas._libs.tslibs.tzconversion cimport tz_convert_single
5050

51+
from .dtypes cimport PeriodDtypeCode
5152
from .timedeltas cimport delta_to_nanoseconds
5253

5354

@@ -892,36 +893,43 @@ cdef class Tick(SingleConstructorOffset):
892893
cdef class Day(Tick):
893894
_nanos_inc = 24 * 3600 * 1_000_000_000
894895
_prefix = "D"
896+
_period_dtype_code = PeriodDtypeCode.D
895897

896898

897899
cdef class Hour(Tick):
898900
_nanos_inc = 3600 * 1_000_000_000
899901
_prefix = "H"
902+
_period_dtype_code = PeriodDtypeCode.H
900903

901904

902905
cdef class Minute(Tick):
903906
_nanos_inc = 60 * 1_000_000_000
904907
_prefix = "T"
908+
_period_dtype_code = PeriodDtypeCode.T
905909

906910

907911
cdef class Second(Tick):
908912
_nanos_inc = 1_000_000_000
909913
_prefix = "S"
914+
_period_dtype_code = PeriodDtypeCode.S
910915

911916

912917
cdef class Milli(Tick):
913918
_nanos_inc = 1_000_000
914919
_prefix = "L"
920+
_period_dtype_code = PeriodDtypeCode.L
915921

916922

917923
cdef class Micro(Tick):
918924
_nanos_inc = 1000
919925
_prefix = "U"
926+
_period_dtype_code = PeriodDtypeCode.U
920927

921928

922929
cdef class Nano(Tick):
923930
_nanos_inc = 1
924931
_prefix = "N"
932+
_period_dtype_code = PeriodDtypeCode.N
925933

926934

927935
def delta_to_tick(delta: timedelta) -> Tick:
@@ -1281,7 +1289,7 @@ cdef class BusinessDay(BusinessMixin):
12811289
"""
12821290
DateOffset subclass representing possibly n business days.
12831291
"""
1284-
1292+
_period_dtype_code = PeriodDtypeCode.B
12851293
_prefix = "B"
12861294
_attributes = tuple(["n", "normalize", "offset"])
12871295

@@ -1945,6 +1953,15 @@ cdef class YearEnd(YearOffset):
19451953
_prefix = "A"
19461954
_day_opt = "end"
19471955

1956+
cdef readonly:
1957+
int _period_dtype_code
1958+
1959+
def __init__(self, n=1, normalize=False, month=None):
1960+
# Because YearEnd can be the freq for a Period, define its
1961+
# _period_dtype_code at construction for performance
1962+
YearOffset.__init__(self, n, normalize, month)
1963+
self._period_dtype_code = PeriodDtypeCode.A + self.month % 12
1964+
19481965

19491966
cdef class YearBegin(YearOffset):
19501967
"""
@@ -2099,6 +2116,14 @@ cdef class QuarterEnd(QuarterOffset):
20992116
_prefix = "Q"
21002117
_day_opt = "end"
21012118

2119+
cdef readonly:
2120+
int _period_dtype_code
2121+
2122+
def __init__(self, n=1, normalize=False, startingMonth=None):
2123+
# Because QuarterEnd can be the freq for a Period, define its
2124+
# _period_dtype_code at construction for performance
2125+
QuarterOffset.__init__(self, n, normalize, startingMonth)
2126+
self._period_dtype_code = PeriodDtypeCode.Q_DEC + self.startingMonth % 12
21022127

21032128
cdef class QuarterBegin(QuarterOffset):
21042129
"""
@@ -2148,6 +2173,7 @@ cdef class MonthEnd(MonthOffset):
21482173
"""
21492174
DateOffset of one month end.
21502175
"""
2176+
_period_dtype_code = PeriodDtypeCode.M
21512177
_prefix = "M"
21522178
_day_opt = "end"
21532179

@@ -2452,6 +2478,7 @@ cdef class Week(SingleConstructorOffset):
24522478

24532479
cdef readonly:
24542480
object weekday # int or None
2481+
int _period_dtype_code
24552482

24562483
def __init__(self, n=1, normalize=False, weekday=None):
24572484
BaseOffset.__init__(self, n, normalize)
@@ -2461,6 +2488,8 @@ cdef class Week(SingleConstructorOffset):
24612488
if self.weekday < 0 or self.weekday > 6:
24622489
raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}")
24632490

2491+
self._period_dtype_code = PeriodDtypeCode.W_SUN + (weekday + 1) % 7
2492+
24642493
cpdef __setstate__(self, state):
24652494
self.n = state.pop("n")
24662495
self.normalize = state.pop("normalize")

0 commit comments

Comments
 (0)