Skip to content

Commit 8786bf9

Browse files
committed
ENH: add fill_value argument to Series.reindex, DataFrame next, #784
1 parent 0794d11 commit 8786bf9

File tree

5 files changed

+175
-59
lines changed

5 files changed

+175
-59
lines changed

pandas/core/common.py

+7 -7
Original file line number | Diff line number | Diff line change
@@ -103,10 +103,10 @@ def _unpickle_array(bytes):
103103
arr = read_array(BytesIO(bytes))
104104
return arr
105105

106-
def _take_1d_bool(arr, indexer, out):
106+
def _take_1d_bool(arr, indexer, out, fill_value=np.nan):
107107
view = arr.view(np.uint8)
108108
outview = out.view(np.uint8)
109-
lib.take_1d_bool(view, indexer, outview)
109+
lib.take_1d_bool(view, indexer, outview, fill_value=fill_value)
110110

111111
def _take_2d_axis0_bool(arr, indexer, out):
112112
view = arr.view(np.uint8)
@@ -148,7 +148,7 @@ def _get_take2d_function(dtype_str, axis=0):
148148
else:
149149
return _take2d_axis1_dict[dtype_str]
150150

151-
def take_1d(arr, indexer, out=None):
151+
def take_1d(arr, indexer, out=None, fill_value=np.nan):
152152
"""
153153
Specialized Cython take which sets NaN values in one pass
154154
"""
@@ -167,7 +167,7 @@ def take_1d(arr, indexer, out=None):
167167
if out is None:
168168
out = np.empty(n, dtype=arr.dtype)
169169
take_f = _take1d_dict[dtype_str]
170-
take_f(arr, indexer, out=out)
170+
take_f(arr, indexer, out=out, fill_value=fill_value)
171171
except ValueError:
172172
mask = indexer == -1
173173
if len(arr) == 0:
@@ -180,12 +180,12 @@ def take_1d(arr, indexer, out=None):
180180
raise Exception('out with dtype %s does not support NA' %
181181
out.dtype)
182182
out = _maybe_upcast(out)
183-
np.putmask(out, mask, np.nan)
183+
np.putmask(out, mask, fill_value)
184184
elif dtype_str in ('float64', 'object'):
185185
if out is None:
186186
out = np.empty(n, dtype=arr.dtype)
187187
take_f = _take1d_dict[dtype_str]
188-
take_f(arr, indexer, out=out)
188+
take_f(arr, indexer, out=out, fill_value=fill_value)
189189
else:
190190
out = arr.take(indexer, out=out)
191191
mask = indexer == -1
@@ -194,7 +194,7 @@ def take_1d(arr, indexer, out=None):
194194
raise Exception('out with dtype %s does not support NA' %
195195
out.dtype)
196196
out = _maybe_upcast(out)
197-
np.putmask(out, mask, np.nan)
197+
np.putmask(out, mask, fill_value)
198198

199199
return out
200200

pandas/core/series.py

+6 -2
Original file line number | Diff line number | Diff line change
@@ -1841,7 +1841,8 @@ def _reindex_indexer(self, new_index, indexer, copy):
18411841
# be subclass-friendly
18421842
return self._constructor(new_values, new_index, name=self.name)
18431843

1844-
def reindex(self, index=None, method=None, level=None, copy=True):
1844+
def reindex(self, index=None, method=None, level=None, fill_value=np.nan,
1845+
copy=True):
18451846
"""Conform Series to new index with optional filling logic, placing
18461847
NA/NaN in locations having no value in the previous index. A new object
18471848
is produced unless the new index is equivalent to the current one and
@@ -1861,6 +1862,9 @@ def reindex(self, index=None, method=None, level=None, copy=True):
18611862
level : int or name
18621863
Broadcast across a level, matching Index values on the
18631864
passed MultiIndex level
1865+
fill_value : scalar, default np.NaN
1866+
Value to use for missing values. Defaults to NaN, but can be any
1867+
"compatible" value
18641868
18651869
Returns
18661870
-------
@@ -1878,7 +1882,7 @@ def reindex(self, index=None, method=None, level=None, copy=True):
18781882

18791883
new_index, fill_vec = self.index.reindex(index, method=method,
18801884
level=level)
1881-
new_values = com.take_1d(self.values, fill_vec)
1885+
new_values = com.take_1d(self.values, fill_vec, fill_value=fill_value)
18821886
return Series(new_values, index=new_index, name=self.name)
18831887

18841888
def reindex_like(self, other, method=None):

pandas/src/generate_code.py

+21 -9
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,11 @@
33
take_1d_template = """@cython.wraparound(False)
44
@cython.boundscheck(False)
55
def take_1d_%(name)s(ndarray[%(c_type)s] values, ndarray[int32_t] indexer,
6-
out=None):
6+
out=None, fill_value=np.nan):
77
cdef:
88
Py_ssize_t i, n, idx
99
ndarray[%(c_type)s] outbuf
10+
%(c_type)s fv
1011
1112
n = len(indexer)
1213
@@ -15,12 +16,21 @@ def take_1d_%(name)s(ndarray[%(c_type)s] values, ndarray[int32_t] indexer,
1516
else:
1617
outbuf = out
1718
18-
for i in range(n):
19-
idx = indexer[i]
20-
if idx == -1:
21-
%(na_action)s
22-
else:
23-
outbuf[i] = values[idx]
19+
if %(raise_on_na)s and _checknan(fill_value):
20+
for i in range(n):
21+
idx = indexer[i]
22+
if idx == -1:
23+
raise ValueError('No NA values allowed')
24+
else:
25+
outbuf[i] = values[idx]
26+
else:
27+
fv = fill_value
28+
for i in range(n):
29+
idx = indexer[i]
30+
if idx == -1:
31+
outbuf[i] = fv
32+
else:
33+
outbuf[i] = values[idx]
2434
2535
"""
2636

@@ -724,12 +734,14 @@ def generate_from_template(template, ndim=1, exclude=None):
724734
if exclude is not None and name in exclude:
725735
continue
726736

737+
raise_on_na = 'False' if can_hold_na else 'True'
727738
if ndim == 1:
728739
na_action = set_na if can_hold_na else raise_on_na
729740
elif ndim == 2:
730741
na_action = set_na_2d if can_hold_na else raise_on_na
731-
func = template % {'name' : name, 'c_type' : c_type,
732-
'dtype' : dtype, 'na_action' : na_action}
742+
func = template % {'name': name, 'c_type': c_type,
743+
'dtype': dtype, 'na_action': na_action,
744+
'raise_on_na': raise_on_na}
733745
output.write(func)
734746
return output.getvalue()
735747

pandas/src/generated.pyx

+91 -41
Original file line number | Diff line number | Diff line change
@@ -927,10 +927,11 @@ def backfill_2d_inplace_bool(ndarray[uint8_t, ndim=2] values,
927927
@cython.wraparound(False)
928928
@cython.boundscheck(False)
929929
def take_1d_float64(ndarray[float64_t] values, ndarray[int32_t] indexer,
930-
out=None):
930+
out=None, fill_value=np.nan):
931931
cdef:
932932
Py_ssize_t i, n, idx
933933
ndarray[float64_t] outbuf
934+
float64_t fv
934935

935936
n = len(indexer)
936937

@@ -939,20 +940,30 @@ def take_1d_float64(ndarray[float64_t] values, ndarray[int32_t] indexer,
939940
else:
940941
outbuf = out
941942

942-
for i in range(n):
943-
idx = indexer[i]
944-
if idx == -1:
945-
outbuf[i] = NaN
946-
else:
947-
outbuf[i] = values[idx]
943+
if False and _checknan(fill_value):
944+
for i in range(n):
945+
idx = indexer[i]
946+
if idx == -1:
947+
raise ValueError('No NA values allowed')
948+
else:
949+
outbuf[i] = values[idx]
950+
else:
951+
fv = fill_value
952+
for i in range(n):
953+
idx = indexer[i]
954+
if idx == -1:
955+
outbuf[i] = fv
956+
else:
957+
outbuf[i] = values[idx]
948958

949959
@cython.wraparound(False)
950960
@cython.boundscheck(False)
951961
def take_1d_object(ndarray[object] values, ndarray[int32_t] indexer,
952-
out=None):
962+
out=None, fill_value=np.nan):
953963
cdef:
954964
Py_ssize_t i, n, idx
955965
ndarray[object] outbuf
966+
object fv
956967

957968
n = len(indexer)
958969

@@ -961,20 +972,30 @@ def take_1d_object(ndarray[object] values, ndarray[int32_t] indexer,
961972
else:
962973
outbuf = out
963974

964-
for i in range(n):
965-
idx = indexer[i]
966-
if idx == -1:
967-
outbuf[i] = NaN
968-
else:
969-
outbuf[i] = values[idx]
975+
if False and _checknan(fill_value):
976+
for i in range(n):
977+
idx = indexer[i]
978+
if idx == -1:
979+
raise ValueError('No NA values allowed')
980+
else:
981+
outbuf[i] = values[idx]
982+
else:
983+
fv = fill_value
984+
for i in range(n):
985+
idx = indexer[i]
986+
if idx == -1:
987+
outbuf[i] = fv
988+
else:
989+
outbuf[i] = values[idx]
970990

971991
@cython.wraparound(False)
972992
@cython.boundscheck(False)
973993
def take_1d_int32(ndarray[int32_t] values, ndarray[int32_t] indexer,
974-
out=None):
994+
out=None, fill_value=np.nan):
975995
cdef:
976996
Py_ssize_t i, n, idx
977997
ndarray[int32_t] outbuf
998+
int32_t fv
978999

9791000
n = len(indexer)
9801001

@@ -983,20 +1004,30 @@ def take_1d_int32(ndarray[int32_t] values, ndarray[int32_t] indexer,
9831004
else:
9841005
outbuf = out
9851006

986-
for i in range(n):
987-
idx = indexer[i]
988-
if idx == -1:
989-
raise ValueError('No NA values allowed')
990-
else:
991-
outbuf[i] = values[idx]
1007+
if True and _checknan(fill_value):
1008+
for i in range(n):
1009+
idx = indexer[i]
1010+
if idx == -1:
1011+
raise ValueError('No NA values allowed')
1012+
else:
1013+
outbuf[i] = values[idx]
1014+
else:
1015+
fv = fill_value
1016+
for i in range(n):
1017+
idx = indexer[i]
1018+
if idx == -1:
1019+
outbuf[i] = fv
1020+
else:
1021+
outbuf[i] = values[idx]
9921022

9931023
@cython.wraparound(False)
9941024
@cython.boundscheck(False)
9951025
def take_1d_int64(ndarray[int64_t] values, ndarray[int32_t] indexer,
996-
out=None):
1026+
out=None, fill_value=np.nan):
9971027
cdef:
9981028
Py_ssize_t i, n, idx
9991029
ndarray[int64_t] outbuf
1030+
int64_t fv
10001031

10011032
n = len(indexer)
10021033

@@ -1005,20 +1036,30 @@ def take_1d_int64(ndarray[int64_t] values, ndarray[int32_t] indexer,
10051036
else:
10061037
outbuf = out
10071038

1008-
for i in range(n):
1009-
idx = indexer[i]
1010-
if idx == -1:
1011-
raise ValueError('No NA values allowed')
1012-
else:
1013-
outbuf[i] = values[idx]
1039+
if True and _checknan(fill_value):
1040+
for i in range(n):
1041+
idx = indexer[i]
1042+
if idx == -1:
1043+
raise ValueError('No NA values allowed')
1044+
else:
1045+
outbuf[i] = values[idx]
1046+
else:
1047+
fv = fill_value
1048+
for i in range(n):
1049+
idx = indexer[i]
1050+
if idx == -1:
1051+
outbuf[i] = fv
1052+
else:
1053+
outbuf[i] = values[idx]
10141054

10151055
@cython.wraparound(False)
10161056
@cython.boundscheck(False)
10171057
def take_1d_bool(ndarray[uint8_t] values, ndarray[int32_t] indexer,
1018-
out=None):
1058+
out=None, fill_value=np.nan):
10191059
cdef:
10201060
Py_ssize_t i, n, idx
10211061
ndarray[uint8_t] outbuf
1062+
uint8_t fv
10221063

10231064
n = len(indexer)
10241065

@@ -1027,12 +1068,21 @@ def take_1d_bool(ndarray[uint8_t] values, ndarray[int32_t] indexer,
10271068
else:
10281069
outbuf = out
10291070

1030-
for i in range(n):
1031-
idx = indexer[i]
1032-
if idx == -1:
1033-
raise ValueError('No NA values allowed')
1034-
else:
1035-
outbuf[i] = values[idx]
1071+
if True and _checknan(fill_value):
1072+
for i in range(n):
1073+
idx = indexer[i]
1074+
if idx == -1:
1075+
raise ValueError('No NA values allowed')
1076+
else:
1077+
outbuf[i] = values[idx]
1078+
else:
1079+
fv = fill_value
1080+
for i in range(n):
1081+
idx = indexer[i]
1082+
if idx == -1:
1083+
outbuf[i] = fv
1084+
else:
1085+
outbuf[i] = values[idx]
10361086

10371087

10381088
@cython.boundscheck(False)
@@ -1404,7 +1454,7 @@ def take_2d_axis0_int32(ndarray[int32_t, ndim=2] values,
14041454

14051455
if idx == -1:
14061456
for j from 0 <= j < k:
1407-
raise ValueError('No NA values allowed')
1457+
True
14081458
else:
14091459
for j from 0 <= j < k:
14101460
outbuf[i, j] = values[idx, j]
@@ -1431,7 +1481,7 @@ def take_2d_axis0_int64(ndarray[int64_t, ndim=2] values,
14311481

14321482
if idx == -1:
14331483
for j from 0 <= j < k:
1434-
raise ValueError('No NA values allowed')
1484+
True
14351485
else:
14361486
for j from 0 <= j < k:
14371487
outbuf[i, j] = values[idx, j]
@@ -1458,7 +1508,7 @@ def take_2d_axis0_bool(ndarray[uint8_t, ndim=2] values,
14581508

14591509
if idx == -1:
14601510
for j from 0 <= j < k:
1461-
raise ValueError('No NA values allowed')
1511+
True
14621512
else:
14631513
for j from 0 <= j < k:
14641514
outbuf[i, j] = values[idx, j]
@@ -1540,7 +1590,7 @@ def take_2d_axis1_int32(ndarray[int32_t, ndim=2] values,
15401590

15411591
if idx == -1:
15421592
for i in range(n):
1543-
raise ValueError('No NA values allowed')
1593+
True
15441594
else:
15451595
for i in range(n):
15461596
outbuf[i, j] = values[i, idx]
@@ -1567,7 +1617,7 @@ def take_2d_axis1_int64(ndarray[int64_t, ndim=2] values,
15671617

15681618
if idx == -1:
15691619
for i in range(n):
1570-
raise ValueError('No NA values allowed')
1620+
True
15711621
else:
15721622
for i in range(n):
15731623
outbuf[i, j] = values[i, idx]
@@ -1594,7 +1644,7 @@ def take_2d_axis1_bool(ndarray[uint8_t, ndim=2] values,
15941644

15951645
if idx == -1:
15961646
for i in range(n):
1597-
raise ValueError('No NA values allowed')
1647+
True
15981648
else:
15991649
for i in range(n):
16001650
outbuf[i, j] = values[i, idx]

0 commit comments

Comments
 (0)