Skip to content

Commit 389b153

Browse files
pitrouwesm
andcommitted
ARROW-9439: [C++] Fix crash on invalid IPC input
Also add argument-checking variants of SliceBuffer, SliceMutableBuffer, Array::Slice and ArrayData::Slice. Should fix the following issue: * https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=24101 Closes #7733 from pitrou/ARROW-9439-oss-fuzz-ipc Lead-authored-by: Antoine Pitrou <[email protected]> Co-authored-by: Wes McKinney <[email protected]> Signed-off-by: Wes McKinney <[email protected]>
1 parent 8daf756 commit 389b153

File tree

12 files changed

+263
-35
lines changed

12 files changed

+263
-35
lines changed

cpp/src/arrow/array/array_base.cc

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,19 @@ std::shared_ptr<Array> Array::Slice(int64_t offset) const {
246246
return Slice(offset, slice_length);
247247
}
248248

249+
Result<std::shared_ptr<Array>> Array::SliceSafe(int64_t offset, int64_t length) const {
250+
ARROW_ASSIGN_OR_RAISE(auto sliced_data, data_->SliceSafe(offset, length));
251+
return MakeArray(std::move(sliced_data));
252+
}
253+
254+
Result<std::shared_ptr<Array>> Array::SliceSafe(int64_t offset) const {
255+
if (offset < 0) {
256+
// Avoid UBSAN in subtraction below
257+
return Status::Invalid("Negative buffer slice offset");
258+
}
259+
return SliceSafe(offset, data_->length - offset);
260+
}
261+
249262
std::string Array::ToString() const {
250263
std::stringstream ss;
251264
ARROW_CHECK_OK(PrettyPrint(*this, 0, &ss));

cpp/src/arrow/array/array_base.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,11 @@ class ARROW_EXPORT Array {
151151
/// Slice from offset until end of the array
152152
std::shared_ptr<Array> Slice(int64_t offset) const;
153153

154+
/// Input-checking variant of Array::Slice
155+
Result<std::shared_ptr<Array>> SliceSafe(int64_t offset, int64_t length) const;
156+
/// Input-checking variant of Array::Slice
157+
Result<std::shared_ptr<Array>> SliceSafe(int64_t offset) const;
158+
154159
std::shared_ptr<ArrayData> data() const { return data_; }
155160

156161
int num_fields() const { return static_cast<int>(data_->child_data.size()); }

cpp/src/arrow/array/array_test.cc

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,55 @@ TEST_F(TestArray, TestLength) {
113113
ASSERT_EQ(arr->length(), 100);
114114
}
115115

116+
TEST_F(TestArray, TestSliceSafe) {
117+
std::vector<int32_t> original_data{1, 2, 3, 4, 5, 6, 7};
118+
auto arr = std::make_shared<Int32Array>(7, Buffer::Wrap(original_data));
119+
120+
auto check_data = [](const Array& arr, const std::vector<int32_t>& expected) {
121+
ASSERT_EQ(arr.length(), static_cast<int64_t>(expected.size()));
122+
const int32_t* data = arr.data()->GetValues<int32_t>(1);
123+
for (int64_t i = 0; i < arr.length(); ++i) {
124+
ASSERT_EQ(data[i], expected[i]);
125+
}
126+
};
127+
128+
check_data(*arr, {1, 2, 3, 4, 5, 6, 7});
129+
130+
ASSERT_OK_AND_ASSIGN(auto sliced, arr->SliceSafe(0, 0));
131+
check_data(*sliced, {});
132+
133+
ASSERT_OK_AND_ASSIGN(sliced, arr->SliceSafe(0, 7));
134+
check_data(*sliced, original_data);
135+
136+
ASSERT_OK_AND_ASSIGN(sliced, arr->SliceSafe(3, 4));
137+
check_data(*sliced, {4, 5, 6, 7});
138+
139+
ASSERT_OK_AND_ASSIGN(sliced, arr->SliceSafe(0, 7));
140+
check_data(*sliced, {1, 2, 3, 4, 5, 6, 7});
141+
142+
ASSERT_OK_AND_ASSIGN(sliced, arr->SliceSafe(7, 0));
143+
check_data(*sliced, {});
144+
145+
ASSERT_RAISES(Invalid, arr->SliceSafe(8, 0));
146+
ASSERT_RAISES(Invalid, arr->SliceSafe(0, 8));
147+
ASSERT_RAISES(Invalid, arr->SliceSafe(-1, 0));
148+
ASSERT_RAISES(Invalid, arr->SliceSafe(0, -1));
149+
ASSERT_RAISES(Invalid, arr->SliceSafe(6, 2));
150+
ASSERT_RAISES(Invalid, arr->SliceSafe(6, std::numeric_limits<int64_t>::max() - 5));
151+
152+
ASSERT_OK_AND_ASSIGN(sliced, arr->SliceSafe(0));
153+
check_data(*sliced, original_data);
154+
155+
ASSERT_OK_AND_ASSIGN(sliced, arr->SliceSafe(3));
156+
check_data(*sliced, {4, 5, 6, 7});
157+
158+
ASSERT_OK_AND_ASSIGN(sliced, arr->SliceSafe(7));
159+
check_data(*sliced, {});
160+
161+
ASSERT_RAISES(Invalid, arr->SliceSafe(8));
162+
ASSERT_RAISES(Invalid, arr->SliceSafe(-1));
163+
}
164+
116165
Status MakeArrayFromValidBytes(const std::vector<uint8_t>& v, MemoryPool* pool,
117166
std::shared_ptr<Array>* out) {
118167
int64_t null_count = v.size() - std::accumulate(v.begin(), v.end(), 0);

cpp/src/arrow/array/concatenate.cc

Lines changed: 54 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -202,47 +202,55 @@ class ConcatenateImpl {
202202

203203
Status Visit(const FixedWidthType& fixed) {
204204
// Handles numbers, decimal128, fixed_size_binary
205-
return ConcatenateBuffers(Buffers(1, fixed), pool_).Value(&out_->buffers[1]);
205+
ARROW_ASSIGN_OR_RAISE(auto buffers, Buffers(1, fixed));
206+
return ConcatenateBuffers(buffers, pool_).Value(&out_->buffers[1]);
206207
}
207208

208209
Status Visit(const BinaryType&) {
209210
std::vector<Range> value_ranges;
210-
RETURN_NOT_OK(ConcatenateOffsets<int32_t>(Buffers(1, sizeof(int32_t)), pool_,
211-
&out_->buffers[1], &value_ranges));
212-
return ConcatenateBuffers(Buffers(2, value_ranges), pool_).Value(&out_->buffers[2]);
211+
ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int32_t)));
212+
RETURN_NOT_OK(ConcatenateOffsets<int32_t>(index_buffers, pool_, &out_->buffers[1],
213+
&value_ranges));
214+
ARROW_ASSIGN_OR_RAISE(auto value_buffers, Buffers(2, value_ranges));
215+
return ConcatenateBuffers(value_buffers, pool_).Value(&out_->buffers[2]);
213216
}
214217

215218
Status Visit(const LargeBinaryType&) {
216219
std::vector<Range> value_ranges;
217-
RETURN_NOT_OK(ConcatenateOffsets<int64_t>(Buffers(1, sizeof(int64_t)), pool_,
218-
&out_->buffers[1], &value_ranges));
219-
return ConcatenateBuffers(Buffers(2, value_ranges), pool_).Value(&out_->buffers[2]);
220+
ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int64_t)));
221+
RETURN_NOT_OK(ConcatenateOffsets<int64_t>(index_buffers, pool_, &out_->buffers[1],
222+
&value_ranges));
223+
ARROW_ASSIGN_OR_RAISE(auto value_buffers, Buffers(2, value_ranges));
224+
return ConcatenateBuffers(value_buffers, pool_).Value(&out_->buffers[2]);
220225
}
221226

222227
Status Visit(const ListType&) {
223228
std::vector<Range> value_ranges;
224-
RETURN_NOT_OK(ConcatenateOffsets<int32_t>(Buffers(1, sizeof(int32_t)), pool_,
225-
&out_->buffers[1], &value_ranges));
226-
return ConcatenateImpl(ChildData(0, value_ranges), pool_)
227-
.Concatenate(&out_->child_data[0]);
229+
ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int32_t)));
230+
RETURN_NOT_OK(ConcatenateOffsets<int32_t>(index_buffers, pool_, &out_->buffers[1],
231+
&value_ranges));
232+
ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, value_ranges));
233+
return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]);
228234
}
229235

230236
Status Visit(const LargeListType&) {
231237
std::vector<Range> value_ranges;
232-
RETURN_NOT_OK(ConcatenateOffsets<int64_t>(Buffers(1, sizeof(int64_t)), pool_,
233-
&out_->buffers[1], &value_ranges));
234-
return ConcatenateImpl(ChildData(0, value_ranges), pool_)
235-
.Concatenate(&out_->child_data[0]);
238+
ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int64_t)));
239+
RETURN_NOT_OK(ConcatenateOffsets<int64_t>(index_buffers, pool_, &out_->buffers[1],
240+
&value_ranges));
241+
ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, value_ranges));
242+
return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]);
236243
}
237244

238245
Status Visit(const FixedSizeListType&) {
239-
return ConcatenateImpl(ChildData(0), pool_).Concatenate(&out_->child_data[0]);
246+
ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0));
247+
return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]);
240248
}
241249

242250
Status Visit(const StructType& s) {
243251
for (int i = 0; i < s.num_fields(); ++i) {
244-
RETURN_NOT_OK(
245-
ConcatenateImpl(ChildData(i), pool_).Concatenate(&out_->child_data[i]));
252+
ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(i));
253+
RETURN_NOT_OK(ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[i]));
246254
}
247255
return Status::OK();
248256
}
@@ -263,7 +271,8 @@ class ConcatenateImpl {
263271

264272
if (dictionaries_same) {
265273
out_->dictionary = in_[0]->dictionary;
266-
return ConcatenateBuffers(Buffers(1, *fixed), pool_).Value(&out_->buffers[1]);
274+
ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, *fixed));
275+
return ConcatenateBuffers(index_buffers, pool_).Value(&out_->buffers[1]);
267276
} else {
268277
return Status::NotImplemented("Concat with dictionary unification NYI");
269278
}
@@ -279,17 +288,24 @@ class ConcatenateImpl {
279288
}
280289

281290
private:
291+
// NOTE: Concatenate() can be called during IPC reads to append delta dictionaries
292+
// on non-validated input. Therefore, the input-checking SliceBufferSafe and
293+
// ArrayData::SliceSafe are used below.
294+
282295
// Gather the index-th buffer of each input into a vector.
283296
// Bytes are sliced with that input's offset and length.
284297
// Note that BufferVector will not contain the buffer of in_[i] if it's
285298
// nullptr.
286-
BufferVector Buffers(size_t index) {
299+
Result<BufferVector> Buffers(size_t index) {
287300
BufferVector buffers;
288301
buffers.reserve(in_.size());
289302
for (const std::shared_ptr<const ArrayData>& array_data : in_) {
290303
const auto& buffer = array_data->buffers[index];
291304
if (buffer != nullptr) {
292-
buffers.push_back(SliceBuffer(buffer, array_data->offset, array_data->length));
305+
ARROW_ASSIGN_OR_RAISE(
306+
auto sliced_buffer,
307+
SliceBufferSafe(buffer, array_data->offset, array_data->length));
308+
buffers.push_back(std::move(sliced_buffer));
293309
}
294310
}
295311
return buffers;
@@ -299,14 +315,17 @@ class ConcatenateImpl {
299315
// Bytes are sliced with the explicitly passed ranges.
300316
// Note that BufferVector will not contain the buffer of in_[i] if it's
301317
// nullptr.
302-
BufferVector Buffers(size_t index, const std::vector<Range>& ranges) {
318+
Result<BufferVector> Buffers(size_t index, const std::vector<Range>& ranges) {
303319
DCHECK_EQ(in_.size(), ranges.size());
304320
BufferVector buffers;
305321
buffers.reserve(in_.size());
306322
for (size_t i = 0; i < in_.size(); ++i) {
307323
const auto& buffer = in_[i]->buffers[index];
308324
if (buffer != nullptr) {
309-
buffers.push_back(SliceBuffer(buffer, ranges[i].offset, ranges[i].length));
325+
ARROW_ASSIGN_OR_RAISE(
326+
auto sliced_buffer,
327+
SliceBufferSafe(buffer, ranges[i].offset, ranges[i].length));
328+
buffers.push_back(std::move(sliced_buffer));
310329
} else {
311330
DCHECK_EQ(ranges[i].length, 0);
312331
}
@@ -319,14 +338,16 @@ class ConcatenateImpl {
319338
// those elements are sliced with that input's offset and length.
320339
// Note that BufferVector will not contain the buffer of in_[i] if it's
321340
// nullptr.
322-
BufferVector Buffers(size_t index, int byte_width) {
341+
Result<BufferVector> Buffers(size_t index, int byte_width) {
323342
BufferVector buffers;
324343
buffers.reserve(in_.size());
325344
for (const std::shared_ptr<const ArrayData>& array_data : in_) {
326345
const auto& buffer = array_data->buffers[index];
327346
if (buffer != nullptr) {
328-
buffers.push_back(SliceBuffer(buffer, array_data->offset * byte_width,
329-
array_data->length * byte_width));
347+
ARROW_ASSIGN_OR_RAISE(auto sliced_buffer,
348+
SliceBufferSafe(buffer, array_data->offset * byte_width,
349+
array_data->length * byte_width));
350+
buffers.push_back(std::move(sliced_buffer));
330351
}
331352
}
332353
return buffers;
@@ -337,7 +358,7 @@ class ConcatenateImpl {
337358
// those elements are sliced with that input's offset and length.
338359
// Note that BufferVector will not contain the buffer of in_[i] if it's
339360
// nullptr.
340-
BufferVector Buffers(size_t index, const FixedWidthType& fixed) {
361+
Result<BufferVector> Buffers(size_t index, const FixedWidthType& fixed) {
341362
DCHECK_EQ(fixed.bit_width() % 8, 0);
342363
return Buffers(index, fixed.bit_width() / 8);
343364
}
@@ -355,23 +376,24 @@ class ConcatenateImpl {
355376

356377
// Gather the index-th child_data of each input into a vector.
357378
// Elements are sliced with that input's offset and length.
358-
std::vector<std::shared_ptr<const ArrayData>> ChildData(size_t index) {
379+
Result<std::vector<std::shared_ptr<const ArrayData>>> ChildData(size_t index) {
359380
std::vector<std::shared_ptr<const ArrayData>> child_data(in_.size());
360381
for (size_t i = 0; i < in_.size(); ++i) {
361-
child_data[i] = in_[i]->child_data[index]->Slice(in_[i]->offset, in_[i]->length);
382+
ARROW_ASSIGN_OR_RAISE(child_data[i], in_[i]->child_data[index]->SliceSafe(
383+
in_[i]->offset, in_[i]->length));
362384
}
363385
return child_data;
364386
}
365387

366388
// Gather the index-th child_data of each input into a vector.
367389
// Elements are sliced with the explicitly passed ranges.
368-
std::vector<std::shared_ptr<const ArrayData>> ChildData(
390+
Result<std::vector<std::shared_ptr<const ArrayData>>> ChildData(
369391
size_t index, const std::vector<Range>& ranges) {
370392
DCHECK_EQ(in_.size(), ranges.size());
371393
std::vector<std::shared_ptr<const ArrayData>> child_data(in_.size());
372394
for (size_t i = 0; i < in_.size(); ++i) {
373-
child_data[i] =
374-
in_[i]->child_data[index]->Slice(ranges[i].offset, ranges[i].length);
395+
ARROW_ASSIGN_OR_RAISE(child_data[i], in_[i]->child_data[index]->SliceSafe(
396+
ranges[i].offset, ranges[i].length));
375397
}
376398
return child_data;
377399
}

cpp/src/arrow/array/data.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "arrow/status.h"
3030
#include "arrow/type.h"
3131
#include "arrow/util/bitmap_ops.h"
32+
#include "arrow/util/int_util.h"
3233
#include "arrow/util/logging.h"
3334
#include "arrow/util/macros.h"
3435

@@ -105,6 +106,11 @@ std::shared_ptr<ArrayData> ArrayData::Slice(int64_t off, int64_t len) const {
105106
return copy;
106107
}
107108

109+
Result<std::shared_ptr<ArrayData>> ArrayData::SliceSafe(int64_t off, int64_t len) const {
110+
RETURN_NOT_OK(internal::CheckSliceParams(length, off, len, "array"));
111+
return Slice(off, len);
112+
}
113+
108114
int64_t ArrayData::GetNullCount() const {
109115
int64_t precomputed = this->null_count.load();
110116
if (ARROW_PREDICT_FALSE(precomputed == kUnknownNullCount)) {

cpp/src/arrow/array/data.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,9 +195,15 @@ struct ARROW_EXPORT ArrayData {
195195
return GetMutableValues<T>(i, offset);
196196
}
197197

198-
// Construct a zero-copy slice of the data with the indicated offset and length
198+
/// \brief Construct a zero-copy slice of the data with the given offset and length
199199
std::shared_ptr<ArrayData> Slice(int64_t offset, int64_t length) const;
200200

201+
/// \brief Input-checking variant of Slice
202+
///
203+
/// An Invalid Status is returned if the requested slice falls out of bounds.
204+
/// Note that unlike Slice, `length` isn't clamped to the available buffer size.
205+
Result<std::shared_ptr<ArrayData>> SliceSafe(int64_t offset, int64_t length) const;
206+
201207
void SetNullCount(int64_t v) { null_count.store(v); }
202208

203209
/// \brief Return null count, or compute and set it if it's not known

cpp/src/arrow/buffer.cc

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "arrow/result.h"
2626
#include "arrow/status.h"
2727
#include "arrow/util/bit_util.h"
28+
#include "arrow/util/int_util.h"
2829
#include "arrow/util/logging.h"
2930
#include "arrow/util/string.h"
3031

@@ -43,6 +44,46 @@ Result<std::shared_ptr<Buffer>> Buffer::CopySlice(const int64_t start,
4344
return std::move(new_buffer);
4445
}
4546

47+
namespace {
48+
49+
Status CheckBufferSlice(const Buffer& buffer, int64_t offset, int64_t length) {
50+
return internal::CheckSliceParams(buffer.size(), offset, length, "buffer");
51+
}
52+
53+
Status CheckBufferSlice(const Buffer& buffer, int64_t offset) {
54+
if (ARROW_PREDICT_FALSE(offset < 0)) {
55+
// Avoid UBSAN in subtraction below
56+
return Status::Invalid("Negative buffer slice offset");
57+
}
58+
return CheckBufferSlice(buffer, offset, buffer.size() - offset);
59+
}
60+
61+
} // namespace
62+
63+
Result<std::shared_ptr<Buffer>> SliceBufferSafe(const std::shared_ptr<Buffer>& buffer,
64+
int64_t offset) {
65+
RETURN_NOT_OK(CheckBufferSlice(*buffer, offset));
66+
return SliceBuffer(buffer, offset);
67+
}
68+
69+
Result<std::shared_ptr<Buffer>> SliceBufferSafe(const std::shared_ptr<Buffer>& buffer,
70+
int64_t offset, int64_t length) {
71+
RETURN_NOT_OK(CheckBufferSlice(*buffer, offset, length));
72+
return SliceBuffer(buffer, offset, length);
73+
}
74+
75+
Result<std::shared_ptr<Buffer>> SliceMutableBufferSafe(
76+
const std::shared_ptr<Buffer>& buffer, int64_t offset) {
77+
RETURN_NOT_OK(CheckBufferSlice(*buffer, offset));
78+
return SliceMutableBuffer(buffer, offset);
79+
}
80+
81+
Result<std::shared_ptr<Buffer>> SliceMutableBufferSafe(
82+
const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length) {
83+
RETURN_NOT_OK(CheckBufferSlice(*buffer, offset, length));
84+
return SliceMutableBuffer(buffer, offset, length);
85+
}
86+
4687
std::string Buffer::ToHexString() {
4788
return HexEncode(data(), static_cast<size_t>(size()));
4889
}

0 commit comments

Comments
 (0)