Skip to content

Commit 2ea7350

Browse files
author
Pindikura Ravindra
committed
ARROW-4086: [Java] Add apis to debug memory alloc failures
- On failures, capture state for each allocator in the chain and add the details to the exception.
- Add APIs to get parent/child allocators.
- Do not mask the original exception when allocating from vectors.

Author: Pindikura Ravindra <[email protected]>

Closes #4369 from pravindra/memdebug and squashes the following commits:

92a269b <Pindikura Ravindra> ARROW-4086 : Add apis to debug memory alloc failures
1 parent 1643c1e commit 2ea7350

File tree

10 files changed

+466
-74
lines changed

10 files changed

+466
-74
lines changed

java/memory/src/main/java/org/apache/arrow/memory/Accountant.java

Lines changed: 62 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ class Accountant implements AutoCloseable {
3636
*/
3737
protected final Accountant parent;
3838

39+
private final String name;
40+
3941
/**
4042
* The amount of memory reserved for this allocator. Releases below this amount of memory will
4143
* not be returned to the
@@ -57,7 +59,8 @@ class Accountant implements AutoCloseable {
5759
*/
5860
private final AtomicLong locallyHeldMemory = new AtomicLong();
5961

60-
public Accountant(Accountant parent, long reservation, long maxAllocation) {
62+
public Accountant(Accountant parent, String name, long reservation, long maxAllocation) {
63+
Preconditions.checkNotNull(name, "name must not be null");
6164
Preconditions.checkArgument(reservation >= 0, "The initial reservation size must be " +
6265
"non-negative.");
6366
Preconditions.checkArgument(maxAllocation >= 0, "The maximum allocation limit must be " +
@@ -68,6 +71,7 @@ public Accountant(Accountant parent, long reservation, long maxAllocation) {
6871
"reserve memory.");
6972

7073
this.parent = parent;
74+
this.name = name;
7175
this.reservation = reservation;
7276
this.allocationLimit.set(maxAllocation);
7377

@@ -77,28 +81,43 @@ public Accountant(Accountant parent, long reservation, long maxAllocation) {
7781
if (!outcome.isOk()) {
7882
throw new OutOfMemoryException(String.format(
7983
"Failure trying to allocate initial reservation for Allocator. " +
80-
"Attempted to allocate %d bytes and received an outcome of %s.", reservation,
81-
outcome.name()));
84+
"Attempted to allocate %d bytes.", reservation,
85+
outcome.getStatus().name()), outcome.getDetails());
8286
}
8387
}
8488
}
8589

8690
/**
8791
* Attempt to allocate the requested amount of memory. Either completely succeeds or completely
88-
* fails. Constructs a a
89-
* log of delta
90-
*
91-
* <p>If it fails, no changes are made to accounting.
92+
* fails. If it fails, no changes are made to accounting.
9293
*
9394
* @param size The amount of memory to reserve in bytes.
94-
* @return True if the allocation was successful, false if the allocation failed.
95+
* @return the status and details of allocation at each allocator in the chain.
9596
*/
9697
AllocationOutcome allocateBytes(long size) {
97-
final AllocationOutcome outcome = allocate(size, true, false);
98-
if (!outcome.isOk()) {
98+
AllocationOutcome.Status status = allocateBytesInternal(size);
99+
if (status.isOk()) {
100+
return AllocationOutcome.SUCCESS_INSTANCE;
101+
} else {
102+
// Try again, but with details this time.
103+
// Populating details only on failures avoids performance overhead in the common case (success case).
104+
AllocationOutcomeDetails details = new AllocationOutcomeDetails();
105+
status = allocateBytesInternal(size, details);
106+
return new AllocationOutcome(status, details);
107+
}
108+
}
109+
110+
private AllocationOutcome.Status allocateBytesInternal(long size, AllocationOutcomeDetails details) {
111+
final AllocationOutcome.Status status = allocate(size,
112+
true /*incomingUpdatePeek*/, false /*forceAllocation*/, details);
113+
if (!status.isOk()) {
99114
releaseBytes(size);
100115
}
101-
return outcome;
116+
return status;
117+
}
118+
119+
private AllocationOutcome.Status allocateBytesInternal(long size) {
120+
return allocateBytesInternal(size, null /*details*/);
102121
}
103122

104123
private void updatePeak() {
@@ -126,7 +145,7 @@ private void updatePeak() {
126145
* @return Whether the allocation fit within limits.
127146
*/
128147
boolean forceAllocate(long size) {
129-
final AllocationOutcome outcome = allocate(size, true, true);
148+
final AllocationOutcome.Status outcome = allocate(size, true, true, null);
130149
return outcome.isOk();
131150
}
132151

@@ -152,21 +171,38 @@ boolean forceAllocate(long size) {
152171
* @param forceAllocation Whether we should force the allocation.
153172
* @return The outcome of the allocation.
154173
*/
155-
private AllocationOutcome allocate(final long size, final boolean incomingUpdatePeak, final boolean forceAllocation) {
174+
private AllocationOutcome.Status allocate(final long size, final boolean incomingUpdatePeak,
175+
final boolean forceAllocation, AllocationOutcomeDetails details) {
156176
final long newLocal = locallyHeldMemory.addAndGet(size);
157177
final long beyondReservation = newLocal - reservation;
158178
final boolean beyondLimit = newLocal > allocationLimit.get();
159179
final boolean updatePeak = forceAllocation || (incomingUpdatePeak && !beyondLimit);
160180

161-
AllocationOutcome parentOutcome = AllocationOutcome.SUCCESS;
181+
if (details != null) {
182+
// Add details if required (used in exceptions and debugging).
183+
boolean allocationFailed = true;
184+
long allocatedLocal = 0;
185+
if (!beyondLimit) {
186+
allocatedLocal = size - Math.min(beyondReservation, size);
187+
allocationFailed = false;
188+
}
189+
details.pushEntry(this, newLocal - size, size, allocatedLocal, allocationFailed);
190+
}
191+
192+
AllocationOutcome.Status parentOutcome = AllocationOutcome.Status.SUCCESS;
162193
if (beyondReservation > 0 && parent != null) {
163194
// we need to get memory from our parent.
164195
final long parentRequest = Math.min(beyondReservation, size);
165-
parentOutcome = parent.allocate(parentRequest, updatePeak, forceAllocation);
196+
parentOutcome = parent.allocate(parentRequest, updatePeak, forceAllocation, details);
166197
}
167198

168-
final AllocationOutcome finalOutcome = beyondLimit ? AllocationOutcome.FAILED_LOCAL :
169-
parentOutcome.isOk() ? AllocationOutcome.SUCCESS : AllocationOutcome.FAILED_PARENT;
199+
final AllocationOutcome.Status finalOutcome;
200+
if (beyondLimit) {
201+
finalOutcome = AllocationOutcome.Status.FAILED_LOCAL;
202+
} else {
203+
finalOutcome = parentOutcome.isOk() ? AllocationOutcome.Status.SUCCESS
204+
: AllocationOutcome.Status.FAILED_PARENT;
205+
}
170206

171207
if (updatePeak) {
172208
updatePeak();
@@ -206,6 +242,15 @@ public void close() {
206242
}
207243
}
208244

245+
/**
246+
* Return the name of the accountant.
247+
*
248+
* @return name of accountant
249+
*/
250+
public String getName() {
251+
return name;
252+
}
253+
209254
/**
210255
* Return the current limit of this Accountant.
211256
*

java/memory/src/main/java/org/apache/arrow/memory/AllocationOutcome.java

Lines changed: 58 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,38 +17,81 @@
1717

1818
package org.apache.arrow.memory;
1919

20+
import java.util.Optional;
21+
2022
/**
2123
* Describes the type of outcome that occurred when trying to account for allocation of memory.
2224
*/
23-
public enum AllocationOutcome {
25+
public class AllocationOutcome {
26+
private final Status status;
27+
private final AllocationOutcomeDetails details;
28+
static final AllocationOutcome SUCCESS_INSTANCE = new AllocationOutcome(Status.SUCCESS);
29+
30+
AllocationOutcome(Status status, AllocationOutcomeDetails details) {
31+
this.status = status;
32+
this.details = details;
33+
}
34+
35+
AllocationOutcome(Status status) {
36+
this(status, null);
37+
}
2438

2539
/**
26-
* Allocation succeeded.
40+
* Get the status of the allocation.
41+
* @return status code.
2742
*/
28-
SUCCESS(true),
43+
public Status getStatus() {
44+
return status;
45+
}
2946

3047
/**
31-
* Allocation succeeded but only because the allocator was forced to move beyond a limit.
48+
* Get additional details of the allocation (like the status at each allocator in the hierarchy).
49+
* @return details of allocation
3250
*/
33-
FORCED_SUCCESS(true),
51+
public Optional<AllocationOutcomeDetails> getDetails() {
52+
return Optional.ofNullable(details);
53+
}
3454

3555
/**
36-
* Allocation failed because the local allocator's limits were exceeded.
56+
* Returns true if the allocation was a success.
57+
* @return true if allocation was successful, false otherwise.
3758
*/
38-
FAILED_LOCAL(false),
59+
public boolean isOk() {
60+
return status.isOk();
61+
}
3962

4063
/**
41-
* Allocation failed because a parent allocator's limits were exceeded.
64+
* Allocation status code.
4265
*/
43-
FAILED_PARENT(false);
66+
public enum Status {
67+
/**
68+
* Allocation succeeded.
69+
*/
70+
SUCCESS(true),
4471

45-
private final boolean ok;
72+
/**
73+
* Allocation succeeded but only because the allocator was forced to move beyond a limit.
74+
*/
75+
FORCED_SUCCESS(true),
4676

47-
AllocationOutcome(boolean ok) {
48-
this.ok = ok;
49-
}
77+
/**
78+
* Allocation failed because the local allocator's limits were exceeded.
79+
*/
80+
FAILED_LOCAL(false),
5081

51-
public boolean isOk() {
52-
return ok;
82+
/**
83+
* Allocation failed because a parent allocator's limits were exceeded.
84+
*/
85+
FAILED_PARENT(false);
86+
87+
private final boolean ok;
88+
89+
Status(boolean ok) {
90+
this.ok = ok;
91+
}
92+
93+
public boolean isOk() {
94+
return ok;
95+
}
5396
}
5497
}
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.arrow.memory;
19+
20+
import java.util.ArrayDeque;
21+
import java.util.Deque;
22+
23+
/**
24+
* Captures details of allocation for each accountant in the hierarchical chain.
25+
*/
26+
public class AllocationOutcomeDetails {
27+
Deque<Entry> allocEntries;
28+
29+
AllocationOutcomeDetails() {
30+
allocEntries = new ArrayDeque<>();
31+
}
32+
33+
void pushEntry(Accountant accountant, long totalUsedBeforeAllocation, long requestedSize,
34+
long allocatedSize, boolean allocationFailed) {
35+
36+
Entry top = allocEntries.peekLast();
37+
if (top != null && top.allocationFailed) {
38+
// if the allocation has already failed, stop saving the entries.
39+
return;
40+
}
41+
42+
allocEntries.addLast(new Entry(accountant, totalUsedBeforeAllocation, requestedSize,
43+
allocatedSize, allocationFailed));
44+
}
45+
46+
/**
47+
* Get the allocator that caused the failure.
48+
* @return the allocator that caused failure, null if there was no failure.
49+
*/
50+
public BufferAllocator getFailedAllocator() {
51+
Entry top = allocEntries.peekLast();
52+
if (top != null && top.allocationFailed && (top.accountant instanceof BufferAllocator)) {
53+
return (BufferAllocator)top.accountant;
54+
} else {
55+
return null;
56+
}
57+
}
58+
59+
@Override
60+
public String toString() {
61+
StringBuilder sb = new StringBuilder();
62+
sb.append("Allocation outcome details:\n");
63+
allocEntries.forEach(sb::append);
64+
return sb.toString();
65+
}
66+
67+
/**
68+
* Outcome of the allocation request at one accountant in the hierarchy.
69+
*/
70+
public static class Entry {
71+
private final Accountant accountant;
72+
73+
// Remember allocator attributes at the time of the request.
74+
private final long limit;
75+
private final long used;
76+
77+
// allocation outcome
78+
private final long requestedSize;
79+
private final long allocatedSize;
80+
private final boolean allocationFailed;
81+
82+
Entry(Accountant accountant, long totalUsedBeforeAllocation, long requestedSize,
83+
long allocatedSize, boolean allocationFailed) {
84+
this.accountant = accountant;
85+
this.limit = accountant.getLimit();
86+
this.used = totalUsedBeforeAllocation;
87+
88+
this.requestedSize = requestedSize;
89+
this.allocatedSize = allocatedSize;
90+
this.allocationFailed = allocationFailed;
91+
}
92+
93+
public Accountant getAccountant() {
94+
return accountant;
95+
}
96+
97+
public long getLimit() {
98+
return limit;
99+
}
100+
101+
public long getUsed() {
102+
return used;
103+
}
104+
105+
public long getRequestedSize() {
106+
return requestedSize;
107+
}
108+
109+
public long getAllocatedSize() {
110+
return allocatedSize;
111+
}
112+
113+
public boolean isAllocationFailed() {
114+
return allocationFailed;
115+
}
116+
117+
@Override
118+
public String toString() {
119+
return new StringBuilder()
120+
.append("allocator[" + accountant.getName() + "]")
121+
.append(" reservation: " + accountant.getInitReservation())
122+
.append(" limit: " + limit)
123+
.append(" used: " + used)
124+
.append(" requestedSize: " + requestedSize)
125+
.append(" allocatedSize: " + allocatedSize)
126+
.append(" localAllocationStatus: " + (allocationFailed ? "success" : "fail"))
127+
.append("\n")
128+
.toString();
129+
}
130+
}
131+
132+
}

0 commit comments

Comments
 (0)