Skip to content

Commit 7c5f64f

Browse files
Vladimir Davydov
authored and torvalds committed
mm: oom: deduplicate victim selection code for memcg and global oom
When selecting an oom victim, we use the same heuristic for both memory cgroup and global oom. The only difference is the scope of tasks to select the victim from. So we could just export an iterator over all memcg tasks and keep all oom related logic in oom_kill.c, but instead we duplicate pieces of it in memcontrol.c reusing some initially private functions of oom_kill.c in order to not duplicate all of it. That looks ugly and error prone, because any modification of select_bad_process should also be propagated to mem_cgroup_out_of_memory. Let's rework this as follows: keep all oom heuristic related code private to oom_kill.c and make oom_kill.c use exported memcg functions when it's really necessary (like in case of iterating over memcg tasks). Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Vladimir Davydov <[email protected]> Acked-by: Johannes Weiner <[email protected]> Cc: Michal Hocko <[email protected]> Cc: Tetsuo Handa <[email protected]> Cc: David Rientjes <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 48e509e commit 7c5f64f

File tree

4 files changed

+167
-205
lines changed

4 files changed

+167
-205
lines changed

include/linux/memcontrol.h

+15
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,8 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
366366
struct mem_cgroup *,
367367
struct mem_cgroup_reclaim_cookie *);
368368
void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
369+
int mem_cgroup_scan_tasks(struct mem_cgroup *,
370+
int (*)(struct task_struct *, void *), void *);
369371

370372
static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
371373
{
@@ -446,6 +448,8 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
446448

447449
void mem_cgroup_handle_over_high(void);
448450

451+
unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg);
452+
449453
void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
450454
struct task_struct *p);
451455

@@ -639,6 +643,12 @@ static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
639643
{
640644
}
641645

646+
static inline int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
647+
int (*fn)(struct task_struct *, void *), void *arg)
648+
{
649+
return 0;
650+
}
651+
642652
static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
643653
{
644654
return 0;
@@ -669,6 +679,11 @@ mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
669679
return 0;
670680
}
671681

682+
static inline unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
683+
{
684+
return 0;
685+
}
686+
672687
static inline void
673688
mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
674689
{

include/linux/oom.h

+4-39
Original file line numberDiff line numberDiff line change
@@ -34,23 +34,11 @@ struct oom_control {
3434
* for display purposes.
3535
*/
3636
const int order;
37-
};
3837

39-
/*
40-
* Types of limitations to the nodes from which allocations may occur
41-
*/
42-
enum oom_constraint {
43-
CONSTRAINT_NONE,
44-
CONSTRAINT_CPUSET,
45-
CONSTRAINT_MEMORY_POLICY,
46-
CONSTRAINT_MEMCG,
47-
};
48-
49-
enum oom_scan_t {
50-
OOM_SCAN_OK, /* scan thread and find its badness */
51-
OOM_SCAN_CONTINUE, /* do not consider thread for oom kill */
52-
OOM_SCAN_ABORT, /* abort the iteration and return */
53-
OOM_SCAN_SELECT, /* always select this thread first */
38+
/* Used by oom implementation, do not set */
39+
unsigned long totalpages;
40+
struct task_struct *chosen;
41+
unsigned long chosen_points;
5442
};
5543

5644
extern struct mutex oom_lock;
@@ -70,45 +58,22 @@ static inline bool oom_task_origin(const struct task_struct *p)
7058
return p->signal->oom_flag_origin;
7159
}
7260

73-
extern void mark_oom_victim(struct task_struct *tsk);
74-
75-
#ifdef CONFIG_MMU
76-
extern void wake_oom_reaper(struct task_struct *tsk);
77-
#else
78-
static inline void wake_oom_reaper(struct task_struct *tsk)
79-
{
80-
}
81-
#endif
82-
8361
extern unsigned long oom_badness(struct task_struct *p,
8462
struct mem_cgroup *memcg, const nodemask_t *nodemask,
8563
unsigned long totalpages);
8664

87-
extern void oom_kill_process(struct oom_control *oc, struct task_struct *p,
88-
unsigned int points, unsigned long totalpages,
89-
const char *message);
90-
91-
extern void check_panic_on_oom(struct oom_control *oc,
92-
enum oom_constraint constraint);
93-
94-
extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
95-
struct task_struct *task);
96-
9765
extern bool out_of_memory(struct oom_control *oc);
9866

9967
extern void exit_oom_victim(struct task_struct *tsk);
10068

10169
extern int register_oom_notifier(struct notifier_block *nb);
10270
extern int unregister_oom_notifier(struct notifier_block *nb);
10371

104-
extern bool oom_killer_disabled;
10572
extern bool oom_killer_disable(void);
10673
extern void oom_killer_enable(void);
10774

10875
extern struct task_struct *find_lock_task_mm(struct task_struct *p);
10976

110-
bool task_will_free_mem(struct task_struct *task);
111-
11277
/* sysctls */
11378
extern int sysctl_oom_dump_tasks;
11479
extern int sysctl_oom_kill_allocating_task;

mm/memcontrol.c

+42-72
Original file line numberDiff line numberDiff line change
@@ -920,6 +920,43 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
920920
iter != NULL; \
921921
iter = mem_cgroup_iter(NULL, iter, NULL))
922922

923+
/**
924+
* mem_cgroup_scan_tasks - iterate over tasks of a memory cgroup hierarchy
925+
* @memcg: hierarchy root
926+
* @fn: function to call for each task
927+
* @arg: argument passed to @fn
928+
*
929+
* This function iterates over tasks attached to @memcg or to any of its
930+
* descendants and calls @fn for each task. If @fn returns a non-zero
931+
* value, the function breaks the iteration loop and returns the value.
932+
* Otherwise, it will iterate over all tasks and return 0.
933+
*
934+
* This function must not be called for the root memory cgroup.
935+
*/
936+
int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
937+
int (*fn)(struct task_struct *, void *), void *arg)
938+
{
939+
struct mem_cgroup *iter;
940+
int ret = 0;
941+
942+
BUG_ON(memcg == root_mem_cgroup);
943+
944+
for_each_mem_cgroup_tree(iter, memcg) {
945+
struct css_task_iter it;
946+
struct task_struct *task;
947+
948+
css_task_iter_start(&iter->css, &it);
949+
while (!ret && (task = css_task_iter_next(&it)))
950+
ret = fn(task, arg);
951+
css_task_iter_end(&it);
952+
if (ret) {
953+
mem_cgroup_iter_break(memcg, iter);
954+
break;
955+
}
956+
}
957+
return ret;
958+
}
959+
923960
/**
924961
* mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
925962
* @page: the page
@@ -1178,7 +1215,7 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)
11781215
/*
11791216
* Return the memory (and swap, if configured) limit for a memcg.
11801217
*/
1181-
static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
1218+
unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
11821219
{
11831220
unsigned long limit;
11841221

@@ -1205,79 +1242,12 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
12051242
.gfp_mask = gfp_mask,
12061243
.order = order,
12071244
};
1208-
struct mem_cgroup *iter;
1209-
unsigned long chosen_points = 0;
1210-
unsigned long totalpages;
1211-
unsigned int points = 0;
1212-
struct task_struct *chosen = NULL;
1245+
bool ret;
12131246

12141247
mutex_lock(&oom_lock);
1215-
1216-
/*
1217-
* If current has a pending SIGKILL or is exiting, then automatically
1218-
* select it. The goal is to allow it to allocate so that it may
1219-
* quickly exit and free its memory.
1220-
*/
1221-
if (task_will_free_mem(current)) {
1222-
mark_oom_victim(current);
1223-
wake_oom_reaper(current);
1224-
goto unlock;
1225-
}
1226-
1227-
check_panic_on_oom(&oc, CONSTRAINT_MEMCG);
1228-
totalpages = mem_cgroup_get_limit(memcg) ? : 1;
1229-
for_each_mem_cgroup_tree(iter, memcg) {
1230-
struct css_task_iter it;
1231-
struct task_struct *task;
1232-
1233-
css_task_iter_start(&iter->css, &it);
1234-
while ((task = css_task_iter_next(&it))) {
1235-
switch (oom_scan_process_thread(&oc, task)) {
1236-
case OOM_SCAN_SELECT:
1237-
if (chosen)
1238-
put_task_struct(chosen);
1239-
chosen = task;
1240-
chosen_points = ULONG_MAX;
1241-
get_task_struct(chosen);
1242-
/* fall through */
1243-
case OOM_SCAN_CONTINUE:
1244-
continue;
1245-
case OOM_SCAN_ABORT:
1246-
css_task_iter_end(&it);
1247-
mem_cgroup_iter_break(memcg, iter);
1248-
if (chosen)
1249-
put_task_struct(chosen);
1250-
/* Set a dummy value to return "true". */
1251-
chosen = (void *) 1;
1252-
goto unlock;
1253-
case OOM_SCAN_OK:
1254-
break;
1255-
};
1256-
points = oom_badness(task, memcg, NULL, totalpages);
1257-
if (!points || points < chosen_points)
1258-
continue;
1259-
/* Prefer thread group leaders for display purposes */
1260-
if (points == chosen_points &&
1261-
thread_group_leader(chosen))
1262-
continue;
1263-
1264-
if (chosen)
1265-
put_task_struct(chosen);
1266-
chosen = task;
1267-
chosen_points = points;
1268-
get_task_struct(chosen);
1269-
}
1270-
css_task_iter_end(&it);
1271-
}
1272-
1273-
if (chosen) {
1274-
points = chosen_points * 1000 / totalpages;
1275-
oom_kill_process(&oc, chosen, points, totalpages,
1276-
"Memory cgroup out of memory");
1277-
}
1278-
unlock:
1248+
ret = out_of_memory(&oc);
12791249
mutex_unlock(&oom_lock);
1280-
return chosen;
1250+
return ret;
12811251
}
12821252

12831253
#if MAX_NUMNODES > 1
@@ -1600,7 +1570,7 @@ bool mem_cgroup_oom_synchronize(bool handle)
16001570
if (!memcg)
16011571
return false;
16021572

1603-
if (!handle || oom_killer_disabled)
1573+
if (!handle)
16041574
goto cleanup;
16051575

16061576
owait.memcg = memcg;

0 commit comments

Comments (0)