mm/mempolicy: Add MPOL_RANDOM

To help work around certain memory controller limitations or similar, a
random NUMA allocation memory policy is added.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
This commit is contained in:
Tvrtko Ursulin
2024-05-22 14:12:42 +01:00
committed by Dom Cobley
parent 01b55152ee
commit 078181bedd
2 changed files with 72 additions and 0 deletions

View File

@@ -24,6 +24,7 @@ enum {
MPOL_LOCAL, MPOL_LOCAL,
MPOL_PREFERRED_MANY, MPOL_PREFERRED_MANY,
MPOL_WEIGHTED_INTERLEAVE, MPOL_WEIGHTED_INTERLEAVE,
MPOL_RANDOM,
MPOL_MAX, /* always last member of enum */ MPOL_MAX, /* always last member of enum */
}; };

View File

@@ -41,6 +41,9 @@
* preferred many Try a set of nodes first before normal fallback. This is * preferred many Try a set of nodes first before normal fallback. This is
* similar to preferred without the special case. * similar to preferred without the special case.
* *
* random Allocate memory from a random node out of allowed set of
* nodes.
*
* default Allocate on the local node first, or when on a VMA * default Allocate on the local node first, or when on a VMA
* use the process policy. This is what Linux always did * use the process policy. This is what Linux always did
* in a NUMA aware kernel and still does by, ahem, default. * in a NUMA aware kernel and still does by, ahem, default.
@@ -592,6 +595,10 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
.create = mpol_new_nodemask, .create = mpol_new_nodemask,
.rebind = mpol_rebind_nodemask, .rebind = mpol_rebind_nodemask,
}, },
[MPOL_RANDOM] = {
.create = mpol_new_nodemask,
.rebind = mpol_rebind_nodemask,
},
}; };
static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist, static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist,
@@ -1045,6 +1052,7 @@ static void get_policy_nodemask(struct mempolicy *pol, nodemask_t *nodes)
case MPOL_PREFERRED: case MPOL_PREFERRED:
case MPOL_PREFERRED_MANY: case MPOL_PREFERRED_MANY:
case MPOL_WEIGHTED_INTERLEAVE: case MPOL_WEIGHTED_INTERLEAVE:
case MPOL_RANDOM:
*nodes = pol->nodes; *nodes = pol->nodes;
break; break;
case MPOL_LOCAL: case MPOL_LOCAL:
@@ -2062,6 +2070,27 @@ static unsigned int interleave_nodes(struct mempolicy *policy)
return nid; return nid;
} }
static unsigned int read_once_policy_nodemask(struct mempolicy *pol, nodemask_t *mask);
static unsigned int random_nodes(struct mempolicy *policy)
{
unsigned int nid = first_node(policy->nodes);
unsigned int cpuset_mems_cookie;
nodemask_t nodemask;
unsigned int r;
r = get_random_u32_below(read_once_policy_nodemask(policy, &nodemask));
/* to prevent miscount, use tsk->mems_allowed_seq to detect rebind */
do {
cpuset_mems_cookie = read_mems_allowed_begin();
while (r--)
nid = next_node_in(nid, policy->nodes);
} while (read_mems_allowed_retry(cpuset_mems_cookie));
return nid;
}
/* /*
* Depending on the memory policy provide a node from which to allocate the * Depending on the memory policy provide a node from which to allocate the
* next slab entry. * next slab entry.
@@ -2107,6 +2136,9 @@ unsigned int mempolicy_slab_node(void)
case MPOL_LOCAL: case MPOL_LOCAL:
return node; return node;
case MPOL_RANDOM:
return random_nodes(policy);
default: default:
BUG(); BUG();
} }
@@ -2188,6 +2220,33 @@ static unsigned int interleave_nid(struct mempolicy *pol, pgoff_t ilx)
return nid; return nid;
} }
static unsigned int random_nid(struct mempolicy *pol,
struct vm_area_struct *vma,
pgoff_t ilx)
{
nodemask_t nodemask;
unsigned int r, nnodes;
int i, nid;
nnodes = read_once_policy_nodemask(pol, &nodemask);
if (!nnodes)
return numa_node_id();
/*
* QQQ
* Can we say hash of vma+ilx is sufficiently random but still
* stable in case of reliance on stable, as it appears is with
* mpol_misplaced and interleaving?
*/
r = hash_long((unsigned long)vma + ilx,
ilog2(roundup_pow_of_two(nnodes)));
nid = first_node(nodemask);
for (i = 0; i < r; i++)
nid = next_node(nid, nodemask);
return nid;
}
/* /*
* Return a nodemask representing a mempolicy for filtering nodes for * Return a nodemask representing a mempolicy for filtering nodes for
* page allocation, together with preferred node id (or the input node id). * page allocation, together with preferred node id (or the input node id).
@@ -2231,6 +2290,9 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *pol,
weighted_interleave_nodes(pol) : weighted_interleave_nodes(pol) :
weighted_interleave_nid(pol, ilx); weighted_interleave_nid(pol, ilx);
break; break;
case MPOL_RANDOM:
*nid = random_nodes(pol);
break;
} }
return nodemask; return nodemask;
@@ -2299,6 +2361,7 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
case MPOL_BIND: case MPOL_BIND:
case MPOL_INTERLEAVE: case MPOL_INTERLEAVE:
case MPOL_WEIGHTED_INTERLEAVE: case MPOL_WEIGHTED_INTERLEAVE:
case MPOL_RANDOM:
*mask = mempolicy->nodes; *mask = mempolicy->nodes;
break; break;
@@ -2797,6 +2860,7 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
case MPOL_PREFERRED: case MPOL_PREFERRED:
case MPOL_PREFERRED_MANY: case MPOL_PREFERRED_MANY:
case MPOL_WEIGHTED_INTERLEAVE: case MPOL_WEIGHTED_INTERLEAVE:
case MPOL_RANDOM:
return !!nodes_equal(a->nodes, b->nodes); return !!nodes_equal(a->nodes, b->nodes);
case MPOL_LOCAL: case MPOL_LOCAL:
return true; return true;
@@ -2988,6 +3052,10 @@ int mpol_misplaced(struct folio *folio, struct vm_fault *vmf,
polnid = zonelist_node_idx(z); polnid = zonelist_node_idx(z);
break; break;
case MPOL_RANDOM:
polnid = random_nid(pol, vma, ilx);
break;
default: default:
BUG(); BUG();
} }
@@ -3333,6 +3401,7 @@ static const char * const policy_modes[] =
[MPOL_WEIGHTED_INTERLEAVE] = "weighted interleave", [MPOL_WEIGHTED_INTERLEAVE] = "weighted interleave",
[MPOL_LOCAL] = "local", [MPOL_LOCAL] = "local",
[MPOL_PREFERRED_MANY] = "prefer (many)", [MPOL_PREFERRED_MANY] = "prefer (many)",
[MPOL_RANDOM] = "random",
}; };
/** /**
@@ -3395,6 +3464,7 @@ int mpol_parse_str(char *str, struct mempolicy **mpol)
break; break;
case MPOL_INTERLEAVE: case MPOL_INTERLEAVE:
case MPOL_WEIGHTED_INTERLEAVE: case MPOL_WEIGHTED_INTERLEAVE:
case MPOL_RANDOM:
/* /*
* Default to online nodes with memory if no nodelist * Default to online nodes with memory if no nodelist
*/ */
@@ -3539,6 +3609,7 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
case MPOL_BIND: case MPOL_BIND:
case MPOL_INTERLEAVE: case MPOL_INTERLEAVE:
case MPOL_WEIGHTED_INTERLEAVE: case MPOL_WEIGHTED_INTERLEAVE:
case MPOL_RANDOM:
nodes = pol->nodes; nodes = pol->nodes;
break; break;
default: default: