Merge tag 'mm-nonmm-stable-2025-03-30-18-23' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull non-MM updates from Andrew Morton:

 - The series "powerpc/crash: use generic crashkernel reservation" from
   Sourabh Jain changes powerpc's kexec code to use more of the generic
   layers.

 - The series "get_maintainer: report subsystem status separately" from
   Vlastimil Babka makes some long-requested improvements to the
   get_maintainer output.

 - The series "ucount: Simplify refcounting with rcuref_t" from
   Sebastian Siewior cleans up and optimizing the refcounting in the
   ucount code.

 - The series "reboot: support runtime configuration of emergency
   hw_protection action" from Ahmad Fatoum improves the ability for a
   driver to perform an emergency system shutdown or reboot.

 - The series "Converge on using secs_to_jiffies() part two" from Easwar
   Hariharan performs further migrations from msecs_to_jiffies() to
   secs_to_jiffies().

 - The series "lib/interval_tree: add some test cases and cleanup" from
   Wei Yang permits more userspace testing of kernel library code, adds
   some more tests and performs some cleanups.

 - The series "hung_task: Dump the blocking task stacktrace" from Masami
   Hiramatsu arranges for the hung_task detector to dump the stack of
   the blocking task and not just that of the blocked task.

 - The series "resource: Split and use DEFINE_RES*() macros" from Andy
   Shevchenko provides some cleanups to the resource definition macros.

 - Plus the usual shower of singleton patches - please see the
   individual changelogs for details.
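
As a small illustration of the secs_to_jiffies() conversions mentioned in
the bullet above (the delayed-work call site is made up for this example,
it is not taken from the series):

	/* Before: a 10-second delay spelled out in milliseconds. */
	schedule_delayed_work(&my_work, msecs_to_jiffies(10 * MSEC_PER_SEC));

	/* After: the same delay stated directly in seconds. */
	schedule_delayed_work(&my_work, secs_to_jiffies(10));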

* tag 'mm-nonmm-stable-2025-03-30-18-23' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (77 commits)
  mailmap: consolidate email addresses of Alexander Sverdlin
  fs/procfs: fix the comment above proc_pid_wchan()
  relay: use kasprintf() instead of fixed buffer formatting
  resource: replace open coded variant of DEFINE_RES()
  resource: replace open coded variants of DEFINE_RES_*_NAMED()
  resource: replace open coded variant of DEFINE_RES_NAMED_DESC()
  resource: split DEFINE_RES_NAMED_DESC() out of DEFINE_RES_NAMED()
  samples: add hung_task detector mutex blocking sample
  hung_task: show the blocker task if the task is hung on mutex
  kexec_core: accept unaccepted kexec segments' destination addresses
  watchdog/perf: optimize bytes copied and remove manual NUL-termination
  lib/interval_tree: fix the comment of interval_tree_span_iter_next_gap()
  lib/interval_tree: skip the check before go to the right subtree
  lib/interval_tree: add test case for span iteration
  lib/interval_tree: add test case for interval_tree_iter_xxx() helpers
  lib/rbtree: add random seed
  lib/rbtree: split tests
  lib/rbtree: enable userland test suite for rbtree related data structure
  checkpatch: describe --min-conf-desc-length
  scripts/gdb/symbols: determine KASLR offset on s390
  ...
Committed by Linus Torvalds on 2025-04-01 10:06:52 -07:00
107 changed files with 1392 additions and 627 deletions

View File

@@ -375,11 +375,10 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
 	return 0;
 }
 
-void __init reserve_crashkernel_generic(char *cmdline,
-					unsigned long long crash_size,
-					unsigned long long crash_base,
-					unsigned long long crash_low_size,
-					bool high)
+void __init reserve_crashkernel_generic(unsigned long long crash_size,
+					unsigned long long crash_base,
+					unsigned long long crash_low_size,
+					bool high)
 {
 	unsigned long long search_end = CRASH_ADDR_LOW_MAX, search_base = 0;
 	bool fixed_base = false;

View File

@@ -1582,6 +1582,17 @@ struct mm_struct *get_task_mm(struct task_struct *task)
 }
 EXPORT_SYMBOL_GPL(get_task_mm);
 
+static bool may_access_mm(struct mm_struct *mm, struct task_struct *task, unsigned int mode)
+{
+	if (mm == current->mm)
+		return true;
+	if (ptrace_may_access(task, mode))
+		return true;
+	if ((mode & PTRACE_MODE_READ) && perfmon_capable())
+		return true;
+	return false;
+}
+
 struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
 {
 	struct mm_struct *mm;
@@ -1594,7 +1605,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
 	mm = get_task_mm(task);
 	if (!mm) {
 		mm = ERR_PTR(-ESRCH);
-	} else if (mm != current->mm && !ptrace_may_access(task, mode)) {
+	} else if (!may_access_mm(mm, task, mode)) {
 		mmput(mm);
 		mm = ERR_PTR(-EACCES);
 	}

View File

@@ -93,6 +93,43 @@ static struct notifier_block panic_block = {
 	.notifier_call = hung_task_panic,
 };
 
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+static void debug_show_blocker(struct task_struct *task)
+{
+	struct task_struct *g, *t;
+	unsigned long owner;
+	struct mutex *lock;
+
+	RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "No rcu lock held");
+
+	lock = READ_ONCE(task->blocker_mutex);
+	if (!lock)
+		return;
+
+	owner = mutex_get_owner(lock);
+	if (unlikely(!owner)) {
+		pr_err("INFO: task %s:%d is blocked on a mutex, but the owner is not found.\n",
+			task->comm, task->pid);
+		return;
+	}
+
+	/* Ensure the owner information is correct. */
+	for_each_process_thread(g, t) {
+		if ((unsigned long)t == owner) {
+			pr_err("INFO: task %s:%d is blocked on a mutex likely owned by task %s:%d.\n",
+				task->comm, task->pid, t->comm, t->pid);
+			sched_show_task(t);
+			return;
+		}
+	}
+}
+#else
+static inline void debug_show_blocker(struct task_struct *task)
+{
+}
+#endif
+
 static void check_hung_task(struct task_struct *t, unsigned long timeout)
 {
 	unsigned long switch_count = t->nvcsw + t->nivcsw;
@@ -152,6 +189,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
 		pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
 			" disables this message.\n");
 		sched_show_task(t);
+		debug_show_blocker(t);
 		hung_task_show_lock = true;
 
 		if (sysctl_hung_task_all_cpu_backtrace)
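
A minimal sketch of the kind of test module the "samples: add hung_task
detector mutex blocking sample" entry in the shortlog refers to (the module
and symbol names here are assumptions, not the actual sample): one kthread
grabs a mutex and holds it far longer than hung_task_timeout_secs, a second
kthread blocks on it uninterruptibly, so the detector reports the waiter
and, with debug_show_blocker() above, also dumps the holder's stack.

	#include <linux/module.h>
	#include <linux/kthread.h>
	#include <linux/mutex.h>
	#include <linux/delay.h>
	#include <linux/err.h>

	static DEFINE_MUTEX(hog_lock);
	static struct task_struct *holder, *waiter;

	/* Takes the mutex and sits on it well past the hung_task timeout. */
	static int holder_fn(void *unused)
	{
		mutex_lock(&hog_lock);
		while (!kthread_should_stop())
			msleep(1000);
		mutex_unlock(&hog_lock);
		return 0;
	}

	/* Blocks in uninterruptible sleep on the mutex; hung_task flags it. */
	static int waiter_fn(void *unused)
	{
		mutex_lock(&hog_lock);
		mutex_unlock(&hog_lock);
		return 0;
	}

	static int __init mutex_blocker_init(void)
	{
		holder = kthread_run(holder_fn, NULL, "hung-holder");
		if (IS_ERR(holder))
			return PTR_ERR(holder);
		waiter = kthread_run(waiter_fn, NULL, "hung-waiter");
		if (IS_ERR(waiter)) {
			kthread_stop(holder);
			return PTR_ERR(waiter);
		}
		return 0;
	}
	module_init(mutex_blocker_init);
	MODULE_LICENSE("GPL");

This needs CONFIG_DETECT_HUNG_TASK (and CONFIG_DETECT_HUNG_TASK_BLOCKER for
the new blocker report) and, being a throwaway sketch, provides no clean
module unload path.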

View File

@@ -210,6 +210,16 @@ int sanity_check_segment_list(struct kimage *image)
 	}
 #endif
 
+	/*
+	 * The destination addresses are searched from system RAM rather than
+	 * being allocated from the buddy allocator, so they are not guaranteed
+	 * to be accepted by the current kernel. Accept the destination
+	 * addresses before kexec swaps their content with the segments' source
+	 * pages to avoid accessing memory before it is accepted.
+	 */
+	for (i = 0; i < nr_segments; i++)
+		accept_memory(image->segment[i].mem, image->segment[i].memsz);
+
 	return 0;
 }

View File

@@ -390,7 +390,7 @@ int kexec_elf_load(struct kimage *image, struct elfhdr *ehdr,
 			 struct kexec_buf *kbuf,
 			 unsigned long *lowest_load_addr)
 {
-	unsigned long lowest_addr = UINT_MAX;
+	unsigned long lowest_addr = ULONG_MAX;
 	int ret;
 	size_t i;

View File

@@ -464,6 +464,12 @@ static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
 			continue;
 		}
 
+		/* Make sure this does not conflict with exclude range */
+		if (arch_check_excluded_range(image, temp_start, temp_end)) {
+			temp_start = temp_start - PAGE_SIZE;
+			continue;
+		}
+
 		/* We found a suitable memory range */
 		break;
 	} while (1);
@@ -498,6 +504,12 @@ static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
 			continue;
 		}
 
+		/* Make sure this does not conflict with exclude range */
+		if (arch_check_excluded_range(image, temp_start, temp_end)) {
+			temp_start = temp_start + PAGE_SIZE;
+			continue;
+		}
+
 		/* We found a suitable memory range */
 		break;
 	} while (1);

View File

@@ -72,6 +72,14 @@ static inline unsigned long __owner_flags(unsigned long owner)
 	return owner & MUTEX_FLAGS;
 }
 
+/* Do not use the return value as a pointer directly. */
+unsigned long mutex_get_owner(struct mutex *lock)
+{
+	unsigned long owner = atomic_long_read(&lock->owner);
+
+	return (unsigned long)__owner_task(owner);
+}
+
 /*
  * Returns: __mutex_owner(lock) on failure or NULL on success.
  */
@@ -182,6 +190,9 @@ static void
 __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 		   struct list_head *list)
 {
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+	WRITE_ONCE(current->blocker_mutex, lock);
+#endif
 	debug_mutex_add_waiter(lock, waiter, current);
 	list_add_tail(&waiter->list, list);
@@ -197,6 +208,9 @@ __mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter)
 		__mutex_clear_flag(lock, MUTEX_FLAGS);
 
 	debug_mutex_remove_waiter(lock, waiter, current);
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+	WRITE_ONCE(current->blocker_mutex, NULL);
+#endif
 }
 
 /*

View File

@@ -36,6 +36,8 @@ enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE;
 EXPORT_SYMBOL_GPL(reboot_mode);
 enum reboot_mode panic_reboot_mode = REBOOT_UNDEFINED;
 
+static enum hw_protection_action hw_protection_action = HWPROT_ACT_SHUTDOWN;
+
 /*
  * This variable is used privately to keep track of whether or not
  * reboot_type is still set to its default value (i.e., reboot= hasn't
@@ -229,6 +231,9 @@ EXPORT_SYMBOL(unregister_restart_handler);
 /**
  * do_kernel_restart - Execute kernel restart handler call chain
  *
+ * @cmd: pointer to buffer containing command to execute for restart
+ *	 or %NULL
+ *
  * Calls functions registered with register_restart_handler.
  *
  * Expected to be called from machine_restart as last step of the restart
@@ -933,61 +938,86 @@ void orderly_reboot(void)
 }
 EXPORT_SYMBOL_GPL(orderly_reboot);
 
+static const char *hw_protection_action_str(enum hw_protection_action action)
+{
+	switch (action) {
+	case HWPROT_ACT_SHUTDOWN:
+		return "shutdown";
+	case HWPROT_ACT_REBOOT:
+		return "reboot";
+	default:
+		return "undefined";
+	}
+}
+
+static enum hw_protection_action hw_failure_emergency_action;
+
 /**
- * hw_failure_emergency_poweroff_func - emergency poweroff work after a known delay
- * @work: work_struct associated with the emergency poweroff function
+ * hw_failure_emergency_action_func - emergency action work after a known delay
+ * @work: work_struct associated with the emergency action function
  *
  * This function is called in very critical situations to force
- * a kernel poweroff after a configurable timeout value.
+ * a kernel poweroff or reboot after a configurable timeout value.
  */
-static void hw_failure_emergency_poweroff_func(struct work_struct *work)
+static void hw_failure_emergency_action_func(struct work_struct *work)
 {
+	const char *action_str = hw_protection_action_str(hw_failure_emergency_action);
+
+	pr_emerg("Hardware protection timed-out. Trying forced %s\n",
+		 action_str);
+
 	/*
-	 * We have reached here after the emergency shutdown waiting period has
-	 * expired. This means orderly_poweroff has not been able to shut off
-	 * the system for some reason.
+	 * We have reached here after the emergency action waiting period has
+	 * expired. This means orderly_poweroff/reboot has not been able to
+	 * shut off the system for some reason.
	 *
-	 * Try to shut down the system immediately using kernel_power_off
-	 * if populated
+	 * Try to shut off the system immediately if possible
	 */
-	pr_emerg("Hardware protection timed-out. Trying forced poweroff\n");
-	kernel_power_off();
+
+	if (hw_failure_emergency_action == HWPROT_ACT_REBOOT)
+		kernel_restart(NULL);
+	else
+		kernel_power_off();
 
 	/*
	 * Worst of the worst case trigger emergency restart
	 */
-	pr_emerg("Hardware protection shutdown failed. Trying emergency restart\n");
+	pr_emerg("Hardware protection %s failed. Trying emergency restart\n",
+		 action_str);
 	emergency_restart();
 }
 
-static DECLARE_DELAYED_WORK(hw_failure_emergency_poweroff_work,
-			    hw_failure_emergency_poweroff_func);
+static DECLARE_DELAYED_WORK(hw_failure_emergency_action_work,
+			    hw_failure_emergency_action_func);
 
 /**
- * hw_failure_emergency_poweroff - Trigger an emergency system poweroff
+ * hw_failure_emergency_schedule - Schedule an emergency system shutdown or reboot
  *
+ * @action: The hardware protection action to be taken
+ * @action_delay_ms: Time in milliseconds to elapse before triggering action
+ *
  * This may be called from any critical situation to trigger a system shutdown
- * after a given period of time. If time is negative this is not scheduled.
+ * or reboot after a given period of time.
+ * If time is negative this is not scheduled.
 */
-static void hw_failure_emergency_poweroff(int poweroff_delay_ms)
+static void hw_failure_emergency_schedule(enum hw_protection_action action,
+					  int action_delay_ms)
 {
-	if (poweroff_delay_ms <= 0)
+	if (action_delay_ms <= 0)
		return;
 
-	schedule_delayed_work(&hw_failure_emergency_poweroff_work,
-			      msecs_to_jiffies(poweroff_delay_ms));
+	hw_failure_emergency_action = action;
+	schedule_delayed_work(&hw_failure_emergency_action_work,
+			      msecs_to_jiffies(action_delay_ms));
 }
 
 /**
- * __hw_protection_shutdown - Trigger an emergency system shutdown or reboot
+ * __hw_protection_trigger - Trigger an emergency system shutdown or reboot
 *
 * @reason:		Reason of emergency shutdown or reboot to be printed.
 * @ms_until_forced:	Time to wait for orderly shutdown or reboot before
 *			triggering it. Negative value disables the forced
 *			shutdown or reboot.
- * @shutdown:		If true, indicates that a shutdown will happen
- *			after the critical tempeature is reached.
- *			If false, indicates that a reboot will happen
- *			after the critical tempeature is reached.
+ * @action:		The hardware protection action to be taken.
 *
 * Initiate an emergency system shutdown or reboot in order to protect
 * hardware from further damage. Usage examples include a thermal protection.
@@ -995,11 +1025,16 @@ static void hw_failure_emergency_poweroff(int poweroff_delay_ms)
 * pending even if the previous request has given a large timeout for forced
 * shutdown/reboot.
 */
-void __hw_protection_shutdown(const char *reason, int ms_until_forced, bool shutdown)
+void __hw_protection_trigger(const char *reason, int ms_until_forced,
+			     enum hw_protection_action action)
 {
	static atomic_t allow_proceed = ATOMIC_INIT(1);
 
-	pr_emerg("HARDWARE PROTECTION shutdown (%s)\n", reason);
+	if (action == HWPROT_ACT_DEFAULT)
+		action = hw_protection_action;
+
+	pr_emerg("HARDWARE PROTECTION %s (%s)\n",
+		 hw_protection_action_str(action), reason);
 
	/* Shutdown should be initiated only once. */
	if (!atomic_dec_and_test(&allow_proceed))
@@ -1009,13 +1044,55 @@ void __hw_protection_shutdown(const char *reason, int ms_until_forced, bool shut
	 * Queue a backup emergency shutdown in the event of
	 * orderly_poweroff failure
	 */
-	hw_failure_emergency_poweroff(ms_until_forced);
-	if (shutdown)
-		orderly_poweroff(true);
-	else
+	hw_failure_emergency_schedule(action, ms_until_forced);
+	if (action == HWPROT_ACT_REBOOT)
		orderly_reboot();
+	else
+		orderly_poweroff(true);
 }
-EXPORT_SYMBOL_GPL(__hw_protection_shutdown);
+EXPORT_SYMBOL_GPL(__hw_protection_trigger);
+
+static bool hw_protection_action_parse(const char *str,
+				       enum hw_protection_action *action)
+{
+	if (sysfs_streq(str, "shutdown"))
+		*action = HWPROT_ACT_SHUTDOWN;
+	else if (sysfs_streq(str, "reboot"))
+		*action = HWPROT_ACT_REBOOT;
+	else
+		return false;
+
+	return true;
+}
+
+static int __init hw_protection_setup(char *str)
+{
+	hw_protection_action_parse(str, &hw_protection_action);
+	return 1;
+}
+__setup("hw_protection=", hw_protection_setup);
+
+#ifdef CONFIG_SYSFS
+static ssize_t hw_protection_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%s\n",
+			  hw_protection_action_str(hw_protection_action));
+}
+static ssize_t hw_protection_store(struct kobject *kobj,
+				   struct kobj_attribute *attr, const char *buf,
+				   size_t count)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!hw_protection_action_parse(buf, &hw_protection_action))
+		return -EINVAL;
+
+	return count;
+}
+static struct kobj_attribute hw_protection_attr = __ATTR_RW(hw_protection);
+#endif
 
 static int __init reboot_setup(char *str)
 {
@@ -1276,6 +1353,7 @@ static struct kobj_attribute reboot_cpu_attr = __ATTR_RW(cpu);
 #endif
 
 static struct attribute *reboot_attrs[] = {
+	&hw_protection_attr.attr,
	&reboot_mode_attr.attr,
 #ifdef CONFIG_X86
	&reboot_force_attr.attr,

View File

@@ -351,10 +351,9 @@ static struct dentry *relay_create_buf_file(struct rchan *chan,
 	struct dentry *dentry;
 	char *tmpname;
 
-	tmpname = kzalloc(NAME_MAX + 1, GFP_KERNEL);
+	tmpname = kasprintf(GFP_KERNEL, "%s%d", chan->base_filename, cpu);
 	if (!tmpname)
 		return NULL;
-	snprintf(tmpname, NAME_MAX, "%s%d", chan->base_filename, cpu);
 
 	/* Create file in fs */
 	dentry = chan->cb->create_buf_file(tmpname, chan->parent,

View File

@@ -561,8 +561,7 @@ static int __region_intersects(struct resource *parent, resource_size_t start,
 	struct resource res, o;
 	bool covered;
 
-	res.start = start;
-	res.end = start + size - 1;
+	res = DEFINE_RES(start, size, 0);
 
 	for (p = parent->child; p ; p = p->sibling) {
 		if (!resource_intersection(p, &res, &o))
@@ -1714,18 +1713,13 @@ static int __init reserve_setup(char *str)
 		 * I/O port space; otherwise assume it's memory.
 		 */
 		if (io_start < 0x10000) {
-			res->flags = IORESOURCE_IO;
+			*res = DEFINE_RES_IO_NAMED(io_start, io_num, "reserved");
 			parent = &ioport_resource;
 		} else {
-			res->flags = IORESOURCE_MEM;
+			*res = DEFINE_RES_MEM_NAMED(io_start, io_num, "reserved");
 			parent = &iomem_resource;
 		}
-		res->name = "reserved";
-		res->start = io_start;
-		res->end = io_start + io_num - 1;
 		res->flags |= IORESOURCE_BUSY;
-		res->desc = IORES_DESC_NONE;
-		res->child = NULL;
 		if (request_resource(parent, res) == 0)
 			reserved = x+1;
 	}
@@ -1975,11 +1969,7 @@ get_free_mem_region(struct device *dev, struct resource *base,
 		 */
 		revoke_iomem(res);
 	} else {
-		res->start = addr;
-		res->end = addr + size - 1;
-		res->name = name;
-		res->desc = desc;
-		res->flags = IORESOURCE_MEM;
+		*res = DEFINE_RES_NAMED_DESC(addr, size, name, IORESOURCE_MEM, desc);
 
 		/*
 		 * Only succeed if the resource hosts an exclusive
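
For reference, a small sketch of how the DEFINE_RES*() helpers used above
compose (the address, size and name are made up; the macro names and
argument order are the ones visible in the hunks):

	/* A named MMIO window; desc defaults to IORES_DESC_NONE. */
	struct resource example_mmio =
		DEFINE_RES_MEM_NAMED(0xfed40000, SZ_4K, "example-mmio");

	/* The same window with an explicit descriptor, as in the last hunk. */
	example_mmio = DEFINE_RES_NAMED_DESC(0xfed40000, SZ_4K, "example-mmio",
					     IORESOURCE_MEM, IORES_DESC_NONE);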

View File

@@ -176,9 +176,10 @@ static bool recalc_sigpending_tsk(struct task_struct *t)
 
 void recalc_sigpending(void)
 {
-	if (!recalc_sigpending_tsk(current) && !freezing(current))
-		clear_thread_flag(TIF_SIGPENDING);
+	if (!recalc_sigpending_tsk(current) && !freezing(current)) {
+		if (unlikely(test_thread_flag(TIF_SIGPENDING)))
+			clear_thread_flag(TIF_SIGPENDING);
+	}
 }
 EXPORT_SYMBOL(recalc_sigpending);

View File

@@ -11,11 +11,14 @@
 struct ucounts init_ucounts = {
 	.ns = &init_user_ns,
 	.uid = GLOBAL_ROOT_UID,
-	.count = ATOMIC_INIT(1),
+	.count = RCUREF_INIT(1),
 };
 
 #define UCOUNTS_HASHTABLE_BITS 10
-static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)];
+#define UCOUNTS_HASHTABLE_ENTRIES (1 << UCOUNTS_HASHTABLE_BITS)
+static struct hlist_nulls_head ucounts_hashtable[UCOUNTS_HASHTABLE_ENTRIES] = {
+	[0 ... UCOUNTS_HASHTABLE_ENTRIES - 1] = HLIST_NULLS_HEAD_INIT(0)
+};
 static DEFINE_SPINLOCK(ucounts_lock);
 
 #define ucounts_hashfn(ns, uid) \
@@ -24,7 +27,6 @@ static DEFINE_SPINLOCK(ucounts_lock);
 #define ucounts_hashentry(ns, uid) \
 	(ucounts_hashtable + ucounts_hashfn(ns, uid))
 
-
 #ifdef CONFIG_SYSCTL
 static struct ctl_table_set *
 set_lookup(struct ctl_table_root *root)
@@ -127,88 +129,73 @@ void retire_userns_sysctls(struct user_namespace *ns)
 #endif
 }
 
-static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent)
+static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid,
+				    struct hlist_nulls_head *hashent)
 {
 	struct ucounts *ucounts;
+	struct hlist_nulls_node *pos;
 
-	hlist_for_each_entry(ucounts, hashent, node) {
-		if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns))
-			return ucounts;
+	guard(rcu)();
+	hlist_nulls_for_each_entry_rcu(ucounts, pos, hashent, node) {
+		if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns)) {
+			if (rcuref_get(&ucounts->count))
+				return ucounts;
+		}
 	}
 	return NULL;
 }
 
 static void hlist_add_ucounts(struct ucounts *ucounts)
 {
-	struct hlist_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid);
+	struct hlist_nulls_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid);
 
 	spin_lock_irq(&ucounts_lock);
-	hlist_add_head(&ucounts->node, hashent);
+	hlist_nulls_add_head_rcu(&ucounts->node, hashent);
 	spin_unlock_irq(&ucounts_lock);
 }
 
-static inline bool get_ucounts_or_wrap(struct ucounts *ucounts)
-{
-	/* Returns true on a successful get, false if the count wraps. */
-	return !atomic_add_negative(1, &ucounts->count);
-}
-
-struct ucounts *get_ucounts(struct ucounts *ucounts)
-{
-	if (!get_ucounts_or_wrap(ucounts)) {
-		put_ucounts(ucounts);
-		ucounts = NULL;
-	}
-	return ucounts;
-}
-
 struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
 {
-	struct hlist_head *hashent = ucounts_hashentry(ns, uid);
-	bool wrapped;
-	struct ucounts *ucounts, *new = NULL;
+	struct hlist_nulls_head *hashent = ucounts_hashentry(ns, uid);
+	struct ucounts *ucounts, *new;
+
+	ucounts = find_ucounts(ns, uid, hashent);
+	if (ucounts)
+		return ucounts;
+
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	if (!new)
+		return NULL;
+
+	new->ns = ns;
+	new->uid = uid;
+	rcuref_init(&new->count, 1);
 
 	spin_lock_irq(&ucounts_lock);
 	ucounts = find_ucounts(ns, uid, hashent);
-	if (!ucounts) {
+	if (ucounts) {
 		spin_unlock_irq(&ucounts_lock);
-
-		new = kzalloc(sizeof(*new), GFP_KERNEL);
-		if (!new)
-			return NULL;
-
-		new->ns = ns;
-		new->uid = uid;
-		atomic_set(&new->count, 1);
-
-		spin_lock_irq(&ucounts_lock);
-		ucounts = find_ucounts(ns, uid, hashent);
-		if (!ucounts) {
-			hlist_add_head(&new->node, hashent);
-			get_user_ns(new->ns);
-			spin_unlock_irq(&ucounts_lock);
-			return new;
-		}
+		kfree(new);
+		return ucounts;
 	}
-	wrapped = !get_ucounts_or_wrap(ucounts);
+
+	hlist_nulls_add_head_rcu(&new->node, hashent);
+	get_user_ns(new->ns);
 	spin_unlock_irq(&ucounts_lock);
-	kfree(new);
-	if (wrapped) {
-		put_ucounts(ucounts);
-		return NULL;
-	}
-	return ucounts;
+	return new;
 }
 
 void put_ucounts(struct ucounts *ucounts)
 {
 	unsigned long flags;
 
-	if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) {
-		hlist_del_init(&ucounts->node);
+	if (rcuref_put(&ucounts->count)) {
+		spin_lock_irqsave(&ucounts_lock, flags);
+		hlist_nulls_del_rcu(&ucounts->node);
 		spin_unlock_irqrestore(&ucounts_lock, flags);
 		put_user_ns(ucounts->ns);
-		kfree(ucounts);
+		kfree_rcu(ucounts, rcu);
 	}
 }
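
The rcuref_t conversion above leans on two properties of the rcuref API
(the helpers below are the real <linux/rcuref.h> interface; the struct and
function names around them are invented for this sketch): rcuref_get()
fails once the count has dropped to zero and been marked dead, and
rcuref_put() returns true only for the last reference, telling the caller
to unlink and free the object -- exactly the shape of find_ucounts() and
put_ucounts() after this change.

	struct obj {
		rcuref_t ref;
		struct rcu_head rcu;
	};

	/* Lookup side: only hand the object out if a reference was taken. */
	static struct obj *obj_tryget(struct obj *o)
	{
		return rcuref_get(&o->ref) ? o : NULL;
	}

	/* Release side: a true return means we held the last reference. */
	static void obj_put(struct obj *o)
	{
		if (rcuref_put(&o->ref))
			kfree_rcu(o, rcu);	/* free after an RCU grace period */
	}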

View File

@@ -269,12 +269,10 @@ void __init hardlockup_config_perf_event(const char *str)
 	} else {
 		unsigned int len = comma - str;
 
-		if (len >= sizeof(buf))
+		if (len > sizeof(buf))
 			return;
 
-		if (strscpy(buf, str, sizeof(buf)) < 0)
-			return;
-		buf[len] = 0;
+		strscpy(buf, str, len);
 		if (kstrtoull(buf, 16, &config))
 			return;
 	}