Merge branches 'pm-em' and 'pm-runtime'

Merge Energy Model handling code updates and runtime PM core code
updates for 6.15-rc1:

 - Clean up the Energy Model handling code somewhat (Rafael Wysocki).

 - Use kfree_rcu() to simplify the handling of runtime Energy Model
   updates (Li RongQing).

 - Add a MAINTAINERS entry for the Energy Model framework, marking it
   as properly maintained (Lukasz Luba).

 - Address RCU-related sparse warnings in the Energy Model code (Rafael
   Wysocki).

 - Remove ENERGY_MODEL dependency on SMP and allow it to be selected
   when DEVFREQ is set without CPUFREQ so it can be used on a wider
   range of systems (Jeson Gao).

 - Unify error handling during runtime suspend and runtime resume in the
   core to help drivers implement more consistent runtime PM error
   handling (Rafael Wysocki); a sketch of the new semantics follows this
   list.

 - Drop a redundant check from pm_runtime_force_resume() and rearrange
   documentation related to __pm_runtime_disable() (Rafael Wysocki).
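
[ Illustration, not part of the original commit: a minimal sketch of
  what the unified error handling means from a driver's point of view.
  The "foo" driver and its helpers are hypothetical.

	static int foo_runtime_suspend(struct device *dev)
	{
		struct foo *priv = dev_get_drvdata(dev);

		/*
		 * -EBUSY and -EAGAIN are now treated as transient errors
		 * on both suspend and resume: the core does not latch them
		 * in power.runtime_error, so they no longer cause later
		 * runtime PM requests for the device to be rejected.
		 */
		if (foo_device_busy(priv))
			return -EBUSY;

		foo_save_state(priv);
		return 0;
	}
]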

* pm-em:
  PM: EM: Rework the depends on for CONFIG_ENERGY_MODEL
  PM: EM: Address RCU-related sparse warnings
  PM: EM: Consify two parameters of em_dev_register_perf_domain()
  MAINTAINERS: Add Energy Model framework as properly maintained
  PM: EM: use kfree_rcu() to simplify the code
  PM: EM: Slightly reduce em_check_capacity_update() overhead
  PM: EM: Drop unused parameter from em_adjust_new_capacity()

* pm-runtime:
  PM: runtime: Unify error handling during suspend and resume
  PM: runtime: Drop status check from pm_runtime_force_resume()
  PM: Rearrange documentation related to __pm_runtime_disable()
Author: Rafael J. Wysocki
Date:   2025-03-24 15:06:06 +01:00

8 changed files with 91 additions and 85 deletions

diff --git a/MAINTAINERS b/MAINTAINERS
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8528,6 +8528,15 @@ M:	Maxim Levitsky <maximlevitsky@gmail.com>
 S:	Maintained
 F:	drivers/media/rc/ene_ir.*
 
+ENERGY MODEL
+M:	Lukasz Luba <lukasz.luba@arm.com>
+M:	"Rafael J. Wysocki" <rafael@kernel.org>
+L:	linux-pm@vger.kernel.org
+S:	Maintained
+F:	kernel/power/energy_model.c
+F:	include/linux/energy_model.h
+F:	Documentation/power/energy-model.rst
+
 EPAPR HYPERVISOR BYTE CHANNEL DEVICE DRIVER
 M:	Laurentiu Tudor <laurentiu.tudor@nxp.com>
 L:	linuxppc-dev@lists.ozlabs.org

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1404,6 +1404,10 @@ static int device_suspend_late(struct device *dev, pm_message_t state, bool async)
 	TRACE_DEVICE(dev);
 	TRACE_SUSPEND(0);
 
+	/*
+	 * Disable runtime PM for the device without checking if there is a
+	 * pending resume request for it.
+	 */
 	__pm_runtime_disable(dev, false);
 
 	dpm_wait_for_subordinate(dev, async);

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -448,8 +448,19 @@ static int rpm_callback(int (*cb)(struct device *), struct device *dev)
 		retval = __rpm_callback(cb, dev);
 	}
 
-	dev->power.runtime_error = retval;
-	return retval != -EACCES ? retval : -EIO;
+	/*
+	 * Since -EACCES means that runtime PM is disabled for the given device,
+	 * it should not be returned by runtime PM callbacks. If it is returned
+	 * nevertheless, assume it to be a transient error and convert it to
+	 * -EAGAIN.
+	 */
+	if (retval == -EACCES)
+		retval = -EAGAIN;
+
+	if (retval != -EAGAIN && retval != -EBUSY)
+		dev->power.runtime_error = retval;
+
+	return retval;
 }
 
 /**
@@ -725,21 +736,18 @@ static int rpm_suspend(struct device *dev, int rpmflags)
 	dev->power.deferred_resume = false;
 	wake_up_all(&dev->power.wait_queue);
 
-	if (retval == -EAGAIN || retval == -EBUSY) {
-		dev->power.runtime_error = 0;
-
-		/*
-		 * If the callback routine failed an autosuspend, and
-		 * if the last_busy time has been updated so that there
-		 * is a new autosuspend expiration time, automatically
-		 * reschedule another autosuspend.
-		 */
-		if ((rpmflags & RPM_AUTO) &&
-		    pm_runtime_autosuspend_expiration(dev) != 0)
-			goto repeat;
-	} else {
-		pm_runtime_cancel_pending(dev);
-	}
+	/*
+	 * On transient errors, if the callback routine failed an autosuspend,
+	 * and if the last_busy time has been updated so that there is a new
+	 * autosuspend expiration time, automatically reschedule another
+	 * autosuspend.
+	 */
+	if (!dev->power.runtime_error && (rpmflags & RPM_AUTO) &&
+	    pm_runtime_autosuspend_expiration(dev) != 0)
+		goto repeat;
+
+	pm_runtime_cancel_pending(dev);
 
 	goto out;
 }
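
[ Illustration, not part of the original commit: the rescheduling logic
  above is what makes the following hypothetical driver pattern work.
  Failing an autosuspend with a transient error after pushing last_busy
  forward makes the core schedule another autosuspend attempt instead
  of giving up.

	static int foo_runtime_suspend(struct device *dev)
	{
		if (foo_transfer_in_flight(dev)) {
			/* New expiration time; the core reschedules. */
			pm_runtime_mark_last_busy(dev);
			return -EBUSY;
		}

		return 0;
	}
]
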
@@ -1460,20 +1468,6 @@ int pm_runtime_barrier(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(pm_runtime_barrier);
 
-/**
- * __pm_runtime_disable - Disable runtime PM of a device.
- * @dev: Device to handle.
- * @check_resume: If set, check if there's a resume request for the device.
- *
- * Increment power.disable_depth for the device and if it was zero previously,
- * cancel all pending runtime PM requests for the device and wait for all
- * operations in progress to complete. The device can be either active or
- * suspended after its runtime PM has been disabled.
- *
- * If @check_resume is set and there's a resume request pending when
- * __pm_runtime_disable() is called and power.disable_depth is zero, the
- * function will wake up the device before disabling its runtime PM.
- */
 void __pm_runtime_disable(struct device *dev, bool check_resume)
 {
 	spin_lock_irq(&dev->power.lock);
@@ -1959,7 +1953,7 @@ int pm_runtime_force_resume(struct device *dev)
 	int (*callback)(struct device *);
 	int ret = 0;
 
-	if (!pm_runtime_status_suspended(dev) || !dev->power.needs_force_resume)
+	if (!dev->power.needs_force_resume)
 		goto out;
 
 	/*

diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
--- a/drivers/powercap/Kconfig
+++ b/drivers/powercap/Kconfig
@@ -82,7 +82,7 @@ config DTPM
 
 config DTPM_CPU
 	bool "Add CPU power capping based on the energy model"
-	depends on DTPM && ENERGY_MODEL
+	depends on DTPM && ENERGY_MODEL && SMP
 	help
 	  This enables support for CPU power limitation based on
 	  energy model.

diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -167,13 +167,13 @@ struct em_data_callback {
 struct em_perf_domain *em_cpu_get(int cpu);
 struct em_perf_domain *em_pd_get(struct device *dev);
 int em_dev_update_perf_domain(struct device *dev,
-			      struct em_perf_table __rcu *new_table);
+			      struct em_perf_table *new_table);
 int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
-				struct em_data_callback *cb, cpumask_t *span,
-				bool microwatts);
+				const struct em_data_callback *cb,
+				const cpumask_t *cpus, bool microwatts);
 void em_dev_unregister_perf_domain(struct device *dev);
-struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd);
-void em_table_free(struct em_perf_table __rcu *table);
+struct em_perf_table *em_table_alloc(struct em_perf_domain *pd);
+void em_table_free(struct em_perf_table *table);
 int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
 			 int nr_states);
 int em_dev_update_chip_binning(struct device *dev);
@@ -346,8 +346,8 @@ struct em_data_callback {};
 
 static inline
 int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
-				struct em_data_callback *cb, cpumask_t *span,
-				bool microwatts)
+				const struct em_data_callback *cb,
+				const cpumask_t *cpus, bool microwatts)
 {
 	return -EINVAL;
 }
@@ -373,14 +373,14 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd)
 	return 0;
 }
 static inline
-struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd)
+struct em_perf_table *em_table_alloc(struct em_perf_domain *pd)
 {
 	return NULL;
 }
-static inline void em_table_free(struct em_perf_table __rcu *table) {}
+static inline void em_table_free(struct em_perf_table *table) {}
 static inline
 int em_dev_update_perf_domain(struct device *dev,
-			      struct em_perf_table __rcu *new_table)
+			      struct em_perf_table *new_table)
 {
 	return -EINVAL;
 }
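
[ Illustration, not part of the original commit: with the const-qualified
  parameters above, a driver can keep its callback table in read-only
  data. The "foo" names are hypothetical.

	static int foo_active_power(struct device *dev, unsigned long *uW,
				    unsigned long *kHz)
	{
		/* Fill in *uW with the power drawn at frequency *kHz. */
		return 0;
	}

	static const struct em_data_callback foo_em_cb =
					EM_DATA_CB(foo_active_power);

	ret = em_dev_register_perf_domain(dev, nr_states, &foo_em_cb,
					  cpumask_of(cpu), true);
]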

diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -556,11 +556,18 @@ static inline int pm_runtime_set_suspended(struct device *dev)
  * pm_runtime_disable - Disable runtime PM for a device.
  * @dev: Target device.
  *
- * Prevent the runtime PM framework from working with @dev (by incrementing its
- * "blocking" counter).
+ * Prevent the runtime PM framework from working with @dev by incrementing its
+ * "disable" counter.
  *
- * For each invocation of this function for @dev there must be a matching
- * pm_runtime_enable() call in order for runtime PM to be enabled for it.
+ * If the counter is zero when this function runs and there is a pending runtime
+ * resume request for @dev, it will be resumed. If the counter is still zero at
+ * that point, all of the pending runtime PM requests for @dev will be canceled
+ * and all runtime PM operations in progress involving it will be waited for to
+ * complete.
+ *
+ * For each invocation of this function for @dev, there must be a matching
+ * pm_runtime_enable() call, so that runtime PM is eventually enabled for it
+ * again.
  */
 static inline void pm_runtime_disable(struct device *dev)
 {
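
[ Illustration, not part of the original commit: the "disable" counter
  described above is why the calls must balance, e.g.:

	pm_runtime_disable(dev);  /* count 0 -> 1, pending requests flushed */
	pm_runtime_disable(dev);  /* count 1 -> 2, no further effect */
	pm_runtime_enable(dev);   /* count 2 -> 1, still disabled */
	pm_runtime_enable(dev);   /* count 1 -> 0, runtime PM works again */
]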

diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -380,8 +380,7 @@ config CPU_PM
 
 config ENERGY_MODEL
 	bool "Energy Model for devices with DVFS (CPUs, GPUs, etc)"
-	depends on SMP
-	depends on CPU_FREQ
+	depends on CPU_FREQ || PM_DEVFREQ
 	help
 	  Several subsystems (thermal and/or the task scheduler for example)
 	  can leverage information about the energy consumed by devices to

diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -161,22 +161,10 @@ static void em_debug_create_pd(struct device *dev) {}
 static void em_debug_remove_pd(struct device *dev) {}
 #endif
 
-static void em_destroy_table_rcu(struct rcu_head *rp)
-{
-	struct em_perf_table __rcu *table;
-
-	table = container_of(rp, struct em_perf_table, rcu);
-	kfree(table);
-}
-
 static void em_release_table_kref(struct kref *kref)
 {
-	struct em_perf_table __rcu *table;
-
 	/* It was the last owner of this table so we can free */
-	table = container_of(kref, struct em_perf_table, kref);
-	call_rcu(&table->rcu, em_destroy_table_rcu);
+	kfree_rcu(container_of(kref, struct em_perf_table, kref), rcu);
 }
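
[ Illustration, not part of the original commit: kfree_rcu(ptr, field)
  replaces exactly the boilerplate removed above, namely a call_rcu()
  callback that does container_of() plus kfree(). Generic shape,
  assuming a hypothetical struct with an rcu_head member:

	struct foo {
		struct rcu_head rcu;
		struct kref kref;
		/* payload */
	};

	static void foo_release(struct kref *kref)
	{
		/* Queue the free for after an RCU grace period. */
		kfree_rcu(container_of(kref, struct foo, kref), rcu);
	}
]
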
@@ -185,7 +173,7 @@ static void em_release_table_kref(struct kref *kref)
  *
  * No return values.
  */
-void em_table_free(struct em_perf_table __rcu *table)
+void em_table_free(struct em_perf_table *table)
 {
 	kref_put(&table->kref, em_release_table_kref);
 }
@@ -198,9 +186,9 @@ void em_table_free(struct em_perf_table __rcu *table)
  * has a user.
  * Returns allocated table or NULL.
  */
-struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd)
+struct em_perf_table *em_table_alloc(struct em_perf_domain *pd)
 {
-	struct em_perf_table __rcu *table;
+	struct em_perf_table *table;
 	int table_size;
 
 	table_size = sizeof(struct em_perf_state) * pd->nr_perf_states;
@@ -239,7 +227,7 @@ static void em_init_performance(struct device *dev, struct em_perf_domain *pd,
 }
 
 static int em_compute_costs(struct device *dev, struct em_perf_state *table,
-			    struct em_data_callback *cb, int nr_states,
+			    const struct em_data_callback *cb, int nr_states,
 			    unsigned long flags)
 {
 	unsigned long prev_cost = ULONG_MAX;
@@ -308,9 +296,9 @@ int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
  * Return 0 on success or an error code on failure.
  */
 int em_dev_update_perf_domain(struct device *dev,
-			      struct em_perf_table __rcu *new_table)
+			      struct em_perf_table *new_table)
 {
-	struct em_perf_table __rcu *old_table;
+	struct em_perf_table *old_table;
 	struct em_perf_domain *pd;
 
 	if (!dev)
@@ -327,7 +315,8 @@ int em_dev_update_perf_domain(struct device *dev,
 
 	kref_get(&new_table->kref);
 
-	old_table = pd->em_table;
+	old_table = rcu_dereference_protected(pd->em_table,
+					      lockdep_is_held(&em_pd_mutex));
 	rcu_assign_pointer(pd->em_table, new_table);
 
 	em_cpufreq_update_efficiencies(dev, new_table->state);
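
[ Illustration, not part of the original commit: the pair above is the
  standard pattern for a mutex-protected RCU pointer. The
  rcu_dereference_protected() call documents that em_pd_mutex is held
  (which sparse and lockdep can check), and rcu_assign_pointer()
  publishes the new table with the required memory barrier. Readers
  access the table under rcu_read_lock():

	rcu_read_lock();
	table = rcu_dereference(pd->em_table);
	/* ... use table->state ... */
	rcu_read_unlock();
]
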
@@ -341,7 +330,7 @@ EXPORT_SYMBOL_GPL(em_dev_update_perf_domain);
 
 static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
 				struct em_perf_state *table,
-				struct em_data_callback *cb,
+				const struct em_data_callback *cb,
 				unsigned long flags)
 {
 	unsigned long power, freq, prev_freq = 0;
@@ -396,10 +385,11 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
 }
 
 static int em_create_pd(struct device *dev, int nr_states,
-			struct em_data_callback *cb, cpumask_t *cpus,
+			const struct em_data_callback *cb,
+			const cpumask_t *cpus,
 			unsigned long flags)
 {
-	struct em_perf_table __rcu *em_table;
+	struct em_perf_table *em_table;
 	struct em_perf_domain *pd;
 	struct device *cpu_dev;
 	int cpu, ret, num_cpus;
@@ -556,9 +546,10 @@ EXPORT_SYMBOL_GPL(em_cpu_get);
  * Return 0 on success
  */
 int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
-				struct em_data_callback *cb, cpumask_t *cpus,
-				bool microwatts)
+				const struct em_data_callback *cb,
+				const cpumask_t *cpus, bool microwatts)
 {
+	struct em_perf_table *em_table;
 	unsigned long cap, prev_cap = 0;
 	unsigned long flags = 0;
 	int cpu, ret;
@@ -631,7 +622,9 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
 	dev->em_pd->min_perf_state = 0;
 	dev->em_pd->max_perf_state = nr_states - 1;
 
-	em_cpufreq_update_efficiencies(dev, dev->em_pd->em_table->state);
+	em_table = rcu_dereference_protected(dev->em_pd->em_table,
+					     lockdep_is_held(&em_pd_mutex));
+	em_cpufreq_update_efficiencies(dev, em_table->state);
 
 	em_debug_create_pd(dev);
 	dev_info(dev, "EM: created perf domain\n");
@@ -668,7 +661,8 @@ void em_dev_unregister_perf_domain(struct device *dev)
 	mutex_lock(&em_pd_mutex);
 	em_debug_remove_pd(dev);
 
-	em_table_free(dev->em_pd->em_table);
+	em_table_free(rcu_dereference_protected(dev->em_pd->em_table,
+						lockdep_is_held(&em_pd_mutex)));
 
 	kfree(dev->em_pd);
 	dev->em_pd = NULL;
@@ -676,9 +670,9 @@ void em_dev_unregister_perf_domain(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain);
 
-static struct em_perf_table __rcu *em_table_dup(struct em_perf_domain *pd)
+static struct em_perf_table *em_table_dup(struct em_perf_domain *pd)
 {
-	struct em_perf_table __rcu *em_table;
+	struct em_perf_table *em_table;
 	struct em_perf_state *ps, *new_ps;
 	int ps_size;
@@ -700,7 +694,7 @@ static struct em_perf_table __rcu *em_table_dup(struct em_perf_domain *pd)
 }
 
 static int em_recalc_and_update(struct device *dev, struct em_perf_domain *pd,
-				struct em_perf_table __rcu *em_table)
+				struct em_perf_table *em_table)
 {
 	int ret;
 
@@ -728,10 +722,9 @@ free_em_table:
  * are correctly calculated.
 */
 static void em_adjust_new_capacity(struct device *dev,
-				   struct em_perf_domain *pd,
-				   u64 max_cap)
+				   struct em_perf_domain *pd)
 {
-	struct em_perf_table __rcu *em_table;
+	struct em_perf_table *em_table;
 
 	em_table = em_table_dup(pd);
 	if (!em_table) {
@@ -775,7 +768,8 @@ static void em_check_capacity_update(void)
 		}
 		cpufreq_cpu_put(policy);
 
-		pd = em_cpu_get(cpu);
+		dev = get_cpu_device(cpu);
+		pd = em_pd_get(dev);
 		if (!pd || em_is_artificial(pd))
 			continue;
@@ -799,8 +793,7 @@ static void em_check_capacity_update(void)
 		pr_debug("updating cpu%d cpu_cap=%lu old capacity=%lu\n",
 			 cpu, cpu_capacity, em_max_perf);
 
-		dev = get_cpu_device(cpu);
-		em_adjust_new_capacity(dev, pd, cpu_capacity);
+		em_adjust_new_capacity(dev, pd);
 	}
 
 	free_cpumask_var(cpu_done_mask);
@@ -822,7 +815,7 @@ static void em_update_workfn(struct work_struct *work)
  */
 int em_dev_update_chip_binning(struct device *dev)
 {
-	struct em_perf_table __rcu *em_table;
+	struct em_perf_table *em_table;
 	struct em_perf_domain *pd;
 	int i, ret;