mirror of
https://github.com/raspberrypi/linux.git
synced 2025-12-06 10:00:17 +00:00
drm/v3d: Clock V3D down when not in use.
My various attempts at re-enabling runtime PM have failed, so just crank the clock down when V3D is idle to reduce power consumption. Signed-off-by: Eric Anholt <eric@anholt.net> drm/v3d: Plug dma_fence leak The irq_fence and done_fence are given a reference that is never released. The necessary dma_fence_put()s seem to have been deleted in error in an earlier commit. Fixes: 0b73676836b2 ("drm/v3d: Clock V3D down when not in use.") Signed-off-by: Phil Elwell <phil@raspberrypi.org> v3d_drv: Handle missing clock more gracefully Signed-off-by: popcornmix <popcornmix@gmail.com> v3d_gem: Kick the clock so firmware knows we are using firmware clock interface Setting the v3d clock to a low value allows firmware to handle dvfs in the case where v3d hardware is not being actively used (e.g. console use). Signed-off-by: popcornmix <popcornmix@gmail.com> drm/v3d: Switch clock setting to new api Signed-off-by: Dom Cobley <popcornmix@gmail.com> drm/v3d: Convert to new clock range API Signed-off-by: Maxime Ripard <maxime@cerno.tech> drm/v3d: Correct clock settng calls to new APIs There was a report that the 6.12 kernel has lower benchmark scores than 6.6. I can confirm, and found it started with the 6.8 kernel, which moved some code into a new file (v3d_submit.c), and in two places the changes to the clock API were missed. The effect of the bug is the v3d clock sometimes unwantedly drops to a lower rate. With this patch the benchmark scores are good again. Fixes: 86963038cb Signed-off-by: Dom Cobley <popcornmix@gmail.com> drm/v3d: CPU job submissions shouldn't affect V3D GPU clock We can avoid calling v3d_clock_up_put and v3d_clock_up_get when a job is submitted to a CPU queue. We don't need to change the V3D core frequency to run a CPU job as it is executed on the CPU. This way we avoid delaying timestamp CPU jobs by 4.5ms, which is the time it takes the firmware to increase the V3D core frequency. 
Fixes: fe6a858096 ("drm/v3d: Correct clock settng calls to new APIs") Signed-off-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com> Reviewed-by: Maíra Canal <mcanal@igalia.com>
This commit is contained in:
@@ -25,6 +25,9 @@
|
|||||||
|
|
||||||
#include <drm/drm_drv.h>
|
#include <drm/drm_drv.h>
|
||||||
#include <drm/drm_managed.h>
|
#include <drm/drm_managed.h>
|
||||||
|
|
||||||
|
#include <soc/bcm2835/raspberrypi-firmware.h>
|
||||||
|
|
||||||
#include <uapi/drm/v3d_drm.h>
|
#include <uapi/drm/v3d_drm.h>
|
||||||
|
|
||||||
#include "v3d_drv.h"
|
#include "v3d_drv.h"
|
||||||
@@ -303,6 +306,8 @@ map_regs(struct v3d_dev *v3d, void __iomem **regs, const char *name)
|
|||||||
static int v3d_platform_drm_probe(struct platform_device *pdev)
|
static int v3d_platform_drm_probe(struct platform_device *pdev)
|
||||||
{
|
{
|
||||||
struct device *dev = &pdev->dev;
|
struct device *dev = &pdev->dev;
|
||||||
|
struct rpi_firmware *firmware;
|
||||||
|
struct device_node *node;
|
||||||
struct drm_device *drm;
|
struct drm_device *drm;
|
||||||
struct v3d_dev *v3d;
|
struct v3d_dev *v3d;
|
||||||
enum v3d_gen gen;
|
enum v3d_gen gen;
|
||||||
@@ -388,6 +393,34 @@ static int v3d_platform_drm_probe(struct platform_device *pdev)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
v3d->clk = devm_clk_get(dev, NULL);
|
||||||
|
if (IS_ERR_OR_NULL(v3d->clk)) {
|
||||||
|
if (PTR_ERR(v3d->clk) != -EPROBE_DEFER)
|
||||||
|
dev_err(dev, "Failed to get clock (%ld)\n", PTR_ERR(v3d->clk));
|
||||||
|
return PTR_ERR(v3d->clk);
|
||||||
|
}
|
||||||
|
|
||||||
|
node = rpi_firmware_find_node();
|
||||||
|
if (!node)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
firmware = rpi_firmware_get(node);
|
||||||
|
of_node_put(node);
|
||||||
|
if (!firmware)
|
||||||
|
return -EPROBE_DEFER;
|
||||||
|
|
||||||
|
v3d->clk_up_rate = rpi_firmware_clk_get_max_rate(firmware,
|
||||||
|
RPI_FIRMWARE_V3D_CLK_ID);
|
||||||
|
rpi_firmware_put(firmware);
|
||||||
|
|
||||||
|
/* For downclocking, drop it to the minimum frequency we can get from
|
||||||
|
* the CPRMAN clock generator dividing off our parent. The divider is
|
||||||
|
* 4 bits, but ask for just higher than that so that rounding doesn't
|
||||||
|
* make cprman reject our rate.
|
||||||
|
*/
|
||||||
|
v3d->clk_down_rate =
|
||||||
|
(clk_get_rate(clk_get_parent(v3d->clk)) / (1 << 4)) + 10000;
|
||||||
|
|
||||||
if (v3d->ver < V3D_GEN_41) {
|
if (v3d->ver < V3D_GEN_41) {
|
||||||
ret = map_regs(v3d, &v3d->gca_regs, "gca");
|
ret = map_regs(v3d, &v3d->gca_regs, "gca");
|
||||||
if (ret)
|
if (ret)
|
||||||
@@ -417,6 +450,8 @@ static int v3d_platform_drm_probe(struct platform_device *pdev)
|
|||||||
ret = v3d_sysfs_init(dev);
|
ret = v3d_sysfs_init(dev);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto drm_unregister;
|
goto drm_unregister;
|
||||||
|
ret = clk_set_min_rate(v3d->clk, v3d->clk_down_rate);
|
||||||
|
WARN_ON_ONCE(ret != 0);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
|||||||
@@ -128,6 +128,12 @@ struct v3d_dev {
|
|||||||
void __iomem *gca_regs;
|
void __iomem *gca_regs;
|
||||||
void __iomem *sms_regs;
|
void __iomem *sms_regs;
|
||||||
struct clk *clk;
|
struct clk *clk;
|
||||||
|
struct delayed_work clk_down_work;
|
||||||
|
unsigned long clk_up_rate, clk_down_rate;
|
||||||
|
struct mutex clk_lock;
|
||||||
|
u32 clk_refcount;
|
||||||
|
bool clk_up;
|
||||||
|
|
||||||
struct reset_control *reset;
|
struct reset_control *reset;
|
||||||
|
|
||||||
/* Virtual and DMA addresses of the single shared page table. */
|
/* Virtual and DMA addresses of the single shared page table. */
|
||||||
@@ -631,3 +637,4 @@ int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data,
|
|||||||
/* v3d_sysfs.c */
|
/* v3d_sysfs.c */
|
||||||
int v3d_sysfs_init(struct device *dev);
|
int v3d_sysfs_init(struct device *dev);
|
||||||
void v3d_sysfs_destroy(struct device *dev);
|
void v3d_sysfs_destroy(struct device *dev);
|
||||||
|
void v3d_submit_init(struct drm_device *dev);
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
#include <linux/device.h>
|
#include <linux/device.h>
|
||||||
#include <linux/dma-mapping.h>
|
#include <linux/dma-mapping.h>
|
||||||
#include <linux/io.h>
|
#include <linux/io.h>
|
||||||
|
#include <linux/clk.h>
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
#include <linux/platform_device.h>
|
#include <linux/platform_device.h>
|
||||||
#include <linux/reset.h>
|
#include <linux/reset.h>
|
||||||
@@ -288,6 +289,8 @@ v3d_gem_init(struct drm_device *dev)
|
|||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
|
v3d_submit_init(dev);
|
||||||
|
|
||||||
/* Note: We don't allocate address 0. Various bits of HW
|
/* Note: We don't allocate address 0. Various bits of HW
|
||||||
* treat 0 as special, such as the occlusion query counters
|
* treat 0 as special, such as the occlusion query counters
|
||||||
* where 0 means "disabled".
|
* where 0 means "disabled".
|
||||||
|
|||||||
@@ -5,11 +5,52 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <drm/drm_syncobj.h>
|
#include <drm/drm_syncobj.h>
|
||||||
|
#include <linux/clk.h>
|
||||||
|
|
||||||
#include "v3d_drv.h"
|
#include "v3d_drv.h"
|
||||||
#include "v3d_regs.h"
|
#include "v3d_regs.h"
|
||||||
#include "v3d_trace.h"
|
#include "v3d_trace.h"
|
||||||
|
|
||||||
|
static void
|
||||||
|
v3d_clock_down_work(struct work_struct *work)
|
||||||
|
{
|
||||||
|
struct v3d_dev *v3d =
|
||||||
|
container_of(work, struct v3d_dev, clk_down_work.work);
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = clk_set_min_rate(v3d->clk, v3d->clk_down_rate);
|
||||||
|
v3d->clk_up = false;
|
||||||
|
WARN_ON_ONCE(ret != 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
v3d_clock_up_get(struct v3d_dev *v3d)
|
||||||
|
{
|
||||||
|
mutex_lock(&v3d->clk_lock);
|
||||||
|
if (v3d->clk_refcount++ == 0) {
|
||||||
|
cancel_delayed_work_sync(&v3d->clk_down_work);
|
||||||
|
if (!v3d->clk_up) {
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = clk_set_min_rate(v3d->clk, v3d->clk_up_rate);
|
||||||
|
WARN_ON_ONCE(ret != 0);
|
||||||
|
v3d->clk_up = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
mutex_unlock(&v3d->clk_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
v3d_clock_up_put(struct v3d_dev *v3d)
|
||||||
|
{
|
||||||
|
mutex_lock(&v3d->clk_lock);
|
||||||
|
if (--v3d->clk_refcount == 0) {
|
||||||
|
schedule_delayed_work(&v3d->clk_down_work,
|
||||||
|
msecs_to_jiffies(100));
|
||||||
|
}
|
||||||
|
mutex_unlock(&v3d->clk_lock);
|
||||||
|
}
|
||||||
|
|
||||||
/* Takes the reservation lock on all the BOs being referenced, so that
|
/* Takes the reservation lock on all the BOs being referenced, so that
|
||||||
* we can attach fences and update the reservations after pushing the job
|
* we can attach fences and update the reservations after pushing the job
|
||||||
* to the queue.
|
* to the queue.
|
||||||
@@ -85,9 +126,10 @@ v3d_lookup_bos(struct drm_device *dev,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
v3d_job_free(struct kref *ref)
|
v3d_job_free_common(struct v3d_job *job,
|
||||||
|
bool is_gpu_job)
|
||||||
{
|
{
|
||||||
struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
|
struct v3d_dev *v3d = job->v3d;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (job->bo) {
|
if (job->bo) {
|
||||||
@@ -99,12 +141,31 @@ v3d_job_free(struct kref *ref)
|
|||||||
dma_fence_put(job->irq_fence);
|
dma_fence_put(job->irq_fence);
|
||||||
dma_fence_put(job->done_fence);
|
dma_fence_put(job->done_fence);
|
||||||
|
|
||||||
|
if (is_gpu_job)
|
||||||
|
v3d_clock_up_put(v3d);
|
||||||
|
|
||||||
if (job->perfmon)
|
if (job->perfmon)
|
||||||
v3d_perfmon_put(job->perfmon);
|
v3d_perfmon_put(job->perfmon);
|
||||||
|
|
||||||
kfree(job);
|
kfree(job);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
v3d_job_free(struct kref *ref)
|
||||||
|
{
|
||||||
|
struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
|
||||||
|
|
||||||
|
v3d_job_free_common(job, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
v3d_cpu_job_free(struct kref *ref)
|
||||||
|
{
|
||||||
|
struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
|
||||||
|
|
||||||
|
v3d_job_free_common(job, false);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
v3d_render_job_free(struct kref *ref)
|
v3d_render_job_free(struct kref *ref)
|
||||||
{
|
{
|
||||||
@@ -199,6 +260,8 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
|
|||||||
if (ret && ret != -ENOENT)
|
if (ret && ret != -ENOENT)
|
||||||
goto fail_deps;
|
goto fail_deps;
|
||||||
}
|
}
|
||||||
|
if (queue != V3D_CPU)
|
||||||
|
v3d_clock_up_get(v3d);
|
||||||
|
|
||||||
kref_init(&job->refcount);
|
kref_init(&job->refcount);
|
||||||
|
|
||||||
@@ -1316,7 +1379,7 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
|
|||||||
trace_v3d_submit_cpu_ioctl(&v3d->drm, cpu_job->job_type);
|
trace_v3d_submit_cpu_ioctl(&v3d->drm, cpu_job->job_type);
|
||||||
|
|
||||||
ret = v3d_job_init(v3d, file_priv, &cpu_job->base,
|
ret = v3d_job_init(v3d, file_priv, &cpu_job->base,
|
||||||
v3d_job_free, 0, &se, V3D_CPU);
|
v3d_cpu_job_free, 0, &se, V3D_CPU);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
v3d_job_deallocate((void *)&cpu_job);
|
v3d_job_deallocate((void *)&cpu_job);
|
||||||
goto fail;
|
goto fail;
|
||||||
@@ -1404,3 +1467,14 @@ fail:
|
|||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void v3d_submit_init(struct drm_device *dev) {
|
||||||
|
struct v3d_dev *v3d = to_v3d_dev(dev);
|
||||||
|
|
||||||
|
mutex_init(&v3d->clk_lock);
|
||||||
|
INIT_DELAYED_WORK(&v3d->clk_down_work, v3d_clock_down_work);
|
||||||
|
|
||||||
|
/* kick the clock so firmware knows we are using firmware clock interface */
|
||||||
|
v3d_clock_up_get(v3d);
|
||||||
|
v3d_clock_up_put(v3d);
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user