drm/vc4: Update a bunch of code to match upstream submission.

This gets almost everything matching, except for the MSAA support and
using generic PM domains.

Signed-off-by: Eric Anholt <eric@anholt.net>
Author: Eric Anholt
Date: 2015-12-04 11:35:34 -08:00
Committed-by: popcornmix
Parent: b9e5697fbe
Commit: e4058e84ed
15 changed files with 591 additions and 529 deletions


@@ -58,15 +58,14 @@ __drm_gem_cma_create(struct drm_device *drm, size_t size)
 	struct drm_gem_cma_object *cma_obj;
 	struct drm_gem_object *gem_obj;
 	int ret;
-	size_t obj_size = (drm->driver->gem_obj_size ?
-			   drm->driver->gem_obj_size :
-			   sizeof(*cma_obj));
 
-	cma_obj = kzalloc(obj_size, GFP_KERNEL);
-	if (!cma_obj)
+	if (drm->driver->gem_create_object)
+		gem_obj = drm->driver->gem_create_object(drm, size);
+	else
+		gem_obj = kzalloc(sizeof(*cma_obj), GFP_KERNEL);
+	if (!gem_obj)
 		return ERR_PTR(-ENOMEM);
+	cma_obj = container_of(gem_obj, struct drm_gem_cma_object, base);
 
-	gem_obj = &cma_obj->base;
 	ret = drm_gem_object_init(drm, gem_obj, size);
 	if (ret)
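
The new gem_create_object hook above lets a driver allocate its own, larger object struct that embeds the CMA object, instead of patching the helper's allocation size with gem_obj_size. A minimal sketch of a driver wiring this up (the foo_* names are illustrative, not part of this commit):

struct foo_bo {
	struct drm_gem_cma_object base;	/* must embed the CMA object */
	bool cached;			/* driver-private state */
};

static struct drm_gem_object *foo_create_object(struct drm_device *dev,
						size_t size)
{
	struct foo_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);

	/* The helper recovers our struct via container_of(gem_obj, ...). */
	return bo ? &bo->base.base : NULL;
}
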


@@ -12,6 +12,10 @@
  * access to system memory with no MMU in between. To support it, we
  * use the GEM CMA helper functions to allocate contiguous ranges of
  * physical memory for our BOs.
+ *
+ * Since the CMA allocator is very slow, we keep a cache of recently
+ * freed BOs around so that the kernel's allocation of objects for 3D
+ * rendering can return quickly.
  */
 
 #include "vc4_drv.h"
@@ -34,6 +38,36 @@ static void vc4_bo_stats_dump(struct vc4_dev *vc4)
 		  vc4->bo_stats.size_cached / 1024);
 }
 
+#ifdef CONFIG_DEBUG_FS
+int vc4_bo_stats_debugfs(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_bo_stats stats;
+
+	/* Take a snapshot of the current stats with the lock held. */
+	mutex_lock(&vc4->bo_lock);
+	stats = vc4->bo_stats;
+	mutex_unlock(&vc4->bo_lock);
+
+	seq_printf(m, "num bos allocated: %d\n",
+		   stats.num_allocated);
+	seq_printf(m, "size bos allocated: %dkb\n",
+		   stats.size_allocated / 1024);
+	seq_printf(m, "num bos used: %d\n",
+		   stats.num_allocated - stats.num_cached);
+	seq_printf(m, "size bos used: %dkb\n",
+		   (stats.size_allocated - stats.size_cached) / 1024);
+	seq_printf(m, "num bos cached: %d\n",
+		   stats.num_cached);
+	seq_printf(m, "size bos cached: %dkb\n",
+		   stats.size_cached / 1024);
+
+	return 0;
+}
+#endif
+
 static uint32_t bo_page_index(size_t size)
 {
 	return (size / PAGE_SIZE) - 1;
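
bo_page_index() maps a page-aligned size to a cache bucket, so every entry on a given size_list holds BOs of exactly one size. A worked example, assuming the usual 4KB pages:

	/* With PAGE_SIZE == 4096 (typical on this platform):
	 * a one-page (4096-byte) BO lands in bucket 0,
	 * a 16384-byte BO in bucket (16384 / 4096) - 1 == 3.
	 */
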
@@ -81,8 +115,8 @@ static struct list_head *vc4_get_cache_list_for_size(struct drm_device *dev,
 	struct list_head *new_list;
 	uint32_t i;
 
-	new_list = kmalloc(new_size * sizeof(struct list_head),
-			   GFP_KERNEL);
+	new_list = kmalloc_array(new_size, sizeof(struct list_head),
+				 GFP_KERNEL);
 	if (!new_list)
 		return NULL;
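
kmalloc_array() is the overflow-checked replacement for the open-coded multiply: if new_size * sizeof(struct list_head) would wrap, it returns NULL instead of quietly allocating a short buffer. Roughly (a simplified sketch of the kernel helper, not its exact current definition):

static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
{
	if (size != 0 && n > SIZE_MAX / size)
		return NULL;	/* multiplication would overflow */
	return kmalloc(n * size, flags);
}
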
@@ -90,7 +124,9 @@ static struct list_head *vc4_get_cache_list_for_size(struct drm_device *dev,
 	 * head locations.
 	 */
 	for (i = 0; i < vc4->bo_cache.size_list_size; i++) {
-		struct list_head *old_list = &vc4->bo_cache.size_list[i];
+		struct list_head *old_list =
+			&vc4->bo_cache.size_list[i];
+
 		if (list_empty(old_list))
 			INIT_LIST_HEAD(&new_list[i]);
 		else
@@ -122,11 +158,60 @@ void vc4_bo_cache_purge(struct drm_device *dev)
 	mutex_unlock(&vc4->bo_lock);
 }
 
-struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size)
+static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev,
+					    uint32_t size)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
-	uint32_t size = roundup(unaligned_size, PAGE_SIZE);
 	uint32_t page_index = bo_page_index(size);
+	struct vc4_bo *bo = NULL;
+
+	size = roundup(size, PAGE_SIZE);
+
+	mutex_lock(&vc4->bo_lock);
+	if (page_index >= vc4->bo_cache.size_list_size)
+		goto out;
+
+	if (list_empty(&vc4->bo_cache.size_list[page_index]))
+		goto out;
+
+	bo = list_first_entry(&vc4->bo_cache.size_list[page_index],
+			      struct vc4_bo, size_head);
+	vc4_bo_remove_from_cache(bo);
+	kref_init(&bo->base.base.refcount);
+
+out:
+	mutex_unlock(&vc4->bo_lock);
+	return bo;
+}
+
+/**
+ * vc4_gem_create_object - Implementation of driver->gem_create_object.
+ *
+ * This lets the CMA helpers allocate object structs for us, and keep
+ * our BO stats correct.
+ */
+struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_bo *bo;
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (!bo)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_lock(&vc4->bo_lock);
+	vc4->bo_stats.num_allocated++;
+	vc4->bo_stats.size_allocated += size;
+	mutex_unlock(&vc4->bo_lock);
+
+	return &bo->base.base;
+}
+
+struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size,
+			     bool from_cache)
+{
+	size_t size = roundup(unaligned_size, PAGE_SIZE);
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct drm_gem_cma_object *cma_obj;
 	int pass;
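
Note the kref_init() in the cache-hit path: a BO only reaches the cache after its last reference is dropped, so reviving it has to re-arm the embedded refcount back to 1. The general kref pattern being relied on (release_bo here is a hypothetical release callback, purely for illustration):

	struct kref ref;

	kref_init(&ref);		/* refcount = 1 */
	kref_get(&ref);			/* refcount = 2 */
	kref_put(&ref, release_bo);	/* refcount = 1; release_bo not called */
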
@@ -134,18 +219,12 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size)
 		return NULL;
 
 	/* First, try to get a vc4_bo from the kernel BO cache. */
-	mutex_lock(&vc4->bo_lock);
-	if (page_index < vc4->bo_cache.size_list_size &&
-	    !list_empty(&vc4->bo_cache.size_list[page_index])) {
-		struct vc4_bo *bo =
-			list_first_entry(&vc4->bo_cache.size_list[page_index],
-					 struct vc4_bo, size_head);
-		vc4_bo_remove_from_cache(bo);
-		mutex_unlock(&vc4->bo_lock);
-		kref_init(&bo->base.base.refcount);
-		return bo;
+	if (from_cache) {
+		struct vc4_bo *bo = vc4_bo_get_from_cache(dev, size);
+
+		if (bo)
+			return bo;
 	}
-	mutex_unlock(&vc4->bo_lock);
 
 	/* Otherwise, make a new BO. */
 	for (pass = 0; ; pass++) {
@@ -179,9 +258,6 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size)
 		}
 	}
 
-	vc4->bo_stats.num_allocated++;
-	vc4->bo_stats.size_allocated += size;
-
 	return to_vc4_bo(&cma_obj->base);
 }
@@ -199,7 +275,7 @@ int vc4_dumb_create(struct drm_file *file_priv,
 	if (args->size < args->pitch * args->height)
 		args->size = args->pitch * args->height;
 
-	bo = vc4_bo_create(dev, args->size);
+	bo = vc4_bo_create(dev, args->size, false);
 	if (!bo)
 		return -ENOMEM;
@@ -209,8 +285,8 @@ int vc4_dumb_create(struct drm_file *file_priv,
 	return ret;
 }
 
-static void
-vc4_bo_cache_free_old(struct drm_device *dev)
+/* Must be called with bo_lock held. */
+static void vc4_bo_cache_free_old(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	unsigned long expire_time = jiffies - msecs_to_jiffies(1000);
@@ -313,95 +389,6 @@ vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags)
 	return drm_gem_prime_export(dev, obj, flags);
 }
 
-int
-vc4_create_bo_ioctl(struct drm_device *dev, void *data,
-		    struct drm_file *file_priv)
-{
-	struct drm_vc4_create_bo *args = data;
-	struct vc4_bo *bo = NULL;
-	int ret;
-
-	bo = vc4_bo_create(dev, args->size);
-	if (!bo)
-		return -ENOMEM;
-
-	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
-	drm_gem_object_unreference_unlocked(&bo->base.base);
-
-	return ret;
-}
-
-int
-vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
-			   struct drm_file *file_priv)
-{
-	struct drm_vc4_create_shader_bo *args = data;
-	struct vc4_bo *bo = NULL;
-	int ret;
-
-	if (args->size == 0)
-		return -EINVAL;
-
-	if (args->size % sizeof(u64) != 0)
-		return -EINVAL;
-
-	if (args->flags != 0) {
-		DRM_INFO("Unknown flags set: 0x%08x\n", args->flags);
-		return -EINVAL;
-	}
-
-	if (args->pad != 0) {
-		DRM_INFO("Pad set: 0x%08x\n", args->pad);
-		return -EINVAL;
-	}
-
-	bo = vc4_bo_create(dev, args->size);
-	if (!bo)
-		return -ENOMEM;
-
-	ret = copy_from_user(bo->base.vaddr,
-			     (void __user *)(uintptr_t)args->data,
-			     args->size);
-	if (ret != 0)
-		goto fail;
-
-	bo->validated_shader = vc4_validate_shader(&bo->base);
-	if (!bo->validated_shader) {
-		ret = -EINVAL;
-		goto fail;
-	}
-
-	/* We have to create the handle after validation, to avoid
-	 * races for users to do doing things like mmap the shader BO.
-	 */
-	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
-
-fail:
-	drm_gem_object_unreference_unlocked(&bo->base.base);
-
-	return ret;
-}
-
-int
-vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
-		  struct drm_file *file_priv)
-{
-	struct drm_vc4_mmap_bo *args = data;
-	struct drm_gem_object *gem_obj;
-
-	gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
-	if (!gem_obj) {
-		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
-		return -EINVAL;
-	}
-
-	/* The mmap offset was set up at BO allocation time. */
-	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
-
-	drm_gem_object_unreference(gem_obj);
-
-	return 0;
-}
-
 int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct drm_gem_object *gem_obj;
@@ -441,8 +428,8 @@ int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
 {
 	struct vc4_bo *bo = to_vc4_bo(obj);
 
-	if (bo->validated_shader) {
-		DRM_ERROR("mmaping of shader BOs not allowed.\n");
+	if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
+		DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
 		return -EINVAL;
 	}
@@ -461,6 +448,102 @@ void *vc4_prime_vmap(struct drm_gem_object *obj)
 	return drm_gem_cma_prime_vmap(obj);
 }
 
+int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv)
+{
+	struct drm_vc4_create_bo *args = data;
+	struct vc4_bo *bo = NULL;
+	int ret;
+
+	/*
+	 * We can't allocate from the BO cache, because the BOs don't
+	 * get zeroed, and that might leak data between users.
+	 */
+	bo = vc4_bo_create(dev, args->size, false);
+	if (!bo)
+		return -ENOMEM;
+
+	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
+	drm_gem_object_unreference_unlocked(&bo->base.base);
+
+	return ret;
+}
+
+int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv)
+{
+	struct drm_vc4_mmap_bo *args = data;
+	struct drm_gem_object *gem_obj;
+
+	gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+	if (!gem_obj) {
+		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
+		return -EINVAL;
+	}
+
+	/* The mmap offset was set up at BO allocation time. */
+	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
+
+	drm_gem_object_unreference_unlocked(gem_obj);
+
+	return 0;
+}
+
+int
+vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file_priv)
+{
+	struct drm_vc4_create_shader_bo *args = data;
+	struct vc4_bo *bo = NULL;
+	int ret;
+
+	if (args->size == 0)
+		return -EINVAL;
+
+	if (args->size % sizeof(u64) != 0)
+		return -EINVAL;
+
+	if (args->flags != 0) {
+		DRM_INFO("Unknown flags set: 0x%08x\n", args->flags);
+		return -EINVAL;
+	}
+
+	if (args->pad != 0) {
+		DRM_INFO("Pad set: 0x%08x\n", args->pad);
+		return -EINVAL;
+	}
+
+	bo = vc4_bo_create(dev, args->size, true);
+	if (!bo)
+		return -ENOMEM;
+
+	ret = copy_from_user(bo->base.vaddr,
+			     (void __user *)(uintptr_t)args->data,
+			     args->size);
+	if (ret != 0)
+		goto fail;
+
+	/* Clear the rest of the memory from allocating from the BO
+	 * cache.
+	 */
+	memset(bo->base.vaddr + args->size, 0,
+	       bo->base.base.size - args->size);
+
+	bo->validated_shader = vc4_validate_shader(&bo->base);
+	if (!bo->validated_shader) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* We have to create the handle after validation, to avoid
+	 * races for users to do doing things like mmap the shader BO.
+	 */
+	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
+
+fail:
+	drm_gem_object_unreference_unlocked(&bo->base.base);
+
+	return ret;
+}
+
 void vc4_bo_cache_init(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
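
A rough userspace view of these ioctls (a hedged sketch that assumes libdrm's drmIoctl() wrapper and the field names implied by the validation above; error handling omitted):

	struct drm_vc4_create_bo create = { .size = 1024 * 1024 };
	struct drm_vc4_mmap_bo map = { 0 };
	void *ptr;

	drmIoctl(fd, DRM_IOCTL_VC4_CREATE_BO, &create);
	map.handle = create.handle;
	drmIoctl(fd, DRM_IOCTL_VC4_MMAP_BO, &map);
	ptr = mmap(NULL, create.size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   fd, map.offset);
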
@@ -472,7 +555,7 @@ void vc4_bo_cache_init(struct drm_device *dev)
 	INIT_WORK(&vc4->bo_cache.time_work, vc4_bo_cache_time_work);
 	setup_timer(&vc4->bo_cache.time_timer,
 		    vc4_bo_cache_time_timer,
-		    (unsigned long) dev);
+		    (unsigned long)dev);
 }
 
 void vc4_bo_cache_destroy(struct drm_device *dev)
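
setup_timer() here is the old-style kernel timer API: the callback receives an unsigned long that the driver casts back to its context pointer. The matching callback presumably looks along these lines:

static void vc4_bo_cache_time_timer(unsigned long data)
{
	struct drm_device *dev = (struct drm_device *)data;
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	schedule_work(&vc4->bo_cache.time_work);
}
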
@@ -489,28 +572,3 @@ void vc4_bo_cache_destroy(struct drm_device *dev)
 		vc4_bo_stats_dump(vc4);
 	}
 }
-
-#ifdef CONFIG_DEBUG_FS
-int vc4_bo_stats_debugfs(struct seq_file *m, void *unused)
-{
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
-	struct drm_device *dev = node->minor->dev;
-	struct vc4_dev *vc4 = to_vc4_dev(dev);
-	struct vc4_bo_stats stats;
-
-	mutex_lock(&vc4->bo_lock);
-	stats = vc4->bo_stats;
-	mutex_unlock(&vc4->bo_lock);
-
-	seq_printf(m, "num bos allocated: %d\n", stats.num_allocated);
-	seq_printf(m, "size bos allocated: %dkb\n", stats.size_allocated / 1024);
-	seq_printf(m, "num bos used: %d\n", (stats.num_allocated -
-					     stats.num_cached));
-	seq_printf(m, "size bos used: %dkb\n", (stats.size_allocated -
-						stats.size_cached) / 1024);
-	seq_printf(m, "num bos cached: %d\n", stats.num_cached);
-	seq_printf(m, "size bos cached: %dkb\n", stats.size_cached / 1024);
-
-	return 0;
-}
-#endif


@@ -501,6 +501,7 @@ vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
 	vc4_plane_async_set_fb(plane, flip_state->fb);
 	if (flip_state->event) {
 		unsigned long flags;
+
 		spin_lock_irqsave(&dev->event_lock, flags);
 		drm_crtc_send_vblank_event(crtc, flip_state->event);
 		spin_unlock_irqrestore(&dev->event_lock, flags);
@@ -562,9 +563,9 @@ static int vc4_async_page_flip(struct drm_crtc *crtc,
 }
 
 static int vc4_page_flip(struct drm_crtc *crtc,
-			struct drm_framebuffer *fb,
-			struct drm_pending_vblank_event *event,
-			uint32_t flags)
+			 struct drm_framebuffer *fb,
+			 struct drm_pending_vblank_event *event,
+			 uint32_t flags)
 {
 	if (flags & DRM_MODE_PAGE_FLIP_ASYNC)
 		return vc4_async_page_flip(crtc, fb, event, flags);


@@ -81,7 +81,8 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
-	DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl,
+			  DRM_ROOT_ONLY),
 };
static struct drm_driver vc4_drm_driver = { static struct drm_driver vc4_drm_driver = {
@@ -107,6 +108,7 @@ static struct drm_driver vc4_drm_driver = {
 	.debugfs_cleanup = vc4_debugfs_cleanup,
 #endif
 
+	.gem_create_object = vc4_create_object,
 	.gem_free_object = vc4_free_object,
 	.gem_vm_ops = &drm_gem_cma_vm_ops,
@@ -128,8 +130,6 @@ static struct drm_driver vc4_drm_driver = {
 	.num_ioctls = ARRAY_SIZE(vc4_drm_ioctls),
 	.fops = &vc4_drm_fops,
 
-	//.gem_obj_size = sizeof(struct vc4_bo),
-
 	.name = DRIVER_NAME,
 	.desc = DRIVER_DESC,
 	.date = DRIVER_DATE,


@@ -72,6 +72,9 @@ struct vc4_dev {
 	 * job_done_work.
 	 */
 	struct list_head job_done_list;
+	/* Spinlock used to synchronize the job_list and seqno
+	 * accesses between the IRQ handler and GEM ioctls.
+	 */
 	spinlock_t job_lock;
 	wait_queue_head_t job_wait_queue;
 	struct work_struct job_done_work;
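
The new comment documents the locking rule for job_lock; since the IRQ handler takes the same lock, the access pattern on the ioctl side would be the usual irqsave form (a sketch based on the rule stated above):

	unsigned long irqflags;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	list_add_tail(&exec->head, &vc4->job_list);	/* shared with the IRQ handler */
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
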
@@ -318,8 +321,7 @@ struct vc4_texture_sample_info {
  * and validate the shader state record's uniforms that define the texture
  * samples.
  */
-struct vc4_validated_shader_info
-{
+struct vc4_validated_shader_info {
 	uint32_t uniforms_size;
 	uint32_t uniforms_src_size;
 	uint32_t num_texture_samples;
@@ -355,8 +357,10 @@ struct vc4_validated_shader_info
 #define wait_for(COND, MS) _wait_for(COND, MS, 1)
 
 /* vc4_bo.c */
+struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size);
 void vc4_free_object(struct drm_gem_object *gem_obj);
-struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size);
+struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size,
+			     bool from_cache);
 int vc4_dumb_create(struct drm_file *file_priv,
 		    struct drm_device *dev,
 		    struct drm_mode_create_dumb *args);
@@ -432,7 +436,8 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
 				 enum drm_plane_type type);
 u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist);
 u32 vc4_plane_dlist_size(struct drm_plane_state *state);
-void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb);
+void vc4_plane_async_set_fb(struct drm_plane *plane,
+			    struct drm_framebuffer *fb);
 
 /* vc4_v3d.c */
 extern struct platform_driver vc4_v3d_driver;
@@ -450,9 +455,6 @@ vc4_validate_bin_cl(struct drm_device *dev,
 int
 vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);
 
-struct vc4_validated_shader_info *
-vc4_validate_shader(struct drm_gem_cma_object *shader_obj);
-
 bool vc4_use_bo(struct vc4_exec_info *exec,
 		uint32_t hindex,
 		enum vc4_bo_mode mode,
@@ -464,3 +466,7 @@ bool vc4_check_tex_size(struct vc4_exec_info *exec,
 			struct drm_gem_cma_object *fbo,
 			uint32_t offset, uint8_t tiling_format,
 			uint32_t width, uint32_t height, uint8_t cpp);
+
+/* vc4_validate_shader.c */
+struct vc4_validated_shader_info *
+vc4_validate_shader(struct drm_gem_cma_object *shader_obj);


@@ -53,9 +53,8 @@ vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
 	unsigned int i;
 
 	mutex_lock(&dev->struct_mutex);
-	for (i = 0; i < state->user_state.bo_count; i++) {
+	for (i = 0; i < state->user_state.bo_count; i++)
 		drm_gem_object_unreference(state->bo[i]);
-	}
 	mutex_unlock(&dev->struct_mutex);
 
 	kfree(state);
@@ -65,10 +64,10 @@ int
 vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv)
 {
 	struct drm_vc4_get_hang_state *get_state = data;
 	struct drm_vc4_get_hang_state_bo *bo_state;
 	struct vc4_hang_state *kernel_state;
 	struct drm_vc4_get_hang_state *state;
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	unsigned long irqflags;
 	u32 i;
@@ -107,6 +106,7 @@ vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
 	for (i = 0; i < state->bo_count; i++) {
 		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
 		u32 handle;
+
 		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
 					    &handle);
@@ -124,7 +124,7 @@ vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
 			 state->bo_count * sizeof(*bo_state));
 	kfree(bo_state);
 
 err_free:
 	vc4_free_hang_state(dev, kernel_state);
@@ -578,7 +578,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
 		goto fail;
 	}
 
-	bo = vc4_bo_create(dev, exec_size);
+	bo = vc4_bo_create(dev, exec_size, true);
 	if (!bo) {
 		DRM_ERROR("Couldn't allocate BO for binning\n");
 		ret = PTR_ERR(exec->exec_bo);
@@ -668,6 +668,7 @@ vc4_job_handle_completed(struct vc4_dev *vc4)
 static void vc4_seqno_cb_work(struct work_struct *work)
 {
 	struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);
+
 	cb->func(cb);
 }
@@ -717,6 +718,7 @@ vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
 	if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
 		uint64_t delta = jiffies_to_nsecs(jiffies - start);
+
 		if (*timeout_ns >= delta)
 			*timeout_ns -= delta;
 	}
@@ -750,9 +752,10 @@ vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
 	}
 	bo = to_vc4_bo(gem_obj);
-	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno, &args->timeout_ns);
+	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
+					      &args->timeout_ns);
 
-	drm_gem_object_unreference(gem_obj);
+	drm_gem_object_unreference_unlocked(gem_obj);
 	return ret;
 }
@@ -793,7 +796,8 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 		if (ret)
 			goto fail;
 	} else {
-		exec->ct0ca = exec->ct0ea = 0;
+		exec->ct0ca = 0;
+		exec->ct0ea = 0;
 	}
 
 	ret = vc4_get_rcl(dev, exec);
@@ -831,7 +835,7 @@ vc4_gem_init(struct drm_device *dev)
 	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
 	setup_timer(&vc4->hangcheck.timer,
 		    vc4_hangcheck_elapsed,
-		    (unsigned long) dev);
+		    (unsigned long)dev);
 
 	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
 }


@@ -56,7 +56,7 @@ vc4_overflow_mem_work(struct work_struct *work)
 	struct drm_device *dev = vc4->dev;
 	struct vc4_bo *bo;
 
-	bo = vc4_bo_create(dev, 256 * 1024);
+	bo = vc4_bo_create(dev, 256 * 1024, true);
 	if (!bo) {
 		DRM_ERROR("Couldn't allocate binner overflow mem\n");
 		return;
@@ -87,9 +87,8 @@ vc4_overflow_mem_work(struct work_struct *work)
 		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 	}
 
-	if (vc4->overflow_mem) {
+	if (vc4->overflow_mem)
 		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
-	}
 	vc4->overflow_mem = bo;
 
 	V3D_WRITE(V3D_BPOA, bo->base.paddr);


@@ -132,6 +132,7 @@ static int vc4_atomic_commit(struct drm_device *dev,
 			struct drm_gem_cma_object *cma_bo =
 				drm_fb_cma_get_gem_obj(new_state->fb, 0);
 			struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
+
 			wait_seqno = max(bo->seqno, wait_seqno);
 		}
 	}


@@ -27,60 +27,60 @@
#include "vc4_regs.h" /* for VC4_MASK, VC4_GET_FIELD, VC4_SET_FIELD */ #include "vc4_regs.h" /* for VC4_MASK, VC4_GET_FIELD, VC4_SET_FIELD */
enum vc4_packet { enum vc4_packet {
VC4_PACKET_HALT = 0, VC4_PACKET_HALT = 0,
VC4_PACKET_NOP = 1, VC4_PACKET_NOP = 1,
VC4_PACKET_FLUSH = 4, VC4_PACKET_FLUSH = 4,
VC4_PACKET_FLUSH_ALL = 5, VC4_PACKET_FLUSH_ALL = 5,
VC4_PACKET_START_TILE_BINNING = 6, VC4_PACKET_START_TILE_BINNING = 6,
VC4_PACKET_INCREMENT_SEMAPHORE = 7, VC4_PACKET_INCREMENT_SEMAPHORE = 7,
VC4_PACKET_WAIT_ON_SEMAPHORE = 8, VC4_PACKET_WAIT_ON_SEMAPHORE = 8,
VC4_PACKET_BRANCH = 16, VC4_PACKET_BRANCH = 16,
VC4_PACKET_BRANCH_TO_SUB_LIST = 17, VC4_PACKET_BRANCH_TO_SUB_LIST = 17,
VC4_PACKET_STORE_MS_TILE_BUFFER = 24, VC4_PACKET_STORE_MS_TILE_BUFFER = 24,
VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25, VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25,
VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26,
VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27,
VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28, VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28,
VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29,
VC4_PACKET_GL_INDEXED_PRIMITIVE = 32, VC4_PACKET_GL_INDEXED_PRIMITIVE = 32,
VC4_PACKET_GL_ARRAY_PRIMITIVE = 33, VC4_PACKET_GL_ARRAY_PRIMITIVE = 33,
VC4_PACKET_COMPRESSED_PRIMITIVE = 48, VC4_PACKET_COMPRESSED_PRIMITIVE = 48,
VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49, VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49,
VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56, VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56,
VC4_PACKET_GL_SHADER_STATE = 64, VC4_PACKET_GL_SHADER_STATE = 64,
VC4_PACKET_NV_SHADER_STATE = 65, VC4_PACKET_NV_SHADER_STATE = 65,
VC4_PACKET_VG_SHADER_STATE = 66, VC4_PACKET_VG_SHADER_STATE = 66,
VC4_PACKET_CONFIGURATION_BITS = 96, VC4_PACKET_CONFIGURATION_BITS = 96,
VC4_PACKET_FLAT_SHADE_FLAGS = 97, VC4_PACKET_FLAT_SHADE_FLAGS = 97,
VC4_PACKET_POINT_SIZE = 98, VC4_PACKET_POINT_SIZE = 98,
VC4_PACKET_LINE_WIDTH = 99, VC4_PACKET_LINE_WIDTH = 99,
VC4_PACKET_RHT_X_BOUNDARY = 100, VC4_PACKET_RHT_X_BOUNDARY = 100,
VC4_PACKET_DEPTH_OFFSET = 101, VC4_PACKET_DEPTH_OFFSET = 101,
VC4_PACKET_CLIP_WINDOW = 102, VC4_PACKET_CLIP_WINDOW = 102,
VC4_PACKET_VIEWPORT_OFFSET = 103, VC4_PACKET_VIEWPORT_OFFSET = 103,
VC4_PACKET_Z_CLIPPING = 104, VC4_PACKET_Z_CLIPPING = 104,
VC4_PACKET_CLIPPER_XY_SCALING = 105, VC4_PACKET_CLIPPER_XY_SCALING = 105,
VC4_PACKET_CLIPPER_Z_SCALING = 106, VC4_PACKET_CLIPPER_Z_SCALING = 106,
VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112, VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112,
VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113, VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113,
VC4_PACKET_CLEAR_COLORS = 114, VC4_PACKET_CLEAR_COLORS = 114,
VC4_PACKET_TILE_COORDINATES = 115, VC4_PACKET_TILE_COORDINATES = 115,
/* Not an actual hardware packet -- this is what we use to put /* Not an actual hardware packet -- this is what we use to put
* references to GEM bos in the command stream, since we need the u32 * references to GEM bos in the command stream, since we need the u32
* int the actual address packet in order to store the offset from the * int the actual address packet in order to store the offset from the
* start of the BO. * start of the BO.
*/ */
VC4_PACKET_GEM_HANDLES = 254, VC4_PACKET_GEM_HANDLES = 254,
} __attribute__ ((__packed__)); } __attribute__ ((__packed__));
#define VC4_PACKET_HALT_SIZE 1 #define VC4_PACKET_HALT_SIZE 1
@@ -148,10 +148,10 @@ enum vc4_packet {
  * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address)
  */
-#define VC4_LOADSTORE_TILE_BUFFER_EOF			(1 << 3)
-#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK	(1 << 2)
-#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS	(1 << 1)
-#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR	(1 << 0)
+#define VC4_LOADSTORE_TILE_BUFFER_EOF			BIT(3)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK	BIT(2)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS	BIT(1)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR	BIT(0)
 
 /** @} */
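
BIT() is the kernel's standard single-bit macro; it expands to an unsigned long shift, which documents intent and sidesteps signed-shift pitfalls at high bit positions:

#define BIT(nr)		(1UL << (nr))	/* from the kernel's bitops headers */

Multi-bit fields such as VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK keep using VC4_MASK(), since BIT() only covers single bits.
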
@@ -160,10 +160,10 @@ enum vc4_packet {
  * byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
  * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
  */
-#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR	(1 << 15)
-#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR		(1 << 14)
-#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR	(1 << 13)
-#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP		(1 << 12)
+#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR	BIT(15)
+#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR		BIT(14)
+#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR	BIT(13)
+#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP		BIT(12)
 
 #define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK		VC4_MASK(9, 8)
 #define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT		8
@@ -201,28 +201,28 @@ enum vc4_packet {
 #define VC4_INDEX_BUFFER_U16			(1 << 4)
 
 /* This flag is only present in NV shader state. */
-#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS	(1 << 3)
-#define VC4_SHADER_FLAG_ENABLE_CLIPPING		(1 << 2)
-#define VC4_SHADER_FLAG_VS_POINT_SIZE		(1 << 1)
-#define VC4_SHADER_FLAG_FS_SINGLE_THREAD	(1 << 0)
+#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS	BIT(3)
+#define VC4_SHADER_FLAG_ENABLE_CLIPPING		BIT(2)
+#define VC4_SHADER_FLAG_VS_POINT_SIZE		BIT(1)
+#define VC4_SHADER_FLAG_FS_SINGLE_THREAD	BIT(0)
 
 /** @{ byte 2 of config bits. */
-#define VC4_CONFIG_BITS_EARLY_Z_UPDATE		(1 << 1)
-#define VC4_CONFIG_BITS_EARLY_Z			(1 << 0)
+#define VC4_CONFIG_BITS_EARLY_Z_UPDATE		BIT(1)
+#define VC4_CONFIG_BITS_EARLY_Z			BIT(0)
 /** @} */
 
 /** @{ byte 1 of config bits. */
-#define VC4_CONFIG_BITS_Z_UPDATE		(1 << 7)
+#define VC4_CONFIG_BITS_Z_UPDATE		BIT(7)
 /** same values in this 3-bit field as PIPE_FUNC_* */
 #define VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT	4
-#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE	(1 << 3)
+#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE	BIT(3)
 
 #define VC4_CONFIG_BITS_COVERAGE_UPDATE_NONZERO	(0 << 1)
 #define VC4_CONFIG_BITS_COVERAGE_UPDATE_ODD	(1 << 1)
 #define VC4_CONFIG_BITS_COVERAGE_UPDATE_OR	(2 << 1)
 #define VC4_CONFIG_BITS_COVERAGE_UPDATE_ZERO	(3 << 1)
 
-#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT	(1 << 0)
+#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT	BIT(0)
 /** @} */
 
 /** @{ byte 0 of config bits. */
@@ -230,15 +230,15 @@ enum vc4_packet {
 #define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X	(1 << 6)
 #define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X	(2 << 6)
 
-#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES	(1 << 4)
-#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET	(1 << 3)
-#define VC4_CONFIG_BITS_CW_PRIMITIVES		(1 << 2)
-#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK	(1 << 1)
-#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT	(1 << 0)
+#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES	BIT(4)
+#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET	BIT(3)
+#define VC4_CONFIG_BITS_CW_PRIMITIVES		BIT(2)
+#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK	BIT(1)
+#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT	BIT(0)
 /** @} */
 
 /** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */
-#define VC4_BIN_CONFIG_DB_NON_MS		(1 << 7)
+#define VC4_BIN_CONFIG_DB_NON_MS		BIT(7)
 
 #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK	VC4_MASK(6, 5)
 #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT	5
@@ -254,17 +254,17 @@ enum vc4_packet {
 #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128	2
 #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256	3
 
-#define VC4_BIN_CONFIG_AUTO_INIT_TSDA		(1 << 2)
-#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT	(1 << 1)
-#define VC4_BIN_CONFIG_MS_MODE_4X		(1 << 0)
+#define VC4_BIN_CONFIG_AUTO_INIT_TSDA		BIT(2)
+#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT	BIT(1)
+#define VC4_BIN_CONFIG_MS_MODE_4X		BIT(0)
 /** @} */
 
 /** @{ bits in the last u16 of VC4_PACKET_TILE_RENDERING_MODE_CONFIG */
-#define VC4_RENDER_CONFIG_DB_NON_MS		(1 << 12)
-#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE (1 << 11)
-#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G	(1 << 10)
-#define VC4_RENDER_CONFIG_COVERAGE_MODE		(1 << 9)
-#define VC4_RENDER_CONFIG_ENABLE_VG_MASK	(1 << 8)
+#define VC4_RENDER_CONFIG_DB_NON_MS		BIT(12)
+#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE BIT(11)
+#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G	BIT(10)
+#define VC4_RENDER_CONFIG_COVERAGE_MODE		BIT(9)
+#define VC4_RENDER_CONFIG_ENABLE_VG_MASK	BIT(8)
 
 /** The values of the field are VC4_TILING_FORMAT_* */
 #define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK	VC4_MASK(7, 6)
@@ -280,8 +280,8 @@ enum vc4_packet {
 #define VC4_RENDER_CONFIG_FORMAT_RGBA8888	1
 #define VC4_RENDER_CONFIG_FORMAT_BGR565		2
 
-#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT	(1 << 1)
-#define VC4_RENDER_CONFIG_MS_MODE_4X		(1 << 0)
+#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT	BIT(1)
+#define VC4_RENDER_CONFIG_MS_MODE_4X		BIT(0)
 
 #define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX	(1 << 4)
 #define VC4_PRIMITIVE_LIST_FORMAT_32_XY		(3 << 4)
@@ -291,24 +291,24 @@ enum vc4_packet {
 #define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT	(3 << 0)
 
 enum vc4_texture_data_type {
 	VC4_TEXTURE_TYPE_RGBA8888 = 0,
 	VC4_TEXTURE_TYPE_RGBX8888 = 1,
 	VC4_TEXTURE_TYPE_RGBA4444 = 2,
 	VC4_TEXTURE_TYPE_RGBA5551 = 3,
 	VC4_TEXTURE_TYPE_RGB565 = 4,
 	VC4_TEXTURE_TYPE_LUMINANCE = 5,
 	VC4_TEXTURE_TYPE_ALPHA = 6,
 	VC4_TEXTURE_TYPE_LUMALPHA = 7,
 	VC4_TEXTURE_TYPE_ETC1 = 8,
 	VC4_TEXTURE_TYPE_S16F = 9,
 	VC4_TEXTURE_TYPE_S8 = 10,
 	VC4_TEXTURE_TYPE_S16 = 11,
 	VC4_TEXTURE_TYPE_BW1 = 12,
 	VC4_TEXTURE_TYPE_A4 = 13,
 	VC4_TEXTURE_TYPE_A1 = 14,
 	VC4_TEXTURE_TYPE_RGBA64 = 15,
 	VC4_TEXTURE_TYPE_RGBA32R = 16,
 	VC4_TEXTURE_TYPE_YUV422R = 17,
 };
 
 #define VC4_TEX_P0_OFFSET_MASK			VC4_MASK(31, 12)


@@ -25,194 +25,190 @@
 #define VC4_QPU_DEFINES_H
 
 enum qpu_op_add {
 	QPU_A_NOP,
 	QPU_A_FADD,
 	QPU_A_FSUB,
 	QPU_A_FMIN,
 	QPU_A_FMAX,
 	QPU_A_FMINABS,
 	QPU_A_FMAXABS,
 	QPU_A_FTOI,
 	QPU_A_ITOF,
 	QPU_A_ADD = 12,
 	QPU_A_SUB,
 	QPU_A_SHR,
 	QPU_A_ASR,
 	QPU_A_ROR,
 	QPU_A_SHL,
 	QPU_A_MIN,
 	QPU_A_MAX,
 	QPU_A_AND,
 	QPU_A_OR,
 	QPU_A_XOR,
 	QPU_A_NOT,
 	QPU_A_CLZ,
 	QPU_A_V8ADDS = 30,
 	QPU_A_V8SUBS = 31,
 };
 
 enum qpu_op_mul {
 	QPU_M_NOP,
 	QPU_M_FMUL,
 	QPU_M_MUL24,
 	QPU_M_V8MULD,
 	QPU_M_V8MIN,
 	QPU_M_V8MAX,
 	QPU_M_V8ADDS,
 	QPU_M_V8SUBS,
 };
 
 enum qpu_raddr {
 	QPU_R_FRAG_PAYLOAD_ZW = 15, /* W for A file, Z for B file */
 	/* 0-31 are the plain regfile a or b fields */
 	QPU_R_UNIF = 32,
 	QPU_R_VARY = 35,
 	QPU_R_ELEM_QPU = 38,
 	QPU_R_NOP,
 	QPU_R_XY_PIXEL_COORD = 41,
 	QPU_R_MS_REV_FLAGS = 41,
 	QPU_R_VPM = 48,
 	QPU_R_VPM_LD_BUSY,
 	QPU_R_VPM_LD_WAIT,
 	QPU_R_MUTEX_ACQUIRE,
 };
 
 enum qpu_waddr {
 	/* 0-31 are the plain regfile a or b fields */
 	QPU_W_ACC0 = 32, /* aka r0 */
 	QPU_W_ACC1,
 	QPU_W_ACC2,
 	QPU_W_ACC3,
 	QPU_W_TMU_NOSWAP,
 	QPU_W_ACC5,
 	QPU_W_HOST_INT,
 	QPU_W_NOP,
 	QPU_W_UNIFORMS_ADDRESS,
 	QPU_W_QUAD_XY, /* X for regfile a, Y for regfile b */
 	QPU_W_MS_FLAGS = 42,
 	QPU_W_REV_FLAG = 42,
 	QPU_W_TLB_STENCIL_SETUP = 43,
 	QPU_W_TLB_Z,
 	QPU_W_TLB_COLOR_MS,
 	QPU_W_TLB_COLOR_ALL,
 	QPU_W_TLB_ALPHA_MASK,
 	QPU_W_VPM,
 	QPU_W_VPMVCD_SETUP, /* LD for regfile a, ST for regfile b */
 	QPU_W_VPM_ADDR, /* LD for regfile a, ST for regfile b */
 	QPU_W_MUTEX_RELEASE,
 	QPU_W_SFU_RECIP,
 	QPU_W_SFU_RECIPSQRT,
 	QPU_W_SFU_EXP,
 	QPU_W_SFU_LOG,
 	QPU_W_TMU0_S,
 	QPU_W_TMU0_T,
 	QPU_W_TMU0_R,
 	QPU_W_TMU0_B,
 	QPU_W_TMU1_S,
 	QPU_W_TMU1_T,
 	QPU_W_TMU1_R,
 	QPU_W_TMU1_B,
 };
 
 enum qpu_sig_bits {
 	QPU_SIG_SW_BREAKPOINT,
 	QPU_SIG_NONE,
 	QPU_SIG_THREAD_SWITCH,
 	QPU_SIG_PROG_END,
 	QPU_SIG_WAIT_FOR_SCOREBOARD,
 	QPU_SIG_SCOREBOARD_UNLOCK,
 	QPU_SIG_LAST_THREAD_SWITCH,
 	QPU_SIG_COVERAGE_LOAD,
 	QPU_SIG_COLOR_LOAD,
 	QPU_SIG_COLOR_LOAD_END,
 	QPU_SIG_LOAD_TMU0,
 	QPU_SIG_LOAD_TMU1,
 	QPU_SIG_ALPHA_MASK_LOAD,
 	QPU_SIG_SMALL_IMM,
 	QPU_SIG_LOAD_IMM,
 	QPU_SIG_BRANCH
 };
 
 enum qpu_mux {
 	/* hardware mux values */
 	QPU_MUX_R0,
 	QPU_MUX_R1,
 	QPU_MUX_R2,
 	QPU_MUX_R3,
 	QPU_MUX_R4,
 	QPU_MUX_R5,
 	QPU_MUX_A,
 	QPU_MUX_B,
 
 	/* non-hardware mux values */
 	QPU_MUX_IMM,
 };
 
 enum qpu_cond {
 	QPU_COND_NEVER,
 	QPU_COND_ALWAYS,
 	QPU_COND_ZS,
 	QPU_COND_ZC,
 	QPU_COND_NS,
 	QPU_COND_NC,
 	QPU_COND_CS,
 	QPU_COND_CC,
 };
 
 enum qpu_pack_mul {
 	QPU_PACK_MUL_NOP,
-	QPU_PACK_MUL_8888 = 3, /* replicated to each 8 bits of the 32-bit dst. */
+	/* replicated to each 8 bits of the 32-bit dst. */
+	QPU_PACK_MUL_8888 = 3,
 	QPU_PACK_MUL_8A,
 	QPU_PACK_MUL_8B,
 	QPU_PACK_MUL_8C,
 	QPU_PACK_MUL_8D,
 };
 
 enum qpu_pack_a {
 	QPU_PACK_A_NOP,
 	/* convert to 16 bit float if float input, or to int16. */
 	QPU_PACK_A_16A,
 	QPU_PACK_A_16B,
 	/* replicated to each 8 bits of the 32-bit dst. */
 	QPU_PACK_A_8888,
 	/* Convert to 8-bit unsigned int. */
 	QPU_PACK_A_8A,
 	QPU_PACK_A_8B,
 	QPU_PACK_A_8C,
 	QPU_PACK_A_8D,
 
 	/* Saturating variants of the previous instructions. */
 	QPU_PACK_A_32_SAT, /* int-only */
 	QPU_PACK_A_16A_SAT, /* int or float */
 	QPU_PACK_A_16B_SAT,
 	QPU_PACK_A_8888_SAT,
 	QPU_PACK_A_8A_SAT,
 	QPU_PACK_A_8B_SAT,
 	QPU_PACK_A_8C_SAT,
 	QPU_PACK_A_8D_SAT,
 };
 
 enum qpu_unpack_r4 {
 	QPU_UNPACK_R4_NOP,
 	QPU_UNPACK_R4_F16A_TO_F32,
 	QPU_UNPACK_R4_F16B_TO_F32,
 	QPU_UNPACK_R4_8D_REP,
 	QPU_UNPACK_R4_8A,
 	QPU_UNPACK_R4_8B,
 	QPU_UNPACK_R4_8C,
 	QPU_UNPACK_R4_8D,
 };
 
-#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
-/* Using the GNU statement expression extension */
-#define QPU_SET_FIELD(value, field)                                       \
-	({                                                                 \
-		uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
-		assert((fieldval & ~ field ## _MASK) == 0);                \
-		fieldval & field ## _MASK;                                 \
-	})
+#define QPU_MASK(high, low) \
+	((((uint64_t)1 << ((high) - (low) + 1)) - 1) << (low))
 
-#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))
+#define QPU_GET_FIELD(word, field) \
+	((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))
 
 #define QPU_SIG_SHIFT                   60
 #define QPU_SIG_MASK                    QPU_MASK(63, 60)
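
QPU_GET_FIELD() is how the shader validator picks apart 64-bit QPU instructions. For instance, a check for the program-end signal might look like this (illustrative use of the macros above, not code from this commit):

static bool qpu_inst_is_prog_end(uint64_t inst)
{
	/* Extract the 4-bit signal field from bits 63:60. */
	return QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_PROG_END;
}
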


@@ -63,7 +63,6 @@ static inline void rcl_u32(struct vc4_rcl_setup *setup, u32 val)
 	setup->next_offset += 4;
 }
 
-
 /*
  * Emits a no-op STORE_TILE_BUFFER_GENERAL.
  *
@@ -217,7 +216,7 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
 	}
 
 	size += xtiles * ytiles * loop_body_size;
 
-	setup->rcl = &vc4_bo_create(dev, size)->base;
+	setup->rcl = &vc4_bo_create(dev, size, true)->base;
 	if (!setup->rcl)
 		return -ENOMEM;
 	list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head,
@@ -256,6 +255,7 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
 		for (x = min_x_tile; x <= max_x_tile; x++) {
 			bool first = (x == min_x_tile && y == min_y_tile);
 			bool last = (x == max_x_tile && y == max_y_tile);
+
 			emit_tile(exec, setup, x, y, first, last);
 		}
 	}


@@ -125,7 +125,7 @@ int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused)
 int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	uint32_t ident1 = V3D_READ(V3D_IDENT1);
@@ -133,11 +133,13 @@ int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
 	uint32_t tups = VC4_GET_FIELD(ident1, V3D_IDENT1_TUPS);
 	uint32_t qups = VC4_GET_FIELD(ident1, V3D_IDENT1_QUPS);
 
-	seq_printf(m, "Revision: %d\n", VC4_GET_FIELD(ident1, V3D_IDENT1_REV));
+	seq_printf(m, "Revision: %d\n",
+		   VC4_GET_FIELD(ident1, V3D_IDENT1_REV));
 	seq_printf(m, "Slices: %d\n", nslc);
 	seq_printf(m, "TMUs: %d\n", nslc * tups);
 	seq_printf(m, "QPUs: %d\n", nslc * qups);
-	seq_printf(m, "Semaphores: %d\n", VC4_GET_FIELD(ident1, V3D_IDENT1_NSEM));
+	seq_printf(m, "Semaphores: %d\n",
+		   VC4_GET_FIELD(ident1, V3D_IDENT1_NSEM));
 
 	return 0;
 }
@@ -218,7 +220,7 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
 }
 
 static void vc4_v3d_unbind(struct device *dev, struct device *master,
-	void *data)
+			   void *data)
 {
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct vc4_dev *vc4 = to_vc4_dev(drm);


@@ -48,7 +48,6 @@
 	void *validated,					\
 	void *untrusted
 
-
 /** Return the width in pixels of a 64-byte microtile. */
 static uint32_t
 utile_width(int cpp)
@@ -192,7 +191,7 @@ vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
 	if (size + offset < size ||
 	    size + offset > fbo->base.size) {
-		DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %d)\n",
+		DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
 			  width, height,
 			  aligned_width, aligned_height,
 			  size, offset, fbo->base.size);
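
fbo->base.size is a size_t, so the format specifier has to be %zd/%zu; on 64-bit builds, %d would be a genuine argument-size mismatch rather than just a compiler warning:

	size_t sz = fbo->base.size;

	DRM_ERROR("fbo size %zd\n", sz);	/* 'z' length modifier matches size_t */
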
@@ -278,7 +277,7 @@ validate_indexed_prim_list(VALIDATE_ARGS)
 	if (offset > ib->base.size ||
 	    (ib->base.size - offset) / index_size < length) {
-		DRM_ERROR("IB access overflow (%d + %d*%d > %d)\n",
+		DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n",
 			  offset, length, index_size, ib->base.size);
 		return -EINVAL;
 	}
} }
@@ -377,6 +376,7 @@ static int
 validate_tile_binning_config(VALIDATE_ARGS)
 {
 	struct drm_device *dev = exec->exec_bo->base.dev;
+	struct vc4_bo *tile_bo;
 	uint8_t flags;
 	uint32_t tile_state_size, tile_alloc_size;
 	uint32_t tile_count;
@@ -438,12 +438,12 @@ validate_tile_binning_config(VALIDATE_ARGS)
 	 */
 	tile_alloc_size += 1024 * 1024;
 
-	exec->tile_bo = &vc4_bo_create(dev, exec->tile_alloc_offset +
-				       tile_alloc_size)->base;
+	tile_bo = vc4_bo_create(dev, exec->tile_alloc_offset + tile_alloc_size,
+				true);
+	exec->tile_bo = &tile_bo->base;
 	if (!exec->tile_bo)
 		return -ENOMEM;
-	list_add_tail(&to_vc4_bo(&exec->tile_bo->base)->unref_head,
-		      &exec->unref_list);
+	list_add_tail(&tile_bo->unref_head, &exec->unref_list);
 
 	/* tile alloc address. */
 	*(uint32_t *)(validated + 0) = (exec->tile_bo->paddr +
@@ -463,8 +463,8 @@ validate_gem_handles(VALIDATE_ARGS)
 	return 0;
 }
 
-#define VC4_DEFINE_PACKET(packet, name, func) \
-	[packet] = { packet ## _SIZE, name, func }
+#define VC4_DEFINE_PACKET(packet, func) \
+	[packet] = { packet ## _SIZE, #packet, func }
 
 static const struct cmd_info {
 	uint16_t len;
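
The reworked macro leans on the preprocessor's # (stringize) operator, so the debug name can never drift out of sync with the packet enum. For example, VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL) now expands roughly to:

	[VC4_PACKET_HALT] = { VC4_PACKET_HALT_SIZE, "VC4_PACKET_HALT", NULL }
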
@@ -472,42 +472,43 @@ static const struct cmd_info {
 	int (*func)(struct vc4_exec_info *exec, void *validated,
 		    void *untrusted);
 } cmd_info[] = {
-	VC4_DEFINE_PACKET(VC4_PACKET_HALT, "halt", NULL),
-	VC4_DEFINE_PACKET(VC4_PACKET_NOP, "nop", NULL),
-	VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, "flush", NULL),
-	VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, "flush all state", validate_flush_all),
-	VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, "start tile binning", validate_start_tile_binning),
-	VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, "increment semaphore", validate_increment_semaphore),
-	VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, "Indexed Primitive List", validate_indexed_prim_list),
-	VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, "Vertex Array Primitives", validate_gl_array_primitive),
+	VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, validate_flush_all),
+	VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
+			  validate_start_tile_binning),
+	VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
+			  validate_increment_semaphore),
+	VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,
+			  validate_indexed_prim_list),
+	VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,
+			  validate_gl_array_primitive),
 	/* This is only used by clipped primitives (packets 48 and 49), which
 	 * we don't support parsing yet.
 	 */
-	VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, "primitive list format", NULL),
-	VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, "GL Shader State", validate_gl_shader_state),
-	VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, "NV Shader State", validate_nv_shader_state),
-	VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, "configuration bits", NULL),
-	VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, "flat shade flags", NULL),
-	VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, "point size", NULL),
-	VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, "line width", NULL),
-	VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, "RHT X boundary", NULL),
-	VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, "Depth Offset", NULL),
-	VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, "Clip Window", NULL),
-	VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, "Viewport Offset", NULL),
-	VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, "Clipper XY Scaling", NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),
+	VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, validate_nv_shader_state),
+	VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),
 	/* Note: The docs say this was also 105, but it was 106 in the
 	 * initial userland code drop.
 	 */
-	VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, "Clipper Z Scale and Offset", NULL),
-	VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, "tile binning configuration", validate_tile_binning_config),
-	VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, "GEM handles", validate_gem_handles),
+	VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
+			  validate_tile_binning_config),
+	VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
 };
 
 int
@@ -526,7 +527,7 @@ vc4_validate_bin_cl(struct drm_device *dev,
 		u8 cmd = *(uint8_t *)src_pkt;
 		const struct cmd_info *info;
 
-		if (cmd > ARRAY_SIZE(cmd_info)) {
+		if (cmd >= ARRAY_SIZE(cmd_info)) {
 			DRM_ERROR("0x%08x: packet %d out of bounds\n",
 				  src_offset, cmd);
 			return -EINVAL;
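
The switch from > to >= fixes an off-by-one: valid indices into cmd_info[] run from 0 to ARRAY_SIZE(cmd_info) - 1, so a cmd exactly equal to the array size has to be rejected as well.

	/* With N == ARRAY_SIZE(cmd_info), the entries are cmd_info[0..N-1]:
	 *   cmd >  N  -- rejected by either check
	 *   cmd == N  -- old check allowed a one-past-the-end read; new check rejects it
	 *   cmd <  N  -- valid lookup
	 */
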
@@ -539,11 +540,6 @@ vc4_validate_bin_cl(struct drm_device *dev,
 			return -EINVAL;
 		}
 
-#if 0
-		DRM_INFO("0x%08x: packet %d (%s) size %d processing...\n",
-			 src_offset, cmd, info->name, info->len);
-#endif
-
 		if (src_offset + info->len > len) {
 			DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
 				  "exceeds bounds (0x%08x)\n",
@@ -558,8 +554,7 @@ vc4_validate_bin_cl(struct drm_device *dev,
 		if (info->func && info->func(exec,
 					     dst_pkt + 1,
 					     src_pkt + 1)) {
-			DRM_ERROR("0x%08x: packet %d (%s) failed to "
-				  "validate\n",
+			DRM_ERROR("0x%08x: packet %d (%s) failed to validate\n",
 				  src_offset, cmd, info->name);
 			return -EINVAL;
 		}
@@ -618,12 +613,14 @@ reloc_tex(struct vc4_exec_info *exec,
 	if (sample->is_direct) {
 		uint32_t remaining_size = tex->base.size - p0;
+
 		if (p0 > tex->base.size - 4) {
 			DRM_ERROR("UBO offset greater than UBO size\n");
 			goto fail;
 		}
+
 		if (p1 > remaining_size - 4) {
-			DRM_ERROR("UBO clamp would allow reads outside of UBO\n");
+			DRM_ERROR("UBO clamp would allow reads "
+				  "outside of UBO\n");
 			goto fail;
 		}
 		*validated_p0 = tex->paddr + p0;
@@ -786,7 +783,7 @@ validate_shader_rec(struct drm_device *dev,
 	struct drm_gem_cma_object *bo[ARRAY_SIZE(gl_relocs) + 8];
 	uint32_t nr_attributes = 0, nr_fixed_relocs, nr_relocs, packet_size;
 	int i;
-	struct vc4_validated_shader_info *validated_shader;
+	struct vc4_validated_shader_info *shader;
 
 	if (state->packet == VC4_PACKET_NV_SHADER_STATE) {
 		relocs = nv_relocs;
@@ -841,12 +838,12 @@ validate_shader_rec(struct drm_device *dev,
 		else
 			mode = VC4_MODE_RENDER;
 
-		if (!vc4_use_bo(exec, src_handles[i], mode, &bo[i])) {
+		if (!vc4_use_bo(exec, src_handles[i], mode, &bo[i]))
 			return false;
-		}
 	}
 
 	for (i = 0; i < nr_fixed_relocs; i++) {
+		struct vc4_bo *vc4_bo;
 		uint32_t o = relocs[i].offset;
 		uint32_t src_offset = *(uint32_t *)(pkt_u + o);
 		uint32_t *texture_handles_u;
@@ -858,34 +855,34 @@ validate_shader_rec(struct drm_device *dev,
 		switch (relocs[i].type) {
 		case RELOC_CODE:
 			if (src_offset != 0) {
-				DRM_ERROR("Shaders must be at offset 0 of "
-					  "the BO.\n");
+				DRM_ERROR("Shaders must be at offset 0 "
+					  "of the BO.\n");
 				goto fail;
 			}
 
-			validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
-			if (!validated_shader)
+			vc4_bo = to_vc4_bo(&bo[i]->base);
+			shader = vc4_bo->validated_shader;
+			if (!shader)
 				goto fail;
 
-			if (validated_shader->uniforms_src_size >
-			    exec->uniforms_size) {
+			if (shader->uniforms_src_size > exec->uniforms_size) {
 				DRM_ERROR("Uniforms src buffer overflow\n");
 				goto fail;
 			}
 
 			texture_handles_u = exec->uniforms_u;
 			uniform_data_u = (texture_handles_u +
-					  validated_shader->num_texture_samples);
+					  shader->num_texture_samples);
 
 			memcpy(exec->uniforms_v, uniform_data_u,
-			       validated_shader->uniforms_size);
+			       shader->uniforms_size);
 
 			for (tex = 0;
-			     tex < validated_shader->num_texture_samples;
+			     tex < shader->num_texture_samples;
 			     tex++) {
 				if (!reloc_tex(exec,
 					       uniform_data_u,
-					       &validated_shader->texture_samples[tex],
+					       &shader->texture_samples[tex],
 					       texture_handles_u[tex])) {
 					goto fail;
 				}
@@ -893,9 +890,9 @@ validate_shader_rec(struct drm_device *dev,
			*(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;

-			exec->uniforms_u += validated_shader->uniforms_src_size;
-			exec->uniforms_v += validated_shader->uniforms_size;
-			exec->uniforms_p += validated_shader->uniforms_size;
+			exec->uniforms_u += shader->uniforms_src_size;
+			exec->uniforms_v += shader->uniforms_size;
+			exec->uniforms_p += shader->uniforms_size;

			break;
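The differing increments above are the point of this bookkeeping: the user-supplied stream (uniforms_u) carries the texture handles in front of the uniform data, so it advances by uniforms_src_size, while the validated copy (uniforms_v) and its GPU address (uniforms_p) hold only the uniform words. A simplified sketch with invented sizes:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint32_t src[6] = { 7, 8,		/* two texture handles */
			    1, 2, 3, 4 };	/* four uniform words  */
	uint32_t dst[4];
	size_t num_tex = 2, num_uniforms = 4;

	/* copy only the uniform payload, skipping the handles */
	memcpy(dst, src + num_tex, num_uniforms * sizeof(uint32_t));

	printf("src stream advances %zu bytes, validated copy %zu\n",
	       (num_tex + num_uniforms) * sizeof(uint32_t),
	       num_uniforms * sizeof(uint32_t));
	return 0;
}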
@@ -926,7 +923,8 @@ validate_shader_rec(struct drm_device *dev,
			max_index = ((vbo->base.size - offset - attr_size) /
				     stride);
			if (state->max_index > max_index) {
-				DRM_ERROR("primitives use index %d out of supplied %d\n",
+				DRM_ERROR("primitives use index %d out of "
+					  "supplied %d\n",
					  state->max_index, max_index);
				return -EINVAL;
			}
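For context on the max_index computation above: with offset bytes skipped at the start of the vertex BO, stride bytes per vertex, and attr_size bytes fetched per attribute, index i touches bytes [offset + i*stride, offset + i*stride + attr_size), so the last safe index is (size - offset - attr_size) / stride. A worked example with invented sizes:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t size = 4096, offset = 16, stride = 12, attr_size = 8;
	uint32_t max_index = (size - offset - attr_size) / stride;

	printf("max_index = %u\n", max_index);	/* 339 */
	/* check: 16 + 339 * 12 + 8 = 4092 <= 4096; index 340 would end at 4104 */
	return 0;
}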

diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c

@@ -24,24 +24,16 @@
 /**
  * DOC: Shader validator for VC4.
  *
- * The VC4 has no IOMMU between it and system memory.  So, a user with access
- * to execute shaders could escalate privilege by overwriting system memory
- * (using the VPM write address register in the general-purpose DMA mode) or
- * reading system memory it shouldn't (reading it as a texture, or uniform
- * data, or vertex data).
+ * The VC4 has no IOMMU between it and system memory, so a user with
+ * access to execute shaders could escalate privilege by overwriting
+ * system memory (using the VPM write address register in the
+ * general-purpose DMA mode) or reading system memory it shouldn't
+ * (reading it as a texture, or uniform data, or vertex data).
  *
- * This walks over a shader starting from some offset within a BO, ensuring
- * that its accesses are appropriately bounded, and recording how many texture
- * accesses are made and where so that we can do relocations for them in the
+ * This walks over a shader BO, ensuring that its accesses are
+ * appropriately bounded, and recording how many texture accesses are
+ * made and where so that we can do relocations for them in the
  * uniform stream.
- *
- * The kernel API has shaders stored in user-mapped BOs.  The BOs will be
- * forcibly unmapped from the process before validation, and any cache of
- * validated state will be flushed if the mapping is faulted back in.
- *
- * Storing the shaders in BOs means that the validation process will be slow
- * due to uncached reads, but since shaders are long-lived and shader BOs are
- * never actually modified, this shouldn't be a problem.
  */

 #include "vc4_drv.h"
@@ -70,7 +62,6 @@ waddr_to_live_reg_index(uint32_t waddr, bool is_b)
		else
			return waddr;
	} else if (waddr <= QPU_W_ACC3) {
-
		return 64 + waddr - QPU_W_ACC0;
	} else {
		return ~0;
@@ -85,15 +76,14 @@ raddr_add_a_to_live_reg_index(uint64_t inst)
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);

-	if (add_a == QPU_MUX_A) {
+	if (add_a == QPU_MUX_A)
		return raddr_a;
-	} else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) {
+	else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
		return 32 + raddr_b;
-	} else if (add_a <= QPU_MUX_R3) {
+	else if (add_a <= QPU_MUX_R3)
		return 64 + add_a;
-	} else {
+	else
		return ~0;
-	}
 }

 static bool
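These two helpers map every readable location into one flat "live register" index space so the validator can track clamped values: from the arithmetic, regfile A occupies indices 0-31, regfile B 32-63, and accumulators r0-r3 64-67, with ~0 meaning "not tracked". A tiny demonstration of that layout (values invented):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t raddr_a = 5, raddr_b = 9, acc = 2;

	printf("A%u -> live index %u\n", raddr_a, raddr_a);		/* 5  */
	printf("B%u -> live index %u\n", raddr_b, 32 + raddr_b);	/* 41 */
	printf("r%u -> live index %u\n", acc, 64 + acc);		/* 66 */
	return 0;
}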
@@ -111,9 +101,9 @@ is_tmu_write(uint32_t waddr)
 }

 static bool
-record_validated_texture_sample(struct vc4_validated_shader_info *validated_shader,
-				struct vc4_shader_validation_state *validation_state,
-				int tmu)
+record_texture_sample(struct vc4_validated_shader_info *validated_shader,
+		      struct vc4_shader_validation_state *validation_state,
+		      int tmu)
 {
	uint32_t s = validated_shader->num_texture_samples;
	int i;
@@ -226,8 +216,8 @@ check_tmu_write(uint64_t inst,
	validated_shader->uniforms_size += 4;

	if (submit) {
-		if (!record_validated_texture_sample(validated_shader,
-						     validation_state, tmu)) {
+		if (!record_texture_sample(validated_shader,
+					   validation_state, tmu)) {
			return false;
		}
@@ -238,10 +228,10 @@
 }

 static bool
-check_register_write(uint64_t inst,
-		     struct vc4_validated_shader_info *validated_shader,
-		     struct vc4_shader_validation_state *validation_state,
-		     bool is_mul)
+check_reg_write(uint64_t inst,
+		struct vc4_validated_shader_info *validated_shader,
+		struct vc4_shader_validation_state *validation_state,
+		bool is_mul)
 {
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
@@ -297,7 +287,7 @@ check_register_write(uint64_t inst,
		return true;

	case QPU_W_TLB_STENCIL_SETUP:
		return true;
	}

	return true;
@@ -360,7 +350,7 @@ track_live_clamps(uint64_t inst,
		}

		validation_state->live_max_clamp_regs[lri_add] = true;
-	} if (op_add == QPU_A_MIN) {
+	} else if (op_add == QPU_A_MIN) {
		/* Track live clamps of a value clamped to a minimum of 0 and
		 * a maximum of some uniform's offset.
		 */
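The added else above matters: without it, the second condition is evaluated (and its block can run) even when the first branch already matched. A contrived standalone illustration (values invented):

#include <stdio.h>

int main(void)
{
	int op = 1;	/* pretend 1 selects "max", 2 selects "min" */
	int hits = 0;

	if (op == 1) {
		hits++;			/* max branch */
	} if (op == 1 || op == 2) {	/* BUG: missing 'else' runs this too */
		hits++;
	}
	printf("hits = %d\n", hits);	/* prints 2, not the intended 1 */
	return 0;
}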
@@ -392,8 +382,10 @@ check_instruction_writes(uint64_t inst,
		return false;
	}

-	ok = (check_register_write(inst, validated_shader, validation_state, false) &&
-	      check_register_write(inst, validated_shader, validation_state, true));
+	ok = (check_reg_write(inst, validated_shader, validation_state,
+			      false) &&
+	      check_reg_write(inst, validated_shader, validation_state,
+			      true));

	track_live_clamps(inst, validated_shader, validation_state);
@@ -441,7 +433,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
	shader = shader_obj->vaddr;
	max_ip = shader_obj->base.size / sizeof(uint64_t);

-	validated_shader = kcalloc(sizeof(*validated_shader), 1, GFP_KERNEL);
+	validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
	if (!validated_shader)
		return NULL;
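The kcalloc() fix above is argument order, not behaviour: the allocation is the same size either way, but kcalloc(n, size, flags) is declared with the element count first, mirroring userspace calloc(nmemb, size). A userspace analogue of the corrected call (struct name invented):

#include <stdlib.h>

struct validated_shader { unsigned int uniforms_size; };

int main(void)
{
	/* one zeroed element: count first, then element size */
	struct validated_shader *v = calloc(1, sizeof(*v));

	if (!v)
		return 1;
	free(v);
	return 0;
}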
@@ -497,7 +489,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
	if (ip == max_ip) {
		DRM_ERROR("shader failed to terminate before "
-			  "shader BO end at %d\n",
+			  "shader BO end at %zd\n",
			  shader_obj->base.size);
		goto fail;
	}
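The format fix above matters because shader_obj->base.size is a size_t: printing it with %d is undefined behaviour on targets where size_t is wider than int, while the z length modifier sizes the argument correctly. A minimal illustration:

#include <stddef.h>
#include <stdio.h>

int main(void)
{
	size_t bo_size = 65536;

	printf("shader BO end at %zu\n", bo_size);	/* portable */
	return 0;
}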

diff --git a/include/drm/drmP.h b/include/drm/drmP.h

@@ -585,6 +585,13 @@ struct drm_driver {
	int (*gem_open_object) (struct drm_gem_object *, struct drm_file *);
	void (*gem_close_object) (struct drm_gem_object *, struct drm_file *);

+	/**
+	 * Hook for allocating the GEM object struct, for use by core
+	 * helpers.
+	 */
+	struct drm_gem_object *(*gem_create_object)(struct drm_device *dev,
+						    size_t size);
+
	/* prime: */
	/* export handle -> fd (see drm_gem_prime_handle_to_fd() helper) */
	int (*prime_handle_to_fd)(struct drm_device *dev, struct drm_file *file_priv,
@@ -639,7 +646,6 @@ struct drm_driver {
	u32 driver_features;
	int dev_priv_size;
-	size_t gem_obj_size;
	const struct drm_ioctl_desc *ioctls;
	int num_ioctls;
	const struct file_operations *fops;
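Tying the two struct drm_driver hunks together: the fixed gem_obj_size field gives way to a constructor hook, so a driver that embeds the CMA GEM object in a larger struct now allocates the wrapper itself. A minimal sketch of such an implementation, assuming the APIs in this patch (my_bo and my_gem_create_object are invented names; this is not the vc4 code itself):

#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/drm_gem_cma_helper.h>

struct my_bo {
	struct drm_gem_cma_object base;
	/* driver-private bookkeeping fields would follow */
};

static struct drm_gem_object *my_gem_create_object(struct drm_device *dev,
						   size_t size)
{
	struct my_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);

	/* returning NULL here makes the CMA helper fail with -ENOMEM */
	if (!bo)
		return NULL;
	return &bo->base.base;
}

static struct drm_driver my_driver = {
	/* ...feature flags, fops, ioctls... */
	.gem_create_object = my_gem_create_object,
};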