vchiq_arm: Two cacheing fixes

1) Make fragment size vary with cache line size
Without this patch, non-cache-line-aligned transfers may corrupt
(or be corrupted by) adjacent data structures.

Both ARM and VC need to be updated to enable this feature. This is
ensured by having the loader apply a new DT parameter -
cache-line-size. The existence of this parameter guarantees that the
kernel is capable, and the parameter will only be modified from the
safe default if the loader is capable.

2) Flush/invalidate vmalloc'd memory, and invalidate after reads
This commit is contained in:
Phil Elwell
2015-07-08 14:48:57 +01:00
committed by popcornmix
parent 08cb2a5643
commit fe05ba5439
2 changed files with 77 additions and 40 deletions

View File

@@ -218,6 +218,7 @@
compatible = "brcm,bcm2835-vchiq";
reg = <0x7e00b840 0xf>;
interrupts = <0 2>;
cache-line-size = <32>;
};
thermal: thermal {
@@ -270,4 +271,8 @@
clock-frequency = <126000000>;
};
};
__overrides__ {
cache_line_size = <&vchiq>, "cache-line-size:0";
};
};

View File

@@ -42,6 +42,7 @@
#include <linux/platform_data/mailbox-bcm2708.h>
#include <linux/platform_device.h>
#include <linux/uaccess.h>
#include <linux/of.h>
#include <asm/pgtable.h>
#define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)
@@ -64,8 +65,10 @@ typedef struct vchiq_2835_state_struct {
} VCHIQ_2835_ARM_STATE_T;
static void __iomem *g_regs;
static FRAGMENTS_T *g_fragments_base;
static FRAGMENTS_T *g_free_fragments;
static unsigned int g_cache_line_size = sizeof(CACHE_LINE_SIZE);
static unsigned int g_fragments_size;
static char *g_fragments_base;
static char *g_free_fragments;
static struct semaphore g_free_fragments_sema;
static unsigned long g_virt_to_bus_offset;
@@ -95,9 +98,13 @@ int vchiq_platform_init(struct platform_device *pdev, VCHIQ_STATE_T *state)
g_virt_to_bus_offset = virt_to_dma(dev, (void *)0);
(void)of_property_read_u32(dev->of_node, "cache-line-size",
&g_cache_line_size);
g_fragments_size = 2 * g_cache_line_size;
/* Allocate space for the channels in coherent memory */
slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE);
frag_mem_size = PAGE_ALIGN(sizeof(FRAGMENTS_T) * MAX_FRAGMENTS);
frag_mem_size = PAGE_ALIGN(g_fragments_size * MAX_FRAGMENTS);
slot_mem = dmam_alloc_coherent(dev, slot_mem_size + frag_mem_size,
&slot_phys, GFP_KERNEL);
@@ -117,15 +124,15 @@ int vchiq_platform_init(struct platform_device *pdev, VCHIQ_STATE_T *state)
vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] =
MAX_FRAGMENTS;
g_fragments_base = (FRAGMENTS_T *)(slot_mem + slot_mem_size);
g_fragments_base = (char *)slot_mem + slot_mem_size;
slot_mem_size += frag_mem_size;
g_free_fragments = g_fragments_base;
for (i = 0; i < (MAX_FRAGMENTS - 1); i++) {
*(FRAGMENTS_T **)&g_fragments_base[i] =
&g_fragments_base[i + 1];
*(char **)&g_fragments_base[i*g_fragments_size] =
&g_fragments_base[(i + 1)*g_fragments_size];
}
*(FRAGMENTS_T **)&g_fragments_base[i] = NULL;
*(char **)&g_fragments_base[i * g_fragments_size] = NULL;
sema_init(&g_free_fragments_sema, MAX_FRAGMENTS);
if (vchiq_init_state(state, vchiq_slot_zero, 0) != VCHIQ_SUCCESS)
@@ -344,7 +351,7 @@ vchiq_doorbell_irq(int irq, void *dev_id)
** cached area.
** N.B. This implementation plays slightly fast and loose with the Linux
** driver programming rules, e.g. its use of __virt_to_bus instead of
** driver programming rules, e.g. its use of dmac_map_area instead of
** dma_map_single, but it isn't a multi-platform driver and it benefits
** from increased speed as a result.
*/
@@ -355,7 +362,6 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
{
PAGELIST_T *pagelist;
struct page **pages;
struct page *page;
unsigned long *addrs;
unsigned int num_pages, offset, i;
char *addr, *base_addr, *next_addr;
@@ -386,10 +392,25 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
pages = (struct page **)(addrs + num_pages + 1);
if (is_vmalloc_addr(buf)) {
for (actual_pages = 0; actual_pages < num_pages; actual_pages++) {
pages[actual_pages] = vmalloc_to_page(buf + (actual_pages * PAGE_SIZE));
int dir = (type == PAGELIST_WRITE) ?
DMA_TO_DEVICE : DMA_FROM_DEVICE;
unsigned long length = pagelist->length;
unsigned int offset = pagelist->offset;
for (actual_pages = 0; actual_pages < num_pages;
actual_pages++) {
struct page *pg = vmalloc_to_page(buf + (actual_pages *
PAGE_SIZE));
size_t bytes = PAGE_SIZE - offset;
if (bytes > length)
bytes = length;
pages[actual_pages] = pg;
dmac_map_area(page_address(pg) + offset, bytes, dir);
length -= bytes;
offset = 0;
}
*need_release = 0; /* do not try and release vmalloc pages */
*need_release = 0; /* do not try and release vmalloc pages */
} else {
down_read(&task->mm->mmap_sem);
actual_pages = get_user_pages(task, task->mm,
@@ -418,7 +439,7 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
actual_pages = -ENOMEM;
return actual_pages;
}
*need_release = 1; /* release user pages */
*need_release = 1; /* release user pages */
}
pagelist->length = count;
@@ -451,10 +472,10 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
/* Partial cache lines (fragments) require special measures */
if ((type == PAGELIST_READ) &&
((pagelist->offset & (CACHE_LINE_SIZE - 1)) ||
((pagelist->offset & (g_cache_line_size - 1)) ||
((pagelist->offset + pagelist->length) &
(CACHE_LINE_SIZE - 1)))) {
FRAGMENTS_T *fragments;
(g_cache_line_size - 1)))) {
char *fragments;
if (down_interruptible(&g_free_fragments_sema) != 0) {
kfree(pagelist);
@@ -464,19 +485,15 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
WARN_ON(g_free_fragments == NULL);
down(&g_free_fragments_mutex);
fragments = (FRAGMENTS_T *) g_free_fragments;
fragments = g_free_fragments;
WARN_ON(fragments == NULL);
g_free_fragments = *(FRAGMENTS_T **) g_free_fragments;
g_free_fragments = *(char **) g_free_fragments;
up(&g_free_fragments_mutex);
pagelist->type =
PAGELIST_READ_WITH_FRAGMENTS + (fragments -
g_fragments_base);
pagelist->type = PAGELIST_READ_WITH_FRAGMENTS +
(fragments - g_fragments_base) / g_fragments_size;
}
for (page = virt_to_page(pagelist);
page <= virt_to_page(addrs + num_pages - 1); page++) {
flush_dcache_page(page);
}
dmac_flush_range(pagelist, addrs + num_pages);
*ppagelist = pagelist;
@@ -502,13 +519,14 @@ free_pagelist(PAGELIST_T *pagelist, int actual)
/* Deal with any partial cache lines (fragments) */
if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) {
FRAGMENTS_T *fragments = g_fragments_base +
(pagelist->type - PAGELIST_READ_WITH_FRAGMENTS);
char *fragments = g_fragments_base +
(pagelist->type - PAGELIST_READ_WITH_FRAGMENTS) *
g_fragments_size;
int head_bytes, tail_bytes;
head_bytes = (CACHE_LINE_SIZE - pagelist->offset) &
(CACHE_LINE_SIZE - 1);
head_bytes = (g_cache_line_size - pagelist->offset) &
(g_cache_line_size - 1);
tail_bytes = (pagelist->offset + actual) &
(CACHE_LINE_SIZE - 1);
(g_cache_line_size - 1);
if ((actual >= 0) && (head_bytes != 0)) {
if (head_bytes > actual)
@@ -516,32 +534,46 @@ free_pagelist(PAGELIST_T *pagelist, int actual)
memcpy((char *)page_address(pages[0]) +
pagelist->offset,
fragments->headbuf,
fragments,
head_bytes);
}
if ((actual >= 0) && (head_bytes < actual) &&
(tail_bytes != 0)) {
memcpy((char *)page_address(pages[num_pages - 1]) +
((pagelist->offset + actual) &
(PAGE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1)),
fragments->tailbuf, tail_bytes);
(PAGE_SIZE - 1) & ~(g_cache_line_size - 1)),
fragments + g_cache_line_size,
tail_bytes);
}
down(&g_free_fragments_mutex);
*(FRAGMENTS_T **) fragments = g_free_fragments;
*(char **)fragments = g_free_fragments;
g_free_fragments = fragments;
up(&g_free_fragments_mutex);
up(&g_free_fragments_sema);
}
if (*need_release) {
for (i = 0; i < num_pages; i++) {
if (pagelist->type != PAGELIST_WRITE)
set_page_dirty(pages[i]);
if (*need_release) {
unsigned int length = pagelist->length;
unsigned int offset = pagelist->offset;
page_cache_release(pages[i]);
for (i = 0; i < num_pages; i++) {
struct page *pg = pages[i];
if (pagelist->type != PAGELIST_WRITE) {
unsigned int bytes = PAGE_SIZE - offset;
if (bytes > length)
bytes = length;
dmac_unmap_area(page_address(pg) + offset,
bytes, DMA_FROM_DEVICE);
length -= bytes;
offset = 0;
set_page_dirty(pg);
}
page_cache_release(pg);
}
}
}
kfree(pagelist);
}