mirror of
https://github.com/raspberrypi/linux.git
synced 2026-01-05 10:47:34 +00:00
The original version duplicated more or less the same algorithms for both system and I/O memory. In this version the drawing algorithms (copy/fill/blit) are separate from the memory access (system and I/O). The two parts are getting combined in the loadable module sources. This also makes it more robust against wrong memory access type or alignment mistakes as there's no direct pointer access or arithmetic in the algorithm sources anymore. Due to liberal use of inlining the compiled result is a single function in all 6 cases, without unnecessary function calls. Unlike earlier, the use of macros could be minimized as apparently both gcc and clang are now capable of doing the same with inline functions just as well. What wasn't quite the same in the two variants is the support for pixel order reversing. This version is capable of doing that for both system and I/O memory, and not only for the latter. As demand for low bits per pixel modes isn't high there's a configuration option to enable this separately for the CFB and SYS modules. The pixel reversing algorithm is different from the earlier one and was designed so that it can take advantage of bit order reversing instructions on architectures which have them. And even for higher bits per pixel modes like four bpp. One of the shortcomings of the earlier version was the incomplete support for foreign endian framebuffers. Now all three drawing algorithms produce correct output on both endians with native and foreign framebuffers. This is one of the important differences even if otherwise the algorithms don't look too different from before. All three routines work now with aligned native word accesses. As a consequence blitting isn't limited to 32 bits on 64 bit architectures as it was before. The old routines silently assumed that rows are a multiple of the word size. Due to how the new routines function this isn't a requirement any more and access will be done aligned regardless. 
However if the framebuffer is configured like that then some of the fast paths won't be available. As this code is supposed to be running on all supported architectures it wasn't optimized for a particular one. That doesn't mean I haven't looked at the disassembly. That's where I noticed that it isn't a good idea to use the fallback bitreversing code for example. The low bits per pixel modes should be faster than before as the new routines can blit 4 pixels at a time. On the higher bits per pixel modes I retained the specialized aligned routines so it should be more or less the same, except on 64 bit architectures. There the blitting word size is now double, which means 32 BPP is no longer done a single pixel at a time. The code was tested on x86, amd64, mips32 and mips64. The latter two in big endian configuration. Originally I thought I could get away with the first two, but with such bit twisting code byte ordering is tricky and not really possible to get right without actually verifying it. While writing such routines isn't rocket science, a lot of time was spent on making sure that pixel ordering, foreign byte order, various bits per pixels, cpu endianness and word size will give the expected result in all sorts of combinations without making it overly complicated or full of special cases. Signed-off-by: Zsolt Kajtar <soci@c64.rulez.org> Signed-off-by: Helge Deller <deller@gmx.de>
406 lines
11 KiB
C
406 lines
11 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only
 *
 * Generic bit area copy and twister engine for packed pixel framebuffers
 *
 * Rewritten by:
 * Copyright (C) 2025 Zsolt Kajtar (soci@c64.rulez.org)
 *
 * Based on previous work of:
 * Copyright (C) 1999-2005 James Simmons <jsimmons@www.infradead.org>
 * Anton Vorontsov <avorontsov@ru.mvista.com>
 * Pavel Pisa <pisa@cmp.felk.cvut.cz>
 * Antonino Daplas <adaplas@hotpop.com>
 * Geert Uytterhoeven
 * and others
 *
 * NOTES:
 *
 * Handles native and foreign byte order on both endians, standard and
 * reverse pixel order in a byte (<8 BPP), word length of 32/64 bits,
 * bits per pixel from 1 to the word length. Handles line lengths at byte
 * granularity while maintaining aligned accesses.
 *
 * Optimized routines for word aligned copying and byte aligned copying
 * on reverse pixel framebuffers.
 */
|
|
#include "fb_draw.h"
|
|
|
|
/*
 * Identity "reorder" callback: used when no byte/pixel reversing is
 * necessary.  The copy routines compare their @reorder argument against
 * this function by address to select the non-reversing fast path, so the
 * unused @reverse parameter must keep the same signature as the real
 * reversing variant (presumably fb_reverse_long in fb_draw.h).
 */
static inline unsigned long fb_no_reverse(unsigned long val, struct fb_reverse reverse)
{
	return val;
}
|
|
|
|
/*
 * Copy only the bits selected by @mask from the source word at @offset
 * into the destination word at the same offset, leaving the remaining
 * destination bits untouched (read-modify-write of the masked area).
 */
static inline void fb_copy_offset_masked(unsigned long mask, int offset,
					 const struct fb_address *dst,
					 const struct fb_address *src)
{
	unsigned long data = fb_read_offset(offset, src);

	fb_modify_offset(data, mask, offset, dst);
}
|
|
|
|
/*
 * Transfer one complete word from @src to @dst at the given word @offset.
 */
static inline void fb_copy_offset(int offset, const struct fb_address *dst,
				  const struct fb_address *src)
{
	unsigned long data = fb_read_offset(offset, src);

	fb_write_offset(data, offset, dst);
}
|
|
|
|
/*
 * fb_copy_aligned_fwd - copy one row forward when src and dst have the
 * same in-word bit alignment
 * @dst: destination address; dst->bits is the bit offset inside the
 *       first destination word
 * @src: source address, assumed to have the same in-word bit offset
 * @end: bit offset of the end of the area, counted from the start of the
 *       first destination word (i.e. dst->bits + width)
 * @reverse: byte/pixel reversing configuration used for mask construction
 *
 * Whole words are copied directly; the partial words at either edge are
 * merged with existing destination content via masked read-modify-write.
 * "first" masks the valid bits of the leading word, "last" masks the
 * valid bits of the trailing word (zero when the area ends exactly on a
 * word boundary).
 */
static inline void fb_copy_aligned_fwd(const struct fb_address *dst,
				       const struct fb_address *src,
				       int end, struct fb_reverse reverse)
{
	unsigned long first, last;

	first = fb_pixel_mask(dst->bits, reverse);
	last = ~fb_pixel_mask(end & (BITS_PER_LONG-1), reverse);

	/* Same alignment for source and dest */
	if (end <= BITS_PER_LONG) {
		/* Single word: combine leading and trailing masks */
		last = last ? (last & first) : first;

		/* Trailing bits; full mask means the word can be written whole */
		if (last == ~0UL)
			fb_copy_offset(0, dst, src);
		else
			fb_copy_offset_masked(last, 0, dst, src);
	} else {
		/* Multiple destination words */
		int offset = first != ~0UL; /* 1 when a partial leading word exists */

		/* Leading bits */
		if (offset)
			fb_copy_offset_masked(first, 0, dst, src);

		/* Main chunk: whole words, manually unrolled by four */
		end /= BITS_PER_LONG;
		while (offset + 4 <= end) {
			fb_copy_offset(offset + 0, dst, src);
			fb_copy_offset(offset + 1, dst, src);
			fb_copy_offset(offset + 2, dst, src);
			fb_copy_offset(offset + 3, dst, src);
			offset += 4;
		}
		while (offset < end)
			fb_copy_offset(offset++, dst, src);

		/* Trailing bits (only when the area doesn't end on a word) */
		if (last)
			fb_copy_offset_masked(last, offset, dst, src);
	}
}
|
|
|
|
/*
 * fb_copy_aligned_rev - copy one row backward (trailing word first) when
 * src and dst have the same in-word bit alignment
 * @dst: destination address; dst->bits is the bit offset inside the
 *       first destination word
 * @src: source address, assumed to have the same in-word bit offset
 * @end: bit offset of the end of the area, counted from the start of the
 *       first destination word (i.e. dst->bits + width)
 * @reverse: byte/pixel reversing configuration used for mask construction
 *
 * Mirror image of fb_copy_aligned_fwd: words are processed from the end
 * toward the beginning so that overlapping areas where the destination
 * lies after the source are copied correctly.
 */
static inline void fb_copy_aligned_rev(const struct fb_address *dst,
				       const struct fb_address *src,
				       int end, struct fb_reverse reverse)
{
	unsigned long first, last;

	first = fb_pixel_mask(dst->bits, reverse);
	last = ~fb_pixel_mask(end & (BITS_PER_LONG-1), reverse);

	if (end <= BITS_PER_LONG) {
		/* Single word: combine leading and trailing masks */
		if (last)
			first &= last;
		if (first == ~0UL)
			fb_copy_offset(0, dst, src);
		else
			fb_copy_offset_masked(first, 0, dst, src);
	} else {
		/* Multiple destination words */
		int offset = first != ~0UL; /* 1 when a partial leading word exists */

		/* Trailing bits are written first */
		end /= BITS_PER_LONG;

		if (last)
			fb_copy_offset_masked(last, end, dst, src);

		/* Main chunk: whole words backward, manually unrolled by four */
		while (end >= offset + 4) {
			fb_copy_offset(end - 1, dst, src);
			fb_copy_offset(end - 2, dst, src);
			fb_copy_offset(end - 3, dst, src);
			fb_copy_offset(end - 4, dst, src);
			end -= 4;
		}
		while (end > offset)
			fb_copy_offset(--end, dst, src);

		/* Leading bits are written last */
		if (offset)
			fb_copy_offset_masked(first, 0, dst, src);
	}
}
|
|
|
|
/*
 * fb_copy_aligned - copy a rectangle when src and dst share the same
 * in-word bit alignment
 * @dst: destination address, advanced/retreated a full line per row
 * @src: source address, kept in lockstep with @dst
 * @width: width of the area in bits
 * @height: number of rows to copy
 * @bits_per_line: framebuffer line pitch in bits
 * @reverse: byte/pixel reversing configuration
 * @rev_copy: true to walk rows (and words within a row) backward, for
 *            overlapping areas where the destination follows the source
 *
 * Both address cursors are mutated as the rows are walked.
 */
static inline void fb_copy_aligned(struct fb_address *dst, struct fb_address *src,
				   int width, u32 height, unsigned int bits_per_line,
				   struct fb_reverse reverse, bool rev_copy)
{
	u32 row;

	for (row = 0; row < height; row++) {
		if (rev_copy) {
			fb_copy_aligned_rev(dst, src, width + dst->bits, reverse);
			fb_address_backward(dst, bits_per_line);
			fb_address_backward(src, bits_per_line);
		} else {
			fb_copy_aligned_fwd(dst, src, width + dst->bits, reverse);
			fb_address_forward(dst, bits_per_line);
			fb_address_forward(src, bits_per_line);
		}
	}
}
|
|
|
|
/*
 * fb_copy_fwd - copy one row forward when src and dst have different
 * in-word bit alignment
 * @dst: destination address; dst->bits is the bit offset in the first word
 * @src: source address with a different in-word bit offset
 * @width: width of the area in bits
 * @reorder: fb_no_reverse or the reversing variant; compared by address
 *           below so the inliner can drop the reordering on the fast path
 * @reverse: byte/pixel reversing configuration
 *
 * Source words are shifted by the alignment difference and pairs of
 * adjacent source words are merged to produce each aligned destination
 * word.  When @shift is negative the caller has pre-advanced the source
 * cursor by one word (see fb_copy()), so offset -1 is the word holding
 * the first source bits.  Data is kept in reordered form while shifting
 * and reordered back just before each write, which makes the reversing
 * case correct without separate code paths.
 */
static __always_inline void fb_copy_fwd(const struct fb_address *dst,
					const struct fb_address *src, int width,
					unsigned long (*reorder)(unsigned long val,
								 struct fb_reverse reverse),
					struct fb_reverse reverse)
{
	unsigned long first, last;
	unsigned long d0, d1;
	int end = dst->bits + width;
	int shift, left, right;

	first = fb_pixel_mask(dst->bits, reverse);
	last = ~fb_pixel_mask(end & (BITS_PER_LONG-1), reverse);

	/* alignment difference and the complementary shift amounts */
	shift = dst->bits - src->bits;
	right = shift & (BITS_PER_LONG - 1);
	left = -shift & (BITS_PER_LONG - 1);

	if (end <= BITS_PER_LONG) {
		/* Single destination word */
		last = last ? (last & first) : first;
		if (shift < 0) {
			d0 = fb_left(reorder(fb_read_offset(-1, src), reverse), left);
			/* second source word only if the area straddles it */
			if (src->bits + width > BITS_PER_LONG)
				d0 |= fb_right(reorder(fb_read_offset(0, src), reverse), right);

			if (last == ~0UL)
				fb_write_offset(reorder(d0, reverse), 0, dst);
			else
				fb_modify_offset(reorder(d0, reverse), last, 0, dst);
		} else {
			d0 = fb_right(reorder(fb_read_offset(0, src), reverse), right);
			fb_modify_offset(reorder(d0, reverse), last, 0, dst);
		}
	} else {
		/* Multiple destination words */
		int offset = first != ~0UL; /* 1 when a partial leading word exists */

		/* Leading bits */
		if (shift < 0)
			d0 = reorder(fb_read_offset(-1, src), reverse);
		else
			d0 = 0;

		/* 2 source words merged into the partial leading word */
		if (offset) {
			d1 = reorder(fb_read_offset(0, src), reverse);
			d0 = fb_left(d0, left) | fb_right(d1, right);
			fb_modify_offset(reorder(d0, reverse), first, 0, dst);
			d0 = d1;
		}

		/* Main chunk; unrolled fast path without reordering */
		end /= BITS_PER_LONG;
		if (reorder == fb_no_reverse)
			while (offset + 4 <= end) {
				d1 = fb_read_offset(offset + 0, src);
				d0 = fb_left(d0, left) | fb_right(d1, right);
				fb_write_offset(d0, offset + 0, dst);
				d0 = d1;
				d1 = fb_read_offset(offset + 1, src);
				d0 = fb_left(d0, left) | fb_right(d1, right);
				fb_write_offset(d0, offset + 1, dst);
				d0 = d1;
				d1 = fb_read_offset(offset + 2, src);
				d0 = fb_left(d0, left) | fb_right(d1, right);
				fb_write_offset(d0, offset + 2, dst);
				d0 = d1;
				d1 = fb_read_offset(offset + 3, src);
				d0 = fb_left(d0, left) | fb_right(d1, right);
				fb_write_offset(d0, offset + 3, dst);
				d0 = d1;
				offset += 4;
			}

		while (offset < end) {
			d1 = reorder(fb_read_offset(offset, src), reverse);
			d0 = fb_left(d0, left) | fb_right(d1, right);
			fb_write_offset(reorder(d0, reverse), offset, dst);
			d0 = d1;
			offset++;
		}

		/* Trailing bits; read one more source word only if needed */
		if (last) {
			d0 = fb_left(d0, left);
			if (src->bits + width
			    > offset * BITS_PER_LONG + ((shift < 0) ? BITS_PER_LONG : 0))
				d0 |= fb_right(reorder(fb_read_offset(offset, src), reverse),
					       right);
			fb_modify_offset(reorder(d0, reverse), last, offset, dst);
		}
	}
}
|
|
|
|
/*
 * fb_copy_rev - copy one row backward (trailing word first) when src and
 * dst have different in-word bit alignment
 * @dst: destination address; dst->bits is the bit offset in the first word
 * @src: source address with a different in-word bit offset
 * @end: bit offset of the end of the area, counted from the start of the
 *       first destination word (i.e. dst->bits + width)
 * @reorder: fb_no_reverse or the reversing variant; compared by address
 *           below so the inliner can drop the reordering on the fast path
 * @reverse: byte/pixel reversing configuration
 *
 * Mirror image of fb_copy_fwd for overlapping areas where the destination
 * follows the source.  When @shift is positive the caller has pre-moved
 * the source cursor back by one word (see fb_copy()), so offset 1 is the
 * word holding the last source bits.  Data is kept in reordered form
 * while shifting and reordered back just before each write.
 */
static __always_inline void fb_copy_rev(const struct fb_address *dst,
					const struct fb_address *src, int end,
					unsigned long (*reorder)(unsigned long val,
								 struct fb_reverse reverse),
					struct fb_reverse reverse)
{
	unsigned long first, last;
	unsigned long d0, d1;
	int shift, left, right;

	first = fb_pixel_mask(dst->bits, reverse);
	last = ~fb_pixel_mask(end & (BITS_PER_LONG-1), reverse);

	/* alignment difference and the complementary shift amounts */
	shift = dst->bits - src->bits;
	right = shift & (BITS_PER_LONG-1);
	left = -shift & (BITS_PER_LONG-1);

	if (end <= BITS_PER_LONG) {
		/* Single destination word */
		if (last)
			first &= last;

		if (shift > 0) {
			d0 = fb_right(reorder(fb_read_offset(1, src), reverse), right);
			/* first source word only if it contributes bits */
			if (src->bits > left)
				d0 |= fb_left(reorder(fb_read_offset(0, src), reverse), left);
			fb_modify_offset(reorder(d0, reverse), first, 0, dst);
		} else {
			d0 = fb_left(reorder(fb_read_offset(0, src), reverse), left);
			/* second source word only if the area straddles it */
			if (src->bits + end - dst->bits > BITS_PER_LONG)
				d0 |= fb_right(reorder(fb_read_offset(1, src), reverse), right);
			if (first == ~0UL)
				fb_write_offset(reorder(d0, reverse), 0, dst);
			else
				fb_modify_offset(reorder(d0, reverse), first, 0, dst);
		}
	} else {
		/* Multiple destination words */
		int offset = first != ~0UL; /* 1 when a partial leading word exists */

		end /= BITS_PER_LONG;

		/* 2 source words: read past the end only if those bits are used */
		if (fb_right(~0UL, right) & last)
			d0 = fb_right(reorder(fb_read_offset(end + 1, src), reverse), right);
		else
			d0 = 0;

		/* Trailing bits are written first */
		d1 = reorder(fb_read_offset(end, src), reverse);
		if (last)
			fb_modify_offset(reorder(fb_left(d1, left) | d0, reverse),
					 last, end, dst);
		d0 = d1;

		/* Main chunk backward; unrolled fast path without reordering */
		if (reorder == fb_no_reverse)
			while (end >= offset + 4) {
				d1 = fb_read_offset(end - 1, src);
				d0 = fb_left(d1, left) | fb_right(d0, right);
				fb_write_offset(d0, end - 1, dst);
				d0 = d1;
				d1 = fb_read_offset(end - 2, src);
				d0 = fb_left(d1, left) | fb_right(d0, right);
				fb_write_offset(d0, end - 2, dst);
				d0 = d1;
				d1 = fb_read_offset(end - 3, src);
				d0 = fb_left(d1, left) | fb_right(d0, right);
				fb_write_offset(d0, end - 3, dst);
				d0 = d1;
				d1 = fb_read_offset(end - 4, src);
				d0 = fb_left(d1, left) | fb_right(d0, right);
				fb_write_offset(d0, end - 4, dst);
				d0 = d1;
				end -= 4;
			}

		while (end > offset) {
			end--;
			d1 = reorder(fb_read_offset(end, src), reverse);
			d0 = fb_left(d1, left) | fb_right(d0, right);
			fb_write_offset(reorder(d0, reverse), end, dst);
			d0 = d1;
		}

		/* Leading bits are written last */
		if (offset) {
			d0 = fb_right(d0, right);
			if (src->bits > left)
				d0 |= fb_left(reorder(fb_read_offset(0, src), reverse), left);
			fb_modify_offset(reorder(d0, reverse), first, 0, dst);
		}
	}
}
|
|
|
|
/*
 * fb_copy - copy a rectangle when src and dst have different in-word bit
 * alignment
 * @dst: destination address, advanced/retreated a full line per row
 * @src: source address, kept in lockstep with @dst
 * @width: width of the area in bits
 * @height: number of rows to copy
 * @bits_per_line: framebuffer line pitch in bits
 * @reorder: fb_no_reverse or the reversing variant, passed through to the
 *           per-row routines
 * @reverse: byte/pixel reversing configuration
 * @rev_copy: true to walk rows backward for overlapping areas where the
 *            destination follows the source
 *
 * Before each row the source cursor is moved by one word when the
 * alignment difference requires the row routine to look one word outside
 * its base (backward when src->bits < dst->bits in the reverse case,
 * forward when src->bits > dst->bits in the forward case); the move is
 * undone after the row so the per-line stepping stays symmetric.
 */
static __always_inline void fb_copy(struct fb_address *dst, struct fb_address *src,
				    int width, u32 height, unsigned int bits_per_line,
				    unsigned long (*reorder)(unsigned long val,
							     struct fb_reverse reverse),
				    struct fb_reverse reverse, bool rev_copy)
{
	for (; height; height--) {
		int move;

		if (rev_copy) {
			move = src->bits < dst->bits ? -1 : 0;
			fb_address_move_long(src, move);
			fb_copy_rev(dst, src, width + dst->bits, reorder, reverse);
			fb_address_backward(dst, bits_per_line);
			fb_address_backward(src, bits_per_line);
		} else {
			move = src->bits > dst->bits ? 1 : 0;
			fb_address_move_long(src, move);
			fb_copy_fwd(dst, src, width, reorder, reverse);
			fb_address_forward(dst, bits_per_line);
			fb_address_forward(src, bits_per_line);
		}
		fb_address_move_long(src, -move);
	}
}
|
|
|
|
static inline void fb_copyarea(struct fb_info *p, const struct fb_copyarea *area)
|
|
{
|
|
int bpp = p->var.bits_per_pixel;
|
|
u32 dy = area->dy;
|
|
u32 sy = area->sy;
|
|
u32 height = area->height;
|
|
int width = area->width * bpp;
|
|
unsigned int bits_per_line = BYTES_TO_BITS(p->fix.line_length);
|
|
struct fb_reverse reverse = fb_reverse_init(p);
|
|
struct fb_address dst = fb_address_init(p);
|
|
struct fb_address src = dst;
|
|
bool rev_copy = (dy > sy) || (dy == sy && area->dx > area->sx);
|
|
|
|
if (rev_copy) {
|
|
dy += height - 1;
|
|
sy += height - 1;
|
|
}
|
|
fb_address_forward(&dst, dy*bits_per_line + area->dx*bpp);
|
|
fb_address_forward(&src, sy*bits_per_line + area->sx*bpp);
|
|
|
|
if (src.bits == dst.bits)
|
|
fb_copy_aligned(&dst, &src, width, height, bits_per_line, reverse, rev_copy);
|
|
else if (!reverse.byte && (!reverse.pixel ||
|
|
!((src.bits ^ dst.bits) & (BITS_PER_BYTE-1)))) {
|
|
fb_copy(&dst, &src, width, height, bits_per_line,
|
|
fb_no_reverse, reverse, rev_copy);
|
|
} else
|
|
fb_copy(&dst, &src, width, height, bits_per_line,
|
|
fb_reverse_long, reverse, rev_copy);
|
|
}
|