Files
linux/drivers/video/fbdev/core/fb_imageblit.h
Zsolt Kajtar eabb032930 fbdev: Refactoring the fbcon packed pixel drawing routines
The original version duplicated more or less the same algorithms for
both system and i/o memory.

In this version the drawing algorithms (copy/fill/blit) are separate
from the memory access (system and i/o). The two parts are getting
combined in the loadable module sources. This also makes it more robust
against wrong memory access type or alignment mistakes as there's no
direct pointer access or arithmetic in the algorithm sources anymore.

Due to liberal use of inlining, the compiled result is a single function
in all 6 cases, without unnecessary function calls. Unlike before, the
use of macros could be minimized, as apparently both gcc and clang are
now capable of achieving the same result with inline functions.

What wasn't quite the same in the two variants was the support for pixel
order reversing. This version is capable of doing that for both system and
I/O memory, and not only for the latter. As demand for low bits per
pixel modes isn't high, there's a configuration option to enable this
separately for the CFB and SYS modules.

The pixel reversing algorithm is different from the earlier one and was
designed so that it can take advantage of bit order reversing instructions
on architectures which have them, even for higher bits per pixel modes
like four bpp.

One of the shortcomings of the earlier version was the incomplete
support for foreign endian framebuffers. Now all three drawing
algorithms produce correct output on both endians with native and
foreign framebuffers. This is one of the important differences even if
otherwise the algorithms don't look too different than before.

All three routines work now with aligned native word accesses. As a
consequence blitting isn't limited to 32 bits on 64 bit architectures as
it was before.

The old routines silently assumed that rows are a multiple of the word
size. Due to how the new routines function, this isn't a requirement any
more and accesses will be aligned regardless. However, if the
framebuffer's rows are not a multiple of the word size, then some of the
fast paths won't be available.

As this code is supposed to be running on all supported architectures it
wasn't optimized for a particular one. That doesn't mean I haven't
looked at the disassembly. That's where I noticed that it isn't a good
idea to use the fallback bitreversing code for example.

The low bits per pixel modes should be faster than before as the new
routines can blit 4 pixels at a time.

On the higher bits per pixel modes I retained the specialized aligned
routines, so it should be more or less the same, except on 64 bit
architectures. There the blitting word size is now double, which means 32
BPP is no longer done a single pixel at a time.

The code was tested on x86, amd64, mips32 and mips64, the latter two in
big endian configuration. Originally I thought I could get away with the
first two, but with such bit twisting code byte ordering is tricky and
not really possible to get right without actually verifying it.

While writing such routines isn't rocket science, a lot of time was spent
on making sure that pixel ordering, foreign byte order, various bits per
pixel, CPU endianness and word size give the expected result in
all sorts of combinations without making it overly complicated or full
of special cases.

Signed-off-by: Zsolt Kajtar <soci@c64.rulez.org>
Signed-off-by: Helge Deller <deller@gmx.de>
2025-03-26 22:39:21 +01:00

496 lines
14 KiB
C

/* SPDX-License-Identifier: GPL-2.0-only
*
* Generic bitmap / 8 bpp image bitstreamer for packed pixel framebuffers
*
* Rewritten by:
* Copyright (C) 2025 Zsolt Kajtar (soci@c64.rulez.org)
*
* Based on previous work of:
* Copyright (C) June 1999 James Simmons
* Anton Vorontsov <avorontsov@ru.mvista.com>
* Pavel Pisa <pisa@cmp.felk.cvut.cz>
* Antonino A. Daplas <adaplas@gmail.com>
* and others
*
* NOTES:
*
* Handles native and foreign byte order on both endians, standard and
* reverse pixel order in a byte (<8 BPP), word length of 32/64 bits,
* bits per pixel from 1 to the word length. Handles line lengths at byte
* granularity while maintaining aligned accesses.
*
* Optimized routines for word aligned 1, 2, 4 pixel per word for high
* bpp modes and 4 pixel at a time operation for low bpp.
*
* The color image is expected to be one byte per pixel, and values should
* not exceed the bitdepth or the pseudo palette (if used).
*/
#include "fb_draw.h"
/*
 * State of the 1 bit per pixel source iterator that hands out a single
 * pixel per call (see fb_bitmap_image()).
 */
struct fb_bitmap_iter {
	const u8 *data;			/* first byte of the current source row */
	unsigned long colors[2];	/* colour for a clear bit [0] / a set bit [1] */
	int width;			/* number of pixels in a row */
	int i;				/* index of the next pixel within the row */
};
/*
 * Fetch the next pixel of a 1 bpp bitmap row.
 *
 * Stores the colour selected by the next source bit (most significant bit
 * of each byte first) in *pixels and returns true. Once the row is used
 * up, moves the iterator to the next row (rows are padded to whole bytes),
 * rewinds the pixel index and returns false. *bits is left untouched.
 */
static bool fb_bitmap_image(void *iterator, unsigned long *pixels, int *bits)
{
	struct fb_bitmap_iter *it = iterator;
	int pos, shift;

	if (it->i >= it->width) {
		it->data += BITS_TO_BYTES(it->width);
		it->i = 0;
		return false;
	}

	pos = it->i++;
	/* bit 7 of a byte is the leftmost pixel */
	shift = ~pos & (BITS_PER_BYTE-1);
	*pixels = it->colors[(it->data[pos / BITS_PER_BYTE] >> shift) & 1];
	return true;
}
/*
 * State of the one byte per pixel colour source iterator that hands out a
 * single pixel per call (see fb_color_image()).
 */
struct fb_color_iter {
	const u8 *data;			/* first byte of the current source row */
	const u32 *palette;		/* optional lookup table, NULL for none */
	struct fb_reverse reverse;	/* only reverse.pixel is consulted */
	int shift;			/* left shift applied to every colour */
	int width;			/* number of pixels in a row */
	int i;				/* index of the next pixel within the row */
};
/*
 * Fetch the next pixel of a one byte per pixel colour row.
 *
 * The source byte is translated through the palette when one is set,
 * shifted left by iter->shift and, if pixel order reversal is requested,
 * passed through fb_reverse_bits_long(). Returns true with the result in
 * *pixels, or false after advancing to the next row when the current one
 * is exhausted. *bits is left untouched.
 */
static bool fb_color_image(void *iterator, unsigned long *pixels, int *bits)
{
	struct fb_color_iter *it = iterator;
	unsigned long value;

	if (it->i >= it->width) {
		it->data += it->width;
		it->i = 0;
		return false;
	}

	value = it->data[it->i++];
	if (it->palette)
		value = it->palette[value];
	value <<= it->shift;

	*pixels = it->reverse.pixel ? fb_reverse_bits_long(value) : value;
	return true;
}
/*
 * State of the 1 bit per pixel source iterator that hands out up to four
 * pixels (one source nibble) per call (see fb_bitmap4x_image()).
 */
struct fb_bitmap4x_iter {
	const u8 *data;		/* next source byte to fetch; data[-1] may still
				 * hold a pending low nibble */
	u32 fgxcolor;		/* (foreground ^ background) in all four pixel slots */
	u32 bgcolor;		/* background in all four pixel slots */
	int width;		/* number of pixels in a row */
	int i;			/* pixels still to be produced for the current row */
	const u32 *expand;	/* 16 entry nibble to pixel mask table */
	int bpp;		/* destination bits per pixel */
	bool top;		/* high nibble of data[-1] was used, low one is next */
};
/*
 * Fetch the next group of up to four pixels of a 1 bpp bitmap row.
 *
 * Each call consumes one source nibble, high nibble of a byte first. A
 * full group leaves *bits alone; the short group at the end of a row sets
 * *bits to the size of the remaining pixels. When nothing is left the
 * iterator is re-armed for the next row (which always starts on a fresh
 * byte), *bits is restored to the size of a full group and false is
 * returned.
 */
static bool fb_bitmap4x_image(void *iterator, unsigned long *pixels, int *bits)
{
	struct fb_bitmap4x_iter *it = iterator;
	const int nibble = BITS_PER_BYTE/2;
	unsigned long group;
	u8 data;

	if (it->i == 0) {
		*bits = it->bpp * BITS_PER_BYTE/2;
		it->i = it->width;
		it->top = false;
		return false;
	}

	/* low nibble of the previous byte, or high nibble of a new one */
	if (it->top)
		data = it->data[-1] & ((1 << nibble)-1);
	else
		data = *it->data++ >> nibble;

	if (it->i >= nibble) {
		it->i -= nibble;
		it->top = !it->top;
	} else {
		/* trailing partial group, fewer than four pixels */
		*bits = it->bpp * it->i;
#ifndef __LITTLE_ENDIAN
		/* drop the unused pixels from the bottom of the nibble */
		data >>= nibble - it->i;
#endif
		it->i = 0;
	}

	/* mask selects foreground where the source bit is set */
	group = (it->fgxcolor & it->expand[data]) ^ it->bgcolor;
#ifndef __LITTLE_ENDIAN
	/* move the group to the top of the word */
	group <<= BITS_PER_LONG - *bits;
#endif
	*pixels = group;
	return true;
}
/*
 * Draw one row, a group of pixels at a time.
 *
 * @get:     iterator callback; stores the next group in *pixels, may change
 *           *bits, and returns false once the row is finished
 * @iter:    opaque state passed through to @get
 * @bits:    size in bits of a group until @get changes it
 * @dst:     destination; dst->bits is the bit position inside the first word
 * @reverse: passed to fb_reverse_long() / fb_pixel_mask() for every word
 *
 * Groups are collected in a word sized accumulator that is written out each
 * time BITS_PER_LONG bits are reached. A partially covered first or last
 * word is combined with the current framebuffer content.
 * NOTE(review): the exact shift direction of fb_right()/fb_left() and the
 * polarity of fb_pixel_mask()/fb_comp() are defined in fb_draw.h, not here.
 */
static __always_inline void fb_bitblit(bool (*get)(void *iter, unsigned long *pixels,
						   int *bits),
				       void *iter, int bits, struct fb_address *dst,
				       struct fb_reverse reverse)
{
	unsigned long group, merge_mask;
	unsigned long acc = 0, prev = 0;
	int word = 0;
	int fill = dst->bits;

	if (fill) {
		/* starting mid-word: seed the accumulator from what is there */
		prev = fb_read_offset(0, dst);
		acc = fb_reverse_long(prev, reverse);
		acc &= ~fb_right(~0UL, fill);
	}

	while (get(iter, &group, &bits)) {
		acc |= fb_right(group, fill);
		fill += bits;

		if (fill >= BITS_PER_LONG) {
			/* word complete: flush it and carry the overhang */
			acc = fb_reverse_long(acc, reverse);
			fb_write_offset(acc, word++, dst);
			fill &= BITS_PER_LONG - 1;
			acc = fill ? fb_left(group, bits - fill) : 0;
		}
	}

	if (!fill)
		return;

	/* partially filled last word: merge with the existing content */
	merge_mask = ~fb_pixel_mask(fill, reverse);
	acc = fb_reverse_long(acc, reverse);
	/* the old value is already at hand if this is still the first,
	 * unaligned word
	 */
	if (word || !dst->bits)
		prev = fb_read_offset(word, dst);
	fb_write_offset(fb_comp(acc, prev, merge_mask), word, dst);
}
/* draw a color image a pixel at a time */
static inline void fb_color_imageblit(const struct fb_image *image, struct fb_address *dst,
unsigned int bits_per_line, const u32 *palette, int bpp,
struct fb_reverse reverse)
{
struct fb_color_iter iter;
u32 height;
iter.data = (const u8 *)image->data;
iter.palette = palette;
iter.reverse = reverse;
#ifdef __LITTLE_ENDIAN
if (reverse.pixel)
iter.shift = BITS_PER_BYTE - bpp;
else
iter.shift = 0;
#else
if (reverse.pixel)
iter.shift = BITS_PER_LONG - BITS_PER_BYTE;
else
iter.shift = BITS_PER_LONG - bpp;
#endif
iter.width = image->width;
iter.i = 0;
height = image->height;
while (height--) {
fb_bitblit(fb_color_image, &iter, bpp, dst, reverse);
fb_address_forward(dst, bits_per_line);
}
}
/*
 * Compile time generators for the nibble expansion tables.
 *
 * FB_GEN(a, b) spreads the four bits of nibble @a over four fields of @b
 * bits each; a field is all ones when its source bit is set. On little
 * endian the most significant bit of @a lands in the lowest field, on big
 * endian in the highest one.
 *
 * FB_GEN1(a) is the @b == 1 variant: the nibble bit-reversed on little
 * endian, unchanged on big endian. It exists separately because the little
 * endian FB_GEN() would shift by a negative count for @b == 1.
 *
 * FB_GENx(a) is the brace enclosed list of all 16 FB_GEN() values for
 * field width @a.
 *
 * Every use of a macro argument is parenthesized so that expression
 * arguments expand with the intended precedence.
 */
#ifdef __LITTLE_ENDIAN
#define FB_GEN(a, b) (((a)/8+(((a)&4)<<((b)-2)) \
		       +(((a)&2)<<((b)*2-1))+(((a)&1u)<<((b)*3)))*((1<<(b))-1))
#define FB_GEN1(a) ((a)/8+((a)&4)/2+((a)&2)*2+((a)&1)*8)
#else
#define FB_GEN(a, b) (((((a)/8)<<((b)*3))+(((a)&4)<<((b)*2-2)) \
		       +(((a)&2)<<((b)-1))+((a)&1u))*((1<<(b))-1))
#define FB_GEN1(a) (a)
#endif
#define FB_GENx(a) { FB_GEN(0, (a)), FB_GEN(1, (a)), FB_GEN(2, (a)), FB_GEN(3, (a)),	\
	FB_GEN(4, (a)), FB_GEN(5, (a)), FB_GEN(6, (a)), FB_GEN(7, (a)),			\
	FB_GEN(8, (a)), FB_GEN(9, (a)), FB_GEN(10, (a)), FB_GEN(11, (a)),		\
	FB_GEN(12, (a)), FB_GEN(13, (a)), FB_GEN(14, (a)), FB_GEN(15, (a)) }
/*
 * Draw a two colour (1 bpp source) image four pixels at a time.
 * Only for destinations of 1 to 8 bits per pixel: @bpp - 1 indexes tables
 * with BITS_PER_BYTE entries.
 *
 * @image:         source bitmap; data, width and height are used
 * @dst:           destination address of the top left pixel, advanced by
 *                 @bits_per_line after every row
 * @fgcolor:       colour for set source bits
 * @bgcolor:       colour for clear source bits
 * @bpp:           destination bits per pixel
 * @bits_per_line: distance between framebuffer rows in bits
 * @reverse:       byte/pixel order conversion settings
 */
static inline void fb_bitmap4x_imageblit(const struct fb_image *image, struct fb_address *dst,
					 unsigned long fgcolor, unsigned long bgcolor, int bpp,
					 unsigned int bits_per_line, struct fb_reverse reverse)
{
	/* multiplier replicating one pixel into four adjacent pixel slots */
	static const u32 mul[BITS_PER_BYTE] = {
		0xf, 0x55, 0x249, 0x1111, 0x8421, 0x41041, 0x204081, 0x01010101
	};
	/* per bpp: source nibble to mask covering the pixels whose bit is set */
	static const u32 expand[BITS_PER_BYTE][1 << 4] = {
		{
			FB_GEN1(0), FB_GEN1(1), FB_GEN1(2), FB_GEN1(3),
			FB_GEN1(4), FB_GEN1(5), FB_GEN1(6), FB_GEN1(7),
			FB_GEN1(8), FB_GEN1(9), FB_GEN1(10), FB_GEN1(11),
			FB_GEN1(12), FB_GEN1(13), FB_GEN1(14), FB_GEN1(15)
		},
		FB_GENx(2), FB_GENx(3), FB_GENx(4),
		FB_GENx(5), FB_GENx(6), FB_GENx(7), FB_GENx(8),
	};
	const int idx = bpp - 1;
	struct fb_bitmap4x_iter iter;
	u32 rows;

	if (reverse.pixel) {
		/* align the colour to the top of a byte before reversing */
		fgcolor = fb_reverse_bits_long(fgcolor << (BITS_PER_BYTE - bpp));
		bgcolor = fb_reverse_bits_long(bgcolor << (BITS_PER_BYTE - bpp));
	}

	iter.data = (const u8 *)image->data;
	iter.bpp = bpp;
	iter.expand = expand[idx];
	iter.bgcolor = bgcolor * mul[idx];
	iter.fgxcolor = (fgcolor ^ bgcolor) * mul[idx];
	iter.width = image->width;
	/* the iterator counts the remaining pixels of a row downwards */
	iter.i = image->width;
	iter.top = false;

	for (rows = image->height; rows; rows--) {
		fb_bitblit(fb_bitmap4x_image, &iter, bpp * BITS_PER_BYTE/2, dst, reverse);
		fb_address_forward(dst, bits_per_line);
	}
}
/* draw a bitmap image 1 pixel at a time (for >8 bpp) */
static inline void fb_bitmap1x_imageblit(const struct fb_image *image, struct fb_address *dst,
unsigned long fgcolor, unsigned long bgcolor, int bpp,
unsigned int bits_per_line, struct fb_reverse reverse)
{
struct fb_bitmap_iter iter;
u32 height;
iter.colors[0] = bgcolor;
iter.colors[1] = fgcolor;
#ifndef __LITTLE_ENDIAN
iter.colors[0] <<= BITS_PER_LONG - bpp;
iter.colors[1] <<= BITS_PER_LONG - bpp;
#endif
iter.data = (const u8 *)image->data;
iter.width = image->width;
iter.i = 0;
height = image->height;
while (height--) {
fb_bitblit(fb_bitmap_image, &iter, bpp, dst, reverse);
fb_address_forward(dst, bits_per_line);
}
}
/*
 * Word aligned bitmap blitting with one pixel per native word.
 *
 * @image:          source bitmap, most significant bit of each byte first,
 *                  every row starting on a new byte
 * @dst:            word aligned destination, moved by @words_per_line
 *                  after every row
 * @fgcolor:        colour for set source bits
 * @bgcolor:        colour for clear source bits
 * @words_per_line: distance between framebuffer rows in words
 * @reverse:        only reverse.byte is used, to byte swap the colours
 */
static inline void fb_bitmap_1ppw(const struct fb_image *image, struct fb_address *dst,
				  unsigned long fgcolor, unsigned long bgcolor,
				  int words_per_line, struct fb_reverse reverse)
{
	const u8 *src = (const u8 *)image->data;
	const int width = image->width;
	unsigned long tab[2];
	int offset;
	u32 rows;

	/* ready to write words for a clear and a set source bit */
	tab[0] = reverse.byte ? swab_long(bgcolor) : bgcolor;
	tab[1] = reverse.byte ? swab_long(fgcolor) : fgcolor;

	for (rows = image->height; rows; rows--) {
		/* whole source bytes, eight pixels each */
		for (offset = 0; offset + 8 <= width; offset += 8) {
			unsigned int srcbyte = *src++;
			int k;

			for (k = 0; k < 8; k++)
				fb_write_offset(tab[(srcbyte >> (7 - k)) & 1],
						offset + k, dst);
		}

		/* remaining pixels from a partially used last byte */
		if (offset < width) {
			unsigned int srcbyte = *src++;
			int bit;

			for (bit = 7; offset < width; offset++, bit--)
				fb_write_offset(tab[(srcbyte >> bit) & 1], offset, dst);
		}
		fb_address_move_long(dst, words_per_line);
	}
}
/*
 * Combine two pixels into one word. The @left pixel occupies the low
 * @bits bits on little endian and the bits above them on big endian.
 */
static inline unsigned long fb_pack(unsigned long left, unsigned long right, int bits)
{
	unsigned long low, high;

#ifdef __LITTLE_ENDIAN
	low = left;
	high = right;
#else
	low = right;
	high = left;
#endif
	return low | high << bits;
}
/*
 * Word aligned bitmap blitting with two pixels per native word.
 * The image width is expected to be even (the caller checks this).
 *
 * @image:          source bitmap, most significant bit of each byte first,
 *                  every row starting on a new byte
 * @dst:            word aligned destination, moved by @words_per_line
 *                  after every row
 * @fgcolor:        colour for set source bits
 * @bgcolor:        colour for clear source bits
 * @words_per_line: distance between framebuffer rows in words
 * @reverse:        only reverse.byte is used, to byte swap the words
 */
static inline void fb_bitmap_2ppw(const struct fb_image *image, struct fb_address *dst,
				  unsigned long fgcolor, unsigned long bgcolor,
				  int words_per_line, struct fb_reverse reverse)
{
	const u8 *src = (const u8 *)image->data;
	const int width = image->width / 2;	/* in words */
	unsigned long tab[4];
	int offset, k;
	u32 rows;

	/* index bit 1 selects the left pixel, bit 0 the right one */
	for (k = 0; k < 4; k++) {
		unsigned long left = (k & 2) ? fgcolor : bgcolor;
		unsigned long right = (k & 1) ? fgcolor : bgcolor;

		tab[k] = fb_pack(left, right, BITS_PER_LONG/2);
		if (reverse.byte)
			tab[k] = swab_long(tab[k]);
	}

	for (rows = image->height; rows; rows--) {
		/* whole source bytes, four words each */
		for (offset = 0; offset + 4 <= width; offset += 4) {
			unsigned int srcbyte = *src++;

			for (k = 0; k < 4; k++)
				fb_write_offset(tab[(srcbyte >> (6 - 2 * k)) & 3],
						offset + k, dst);
		}

		/* remaining words from a partially used last byte */
		if (offset < width) {
			unsigned int srcbyte = *src++;
			int bit;

			for (bit = 6; offset < width; offset++, bit -= 2)
				fb_write_offset(tab[(srcbyte >> bit) & 3], offset, dst);
		}
		fb_address_move_long(dst, words_per_line);
	}
}
/*
 * FB_PATP(a, b): quarter number @b of a word filled with ones if @a is 1,
 * zero if @a is 0 (quarter 0 being the least significant one).
 * FB_PAT4(a): word mask in which quarter n is all ones when bit n of the
 * nibble @a is set.
 */
#define FB_PATP(a, b) \
	(((a) << ((b) * BITS_PER_LONG / 4)) * ((1UL << BITS_PER_LONG / 4) - 1UL))
#define FB_PAT4(a) \
	(FB_PATP((a) & 1, 0) | FB_PATP(((a) & 2) / 2, 1) | \
	 FB_PATP(((a) & 4) / 4, 2) | FB_PATP(((a) & 8) / 8, 3))
/*
 * Word aligned bitmap blitting with four pixels per native word.
 * The image width is expected to be a multiple of four (the caller checks
 * this).
 *
 * @image:          source bitmap, most significant bit of each byte first,
 *                  every row starting on a new byte
 * @dst:            word aligned destination, moved by @words_per_line
 *                  after every row
 * @fgcolor:        colour for set source bits
 * @bgcolor:        colour for clear source bits
 * @words_per_line: distance between framebuffer rows in words
 * @reverse:        only reverse.byte is used
 */
static inline void fb_bitmap_4ppw(const struct fb_image *image, struct fb_address *dst,
				  unsigned long fgcolor, unsigned long bgcolor,
				  int words_per_line, struct fb_reverse reverse)
{
	/* nibble bit n covers quarter n of the word */
	static const unsigned long tab16_be[] = {
		0, FB_PAT4(1UL), FB_PAT4(2UL), FB_PAT4(3UL),
		FB_PAT4(4UL), FB_PAT4(5UL), FB_PAT4(6UL), FB_PAT4(7UL),
		FB_PAT4(8UL), FB_PAT4(9UL), FB_PAT4(10UL), FB_PAT4(11UL),
		FB_PAT4(12UL), FB_PAT4(13UL), FB_PAT4(14UL), ~0UL
	};
	/* same masks with the order of the nibble bits reversed */
	static const unsigned long tab16_le[] = {
		0, FB_PAT4(8UL), FB_PAT4(4UL), FB_PAT4(12UL),
		FB_PAT4(2UL), FB_PAT4(10UL), FB_PAT4(6UL), FB_PAT4(14UL),
		FB_PAT4(1UL), FB_PAT4(9UL), FB_PAT4(5UL), FB_PAT4(13UL),
		FB_PAT4(3UL), FB_PAT4(11UL), FB_PAT4(7UL), ~0UL
	};
	/* the table follows the byte order of the framebuffer */
#ifdef __LITTLE_ENDIAN
	const bool use_le = !reverse.byte;
#else
	const bool use_le = !!reverse.byte;
#endif
	const unsigned long *tab = use_le ? tab16_le : tab16_be;
	const u8 *src = (const u8 *)image->data;
	const int width = image->width / 4;	/* in words */
	int offset;
	u32 rows;

	/* replicate both colours into all four quarters of a word */
	fgcolor |= fgcolor << BITS_PER_LONG/4;
	bgcolor |= bgcolor << BITS_PER_LONG/4;
	fgcolor |= fgcolor << BITS_PER_LONG/2;
	bgcolor |= bgcolor << BITS_PER_LONG/2;
	/* (fgcolor & mask) ^ bgcolor then yields fg where the mask is set */
	fgcolor ^= bgcolor;

	/* swapping is only needed when a pixel is wider than a byte */
	if (BITS_PER_LONG/4 > BITS_PER_BYTE && reverse.byte) {
		fgcolor = swab_long(fgcolor);
		bgcolor = swab_long(bgcolor);
	}

	for (rows = image->height; rows; rows--) {
		/* whole source bytes: high nibble first, then the low one */
		for (offset = 0; offset + 2 <= width; offset += 2) {
			const u8 srcbyte = *src++;

			fb_write_offset((fgcolor & tab[srcbyte >> 4]) ^ bgcolor,
					offset + 0, dst);
			fb_write_offset((fgcolor & tab[srcbyte & 0xf]) ^ bgcolor,
					offset + 1, dst);
		}

		/* odd number of words: only the high nibble of the last byte */
		if (offset < width) {
			fb_write_offset((fgcolor & tab[*src >> 4]) ^ bgcolor, offset, dst);
			src++;
		}
		fb_address_move_long(dst, words_per_line);
	}
}
/*
 * Draw a two colour (1 bpp source) image, choosing the fastest routine
 * that fits.
 *
 * The word aligned routines are used when the destination starts on a word
 * boundary, rows are a whole number of words apart and the pixel size and
 * image width suit them. Everything else goes through the generic
 * iterators: four pixels at a time up to 8 bpp, one pixel at a time above
 * that. Nothing is drawn for a @bpp outside 1..BITS_PER_LONG.
 *
 * @image:         source bitmap with foreground/background colour
 * @dst:           destination address of the top left pixel
 * @bits_per_line: distance between framebuffer rows in bits
 * @palette:       optional table translating the image colours, or NULL
 * @bpp:           destination bits per pixel
 * @reverse:       byte/pixel order conversion settings
 */
static inline void fb_bitmap_imageblit(const struct fb_image *image, struct fb_address *dst,
				       unsigned int bits_per_line, const u32 *palette, int bpp,
				       struct fb_reverse reverse)
{
	unsigned long fgcolor = image->fg_color;
	unsigned long bgcolor = image->bg_color;

	if (palette) {
		fgcolor = palette[fgcolor];
		bgcolor = palette[bgcolor];
	}

	if (!dst->bits && !(bits_per_line & (BITS_PER_LONG-1))) {
		const int words_per_line = bits_per_line / BITS_PER_LONG;

		if (bpp == BITS_PER_LONG && BITS_PER_LONG == 32) {
			fb_bitmap_1ppw(image, dst, fgcolor, bgcolor,
				       words_per_line, reverse);
			return;
		}
		if (bpp == BITS_PER_LONG/2 && !(image->width & 1)) {
			fb_bitmap_2ppw(image, dst, fgcolor, bgcolor,
				       words_per_line, reverse);
			return;
		}
		if (bpp == BITS_PER_LONG/4 && !(image->width & 3)) {
			fb_bitmap_4ppw(image, dst, fgcolor, bgcolor,
				       words_per_line, reverse);
			return;
		}
	}

	if (bpp <= 0 || bpp > BITS_PER_LONG)
		return;

	if (bpp <= BITS_PER_BYTE)
		fb_bitmap4x_imageblit(image, dst, fgcolor, bgcolor, bpp,
				      bits_per_line, reverse);
	else
		fb_bitmap1x_imageblit(image, dst, fgcolor, bgcolor, bpp,
				      bits_per_line, reverse);
}
/*
 * Draw @image into the framebuffer described by @p at (image->dx, image->dy).
 *
 * Images with a depth of 1 are drawn as two colour bitmaps, all others as
 * one byte per pixel colour images.
 */
static inline void fb_imageblit(struct fb_info *p, const struct fb_image *image)
{
	const int bpp = p->var.bits_per_pixel;
	const unsigned int bits_per_line = BYTES_TO_BITS(p->fix.line_length);
	const u32 *palette = fb_palette(p);
	struct fb_reverse reverse = fb_reverse_init(p);
	struct fb_address dst = fb_address_init(p);
	unsigned int start;

	/* bit offset of the top left destination pixel */
	start = image->dy * bits_per_line + image->dx * bpp;
	fb_address_forward(&dst, start);

	if (image->depth != 1) {
		fb_color_imageblit(image, &dst, bits_per_line, palette, bpp, reverse);
		return;
	}
	fb_bitmap_imageblit(image, &dst, bits_per_line, palette, bpp, reverse);
}