linux/drivers/video/fbdev/core/fb_draw.h
Zsolt Kajtar eabb032930 fbdev: Refactoring the fbcon packed pixel drawing routines
The original version duplicated more or less the same algorithms for
both system and I/O memory.

In this version the drawing algorithms (copy/fill/blit) are separated
from the memory access (system and I/O). The two parts are combined in
the loadable module sources. This also makes it more robust against
wrong memory access types or alignment mistakes, as there's no direct
pointer access or arithmetic in the algorithm sources anymore.
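
The split can be pictured roughly like this; the struct layout and the
accessor bodies below are only an illustration (assumed for the sketch,
not taken from the patch), but the helper names are the ones the shared
algorithm sources build on:

/* sys flavour: the framebuffer lives in ordinary system memory */
struct fb_address {
	void *address;	/* word aligned base address */
	int bits;	/* bit offset of the first pixel within that word */
};

static inline unsigned long fb_read_offset(int offset, const struct fb_address *adr)
{
	return ((unsigned long *)adr->address)[offset];
}

static inline void fb_write_offset(unsigned long val, int offset,
				   const struct fb_address *adr)
{
	((unsigned long *)adr->address)[offset] = val;
}

/* the I/O memory flavour provides the same helpers on top of
 * fb_readl()/fb_writel() style accessors; each module source then
 * includes the shared algorithm code on top of its accessors */
#include "fb_draw.h"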

Due to liberal use of inlining the compiled result is a single function
in all six cases, without unnecessary function calls. Unlike earlier,
the use of macros could be minimized, as both gcc and clang are now
capable of doing the same with inline functions just as well.

What wasn't quite the same in the two variants is the support for pixel
order reversing. This version can do that for both system and I/O
memory, not only for the latter. As demand for low bits-per-pixel modes
isn't high, there's a configuration option to enable this separately
for the CFB and SYS modules.

The pixel reversing algorithm is different from the earlier one and was
designed so that it can take advantage of bit order reversing
instructions on architectures which have them, even for higher
bits-per-pixel modes like four bpp.

One of the shortcomings of the earlier version was the incomplete
support for foreign endian framebuffers. Now all three drawing
algorithms produce correct output on both endians, with native as well
as foreign ordered framebuffers. This is one of the important
differences, even if otherwise the algorithms don't look much different
from before.

All three routines now work with aligned native word accesses. As a
consequence, blitting is no longer limited to 32 bits on 64-bit
architectures, as it was before.
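
For illustration, filling an arbitrary run of pixel bits then
decomposes into a masked head word, whole aligned words and a masked
tail word. This is only a sketch built from the helpers in fb_draw.h
(fb_pixel_mask(), fb_modify_offset(), fb_write_offset()), not the
actual fill routine from the patch set; "pat" is assumed to already
hold the fill pattern replicated across a long in the framebuffer's
ordering.

static void fill_bits(const struct fb_address *dst, unsigned long pat,
		      unsigned int width, struct fb_reverse reverse)
{
	unsigned long mask = fb_pixel_mask(dst->bits, reverse);
	int offset = 0;

	if (dst->bits + width < BITS_PER_LONG) {
		/* the run starts and ends within the same word */
		mask &= ~fb_pixel_mask(dst->bits + width, reverse);
		fb_modify_offset(pat, mask, 0, dst);
		return;
	}

	if (dst->bits) {
		/* unaligned head: preserve the pixels before the run */
		fb_modify_offset(pat, mask, offset++, dst);
		width -= BITS_PER_LONG - dst->bits;
	}

	/* aligned middle: whole words, no read-modify-write needed */
	for (; width >= BITS_PER_LONG; width -= BITS_PER_LONG)
		fb_write_offset(pat, offset++, dst);

	/* unaligned tail: preserve the pixels after the run */
	if (width)
		fb_modify_offset(pat, ~fb_pixel_mask(width, reverse), offset, dst);
}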

The old routines silently assumed that rows are a multiple of the word
size. Due to how the new routines work this isn't a requirement any
more, and accesses are done aligned regardless. However, if the
framebuffer isn't laid out that way, some of the fast paths won't be
available.

As this code is supposed to run on all supported architectures, it
wasn't optimized for any particular one. That doesn't mean I haven't
looked at the disassembly; that's where I noticed, for example, that it
isn't a good idea to use the fallback bit reversing code.

The low bits-per-pixel modes should be faster than before, as the new
routines can blit 4 pixels at a time.

For the higher bits-per-pixel modes I retained the specialized aligned
routines, so those should perform more or less the same, except on
64-bit architectures. There the blitting word size is now doubled,
which means 32 bpp is no longer done a single pixel at a time.

The code was tested on x86, amd64, mips32 and mips64, the latter two in
big endian configuration. Originally I thought I could get away with
the first two, but with such bit twisting code byte ordering is tricky
and not really possible to get right without actually verifying it.

While writing such routines isn't rocket science, a lot of time was
spent on making sure that pixel ordering, foreign byte order, various
bits per pixel, CPU endianness and word size give the expected result
in all sorts of combinations, without making the code overly
complicated or full of special cases.

Signed-off-by: Zsolt Kajtar <soci@c64.rulez.org>
Signed-off-by: Helge Deller <deller@gmx.de>
2025-03-26 22:39:21 +01:00

/* SPDX-License-Identifier: GPL-2.0
 *
 * Various common functions used by the framebuffer drawing code
 *
 * Copyright (C) 2025 Zsolt Kajtar (soci@c64.rulez.org)
 */
#ifndef _FB_DRAW_H
#define _FB_DRAW_H

/* swap bytes in a long, independent of word size */
#define swab_long _swab_long(BITS_PER_LONG)
#define _swab_long(x) __swab_long(x)
#define __swab_long(x) swab##x
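/* i.e. swab_long expands to swab32() on 32-bit and swab64() on 64-bit builds */
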
/* move the address pointer by the number of words */
static inline void fb_address_move_long(struct fb_address *adr, int offset)
{
	adr->address += offset * (BITS_PER_LONG / BITS_PER_BYTE);
}

/* move the address pointer forward with the number of bits */
static inline void fb_address_forward(struct fb_address *adr, unsigned int offset)
{
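	/* e.g. with BITS_PER_LONG == 32, bits == 24 and offset == 16 the
	 * address advances by one word and bits becomes 8
	 */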
	unsigned int bits = (unsigned int)adr->bits + offset;

	adr->bits = bits & (BITS_PER_LONG - 1u);
	adr->address += (bits & ~(BITS_PER_LONG - 1u)) / BITS_PER_BYTE;
}

/* move the address pointer backwards with the number of bits */
static inline void fb_address_backward(struct fb_address *adr, unsigned int offset)
{
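	/* e.g. with BITS_PER_LONG == 32, bits == 8 and offset == 16 the
	 * address moves back by one word and bits becomes 24
	 */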
	int bits = adr->bits - (int)offset;

	adr->bits = bits & (BITS_PER_LONG - 1);
	if (bits < 0)
		adr->address -= (adr->bits - bits) / BITS_PER_BYTE;
	else
		adr->address += (bits - adr->bits) / BITS_PER_BYTE;
}

/* compose pixels based on mask */
static inline unsigned long fb_comp(unsigned long set, unsigned long unset, unsigned long mask)
{
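	/* bits set in mask are taken from "set", clear ones from "unset",
	 * e.g. fb_comp(0xff00, 0x00ff, 0x0ff0) == 0x0f0f
	 */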
	return ((set ^ unset) & mask) ^ unset;
}

/* framebuffer read-modify-write access for replacing bits in the mask */
static inline void fb_modify_offset(unsigned long val, unsigned long mask,
				    int offset, const struct fb_address *dst)
{
	fb_write_offset(fb_comp(val, fb_read_offset(offset, dst), mask), offset, dst);
}

/*
 * get current palette, if applicable for visual
 *
 * The pseudo color table entries (and colors) are right justified and in the
 * same byte order as they are expected to be placed into a native ordered
 * framebuffer memory. What that means:
 *
 * Expected bytes in framebuffer memory (in native order):
 * RR GG BB RR GG BB RR GG BB ...
 *
 * Pseudo palette entry on a little endian arch:
 * RR | GG << 8 | BB << 16
 *
 * Pseudo palette entry on a big endian arch:
 * RR << 16 | GG << 8 | BB
 */
static inline const u32 *fb_palette(struct fb_info *info)
{
	return (info->fix.visual == FB_VISUAL_TRUECOLOR ||
		info->fix.visual == FB_VISUAL_DIRECTCOLOR) ? info->pseudo_palette : NULL;
}

/* move pixels right on screen when framebuffer is in native order */
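/*
 * In native order the first pixel of a word occupies the least significant
 * bits on little endian and the most significant bits on big endian, so a
 * move towards later pixels on screen is a shift towards the more significant
 * end on little endian and towards the less significant end on big endian.
 */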
static inline unsigned long fb_right(unsigned long value, int index)
{
#ifdef __LITTLE_ENDIAN
	return value << index;
#else
	return value >> index;
#endif
}

/* move pixels left on screen when framebuffer is in native order */
static inline unsigned long fb_left(unsigned long value, int index)
{
#ifdef __LITTLE_ENDIAN
	return value >> index;
#else
	return value << index;
#endif
}

/* reversal options */
struct fb_reverse {
	bool byte, pixel;
};

/* reverse bits of each byte in a long */
static inline unsigned long fb_reverse_bits_long(unsigned long val)
{
#if defined(CONFIG_HAVE_ARCH_BITREVERSE) && BITS_PER_LONG == 32
	return bitrev8x4(val);
#else
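	/* generic fallback: swap adjacent bits, then 2-bit pairs, then nibbles;
	 * the masks ~0UL/3, ~0UL/5 and ~0UL/17 are 0x55..., 0x33... and 0x0f...
	 */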
	val = fb_comp(val >> 1, val << 1, ~0UL / 3);
	val = fb_comp(val >> 2, val << 2, ~0UL / 5);
	return fb_comp(val >> 4, val << 4, ~0UL / 17);
#endif
}

/* apply byte and bit reversals as necessary */
static inline unsigned long fb_reverse_long(unsigned long val,
					    struct fb_reverse reverse)
{
	if (reverse.pixel)
		val = fb_reverse_bits_long(val);
	return reverse.byte ? swab_long(val) : val;
}

/* calculate a pixel mask for the given reversal */
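/*
 * The mask selects the pixels from bit offset "index" up to the end of the
 * word, expressed in the storage order implied by the reversal settings.
 */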
static inline unsigned long fb_pixel_mask(int index, struct fb_reverse reverse)
{
#ifdef FB_REV_PIXELS_IN_BYTE
	if (reverse.byte)
		return reverse.pixel ? fb_left(~0UL, index) : swab_long(fb_right(~0UL, index));
	else
		return reverse.pixel ? swab_long(fb_left(~0UL, index)) : fb_right(~0UL, index);
#else
	return reverse.byte ? swab_long(fb_right(~0UL, index)) : fb_right(~0UL, index);
#endif
}

/*
 * initialise reversals based on info
 *
 * Normally the first byte is the low byte on little endian and the high byte
 * on big endian. If it's the other way around then that's reverse byte order.
 *
 * Normally the first pixel is the LSB on little endian and the MSB on big
 * endian. If that's not the case then that's reverse pixel order.
 */
static inline struct fb_reverse fb_reverse_init(struct fb_info *info)
{
	struct fb_reverse reverse;

#ifdef __LITTLE_ENDIAN
	reverse.byte = fb_be_math(info) != 0;
#else
	reverse.byte = fb_be_math(info) == 0;
#endif
#ifdef FB_REV_PIXELS_IN_BYTE
	reverse.pixel = info->var.bits_per_pixel < BITS_PER_BYTE
			&& (info->var.nonstd & FB_NONSTD_REV_PIX_IN_B);
#else
	reverse.pixel = false;
#endif
	return reverse;
}

#endif /* _FB_DRAW_H */