mirror of
https://github.com/raspberrypi/linux.git
synced 2025-12-06 10:00:17 +00:00
Improve __copy_to_user and __copy_from_user performance
Provide a __copy_from_user that uses memcpy. On BCM2708, use optimised memcpy/memmove/memcmp/memset implementations.
arch/arm: Add mmiocpy/set aliases for memcpy/set
See: https://github.com/raspberrypi/linux/issues/1082
copy_from_user: CPU_SW_DOMAIN_PAN compatibility
The downstream copy_from_user acceleration must also play nice with CONFIG_CPU_SW_DOMAIN_PAN.
See: https://github.com/raspberrypi/linux/issues/1381
Signed-off-by: Phil Elwell <phil@raspberrypi.org>
This commit is contained in:
@@ -39,4 +39,9 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
|
|||||||
return __memset64(p, v, n * 8, v >> 32);
|
return __memset64(p, v, n * 8, v >> 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_BCM2835_FAST_MEMCPY
|
||||||
|
#define __HAVE_ARCH_MEMCMP
|
||||||
|
extern int memcmp(const void *, const void *, size_t);
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -512,6 +512,9 @@ do { \
|
|||||||
extern unsigned long __must_check
|
extern unsigned long __must_check
|
||||||
arm_copy_from_user(void *to, const void __user *from, unsigned long n);
|
arm_copy_from_user(void *to, const void __user *from, unsigned long n);
|
||||||
|
|
||||||
|
extern unsigned long __must_check
|
||||||
|
__copy_from_user_std(void *to, const void __user *from, unsigned long n);
|
||||||
|
|
||||||
static inline unsigned long __must_check
|
static inline unsigned long __must_check
|
||||||
raw_copy_from_user(void *to, const void __user *from, unsigned long n)
|
raw_copy_from_user(void *to, const void __user *from, unsigned long n)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -7,8 +7,8 @@
|
|||||||
|
|
||||||
lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
|
lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
|
||||||
csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
|
csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
|
||||||
delay.o delay-loop.o findbit.o memchr.o memcpy.o \
|
delay.o delay-loop.o findbit.o memchr.o \
|
||||||
memmove.o memset.o setbit.o \
|
setbit.o \
|
||||||
strchr.o strrchr.o \
|
strchr.o strrchr.o \
|
||||||
testchangebit.o testclearbit.o testsetbit.o \
|
testchangebit.o testclearbit.o testsetbit.o \
|
||||||
ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
|
ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
|
||||||
@@ -19,6 +19,16 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
|
|||||||
mmu-y := clear_user.o copy_page.o getuser.o putuser.o \
|
mmu-y := clear_user.o copy_page.o getuser.o putuser.o \
|
||||||
copy_from_user.o copy_to_user.o
|
copy_from_user.o copy_to_user.o
|
||||||
|
|
||||||
|
# Choose optimised implementations for Raspberry Pi
|
||||||
|
ifeq ($(CONFIG_BCM2835_FAST_MEMCPY),y)
|
||||||
|
CFLAGS_uaccess_with_memcpy.o += -DCOPY_FROM_USER_THRESHOLD=1600
|
||||||
|
CFLAGS_uaccess_with_memcpy.o += -DCOPY_TO_USER_THRESHOLD=672
|
||||||
|
obj-$(CONFIG_MODULES) += exports_rpi.o
|
||||||
|
lib-y += memcpy_rpi.o memmove_rpi.o memset_rpi.o memcmp_rpi.o
|
||||||
|
else
|
||||||
|
lib-y += memcpy.o memmove.o memset.o
|
||||||
|
endif
|
||||||
|
|
||||||
# using lib_ here won't override already available weak symbols
|
# using lib_ here won't override already available weak symbols
|
||||||
obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o
|
obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o
|
||||||
|
|
||||||
|
|||||||
159
arch/arm/lib/arm-mem.h
Normal file
159
arch/arm/lib/arm-mem.h
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2013, Raspberry Pi Foundation
|
||||||
|
Copyright (c) 2013, RISC OS Open Ltd
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holder nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* myfunc fname
 *
 * Open a globally visible function called fname: emits the .func
 * directive, exports the symbol with .global and defines the label.
 * NOTE(review): no matching .endfunc is emitted by this macro - confirm
 * that users close the function themselves.
 */
.macro myfunc fname
|
||||||
|
.func fname
|
||||||
|
.global fname
|
||||||
|
fname:
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* preload_leading_step1 backwards, ptr, base
 *
 * Issue the initial run of cache preloads for the buffer addressed by base.
 *   backwards - assembly-time flag, non-zero when the buffer is walked
 *               towards lower addresses
 *   ptr       - register that receives the 32-byte-aligned preload address
 *   base      - register holding the current buffer address (only read)
 * The symbol prefetch_distance must already have been defined with .set
 * by the file that uses this macro.
 */
.macro preload_leading_step1 backwards, ptr, base
|
||||||
|
/* If the destination is already 16-byte aligned, then we need to preload
|
||||||
|
* between 0 and prefetch_distance (inclusive) cache lines ahead so there
|
||||||
|
* are no gaps when the inner loop starts.
|
||||||
|
*/
|
||||||
|
/* Round down to a 32-byte boundary; when going backwards it is base-1
 * rather than base that gets rounded. */
.if backwards
|
||||||
|
sub ptr, base, #1
|
||||||
|
bic ptr, ptr, #31
|
||||||
|
.else
|
||||||
|
bic ptr, base, #31
|
||||||
|
.endif
|
||||||
|
.set OFFSET, 0
|
||||||
|
/* Emit prefetch_distance+1 pld instructions, one per 32 bytes, with
 * OFFSET stepping by 32 in the direction of travel. */
.rept prefetch_distance+1
|
||||||
|
pld [ptr, #OFFSET]
|
||||||
|
.if backwards
|
||||||
|
.set OFFSET, OFFSET-32
|
||||||
|
.else
|
||||||
|
.set OFFSET, OFFSET+32
|
||||||
|
.endif
|
||||||
|
.endr
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* preload_leading_step2 backwards, ptr, base, leading_bytes, tmp
 *
 * Conditionally issue one further preload, at ptr plus or minus
 * 32*(prefetch_distance+1), depending on base and leading_bytes.
 *   backwards     - assembly-time flag, non-zero for descending addresses
 *   ptr           - register holding the aligned preload address
 *                   (presumably the one set up by preload_leading_step1)
 *   base          - register holding the buffer address
 *   leading_bytes - register; only its low 5 bits take part, because it
 *                   is always used shifted left by 32-5
 *   tmp           - scratch register, overwritten
 * The condition flags are changed and local label 61 is defined.
 * NOTE(review): the backwards path uses sbc, whose result depends on the
 * carry flag on entry - confirm that callers establish it.
 */
.macro preload_leading_step2 backwards, ptr, base, leading_bytes, tmp
|
||||||
|
/* However, if the destination is not 16-byte aligned, we may need to
|
||||||
|
* preload one more cache line than that. The question we need to ask is:
|
||||||
|
* are the leading bytes more than the amount by which the source
|
||||||
|
* pointer will be rounded down for preloading, and if so, by how many
|
||||||
|
* cache lines?
|
||||||
|
*/
|
||||||
|
.if backwards
|
||||||
|
/* Here we compare against how many bytes we are into the
|
||||||
|
* cache line, counting down from the highest such address.
|
||||||
|
* Effectively, we want to calculate
|
||||||
|
* leading_bytes = dst&15
|
||||||
|
* cacheline_offset = 31-((src-leading_bytes-1)&31)
|
||||||
|
* extra_needed = leading_bytes - cacheline_offset
|
||||||
|
* and test if extra_needed is <= 0, or rearranging:
|
||||||
|
* leading_bytes + (src-leading_bytes-1)&31 <= 31
|
||||||
|
*/
|
||||||
|
mov tmp, base, lsl #32-5
|
||||||
|
sbc tmp, tmp, leading_bytes, lsl #32-5
|
||||||
|
adds tmp, tmp, leading_bytes, lsl #32-5
|
||||||
|
/* No carry out of the addition: skip the extra preload */
bcc 61f
|
||||||
|
pld [ptr, #-32*(prefetch_distance+1)]
|
||||||
|
.else
|
||||||
|
/* Effectively, we want to calculate
|
||||||
|
* leading_bytes = (-dst)&15
|
||||||
|
* cacheline_offset = (src+leading_bytes)&31
|
||||||
|
* extra_needed = leading_bytes - cacheline_offset
|
||||||
|
* and test if extra_needed is <= 0.
|
||||||
|
*/
|
||||||
|
mov tmp, base, lsl #32-5
|
||||||
|
add tmp, tmp, leading_bytes, lsl #32-5
|
||||||
|
rsbs tmp, tmp, leading_bytes, lsl #32-5
|
||||||
|
/* Unsigned lower-or-same result: skip the extra preload */
bls 61f
|
||||||
|
pld [ptr, #32*(prefetch_distance+1)]
|
||||||
|
.endif
|
||||||
|
61:
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* preload_trailing backwards, base, remain, tmp
 *
 * Issue the extra preloads wanted near the end of a buffer. Depending on
 * the low 5 bits of base (negated first when backwards) and of remain,
 * this executes no pld, one pld or two pld instructions.
 *   backwards - assembly-time flag, non-zero for descending addresses
 *   base      - register holding the current buffer address (only read)
 *   remain    - register holding the remaining count (only read)
 *   tmp       - scratch register, overwritten
 * The condition flags are changed and local labels 81 and 82 are defined.
 */
.macro preload_trailing backwards, base, remain, tmp
|
||||||
|
/* We need either 0, 1 or 2 extra preloads */
|
||||||
|
.if backwards
|
||||||
|
rsb tmp, base, #0
|
||||||
|
mov tmp, tmp, lsl #32-5
|
||||||
|
.else
|
||||||
|
mov tmp, base, lsl #32-5
|
||||||
|
.endif
|
||||||
|
adds tmp, tmp, remain, lsl #32-5
|
||||||
|
/* adceqs is the pre-UAL spelling of a conditional (eq), flag-setting adc */
adceqs tmp, tmp, #0
|
||||||
|
/* The instruction above has two effects: ensures Z is only
|
||||||
|
* set if C was clear (so Z indicates that both shifted quantities
|
||||||
|
* were 0), and clears C if Z was set (so C indicates that the sum
|
||||||
|
* of the shifted quantities was greater and not equal to 32) */
|
||||||
|
/* Z set: no extra preload at all */
beq 82f
|
||||||
|
/* bic does not set flags, so C from above is still valid at the bcc */
.if backwards
|
||||||
|
sub tmp, base, #1
|
||||||
|
bic tmp, tmp, #31
|
||||||
|
.else
|
||||||
|
bic tmp, base, #31
|
||||||
|
.endif
|
||||||
|
/* C clear: one preload only (enter at 81); C set: two preloads */
bcc 81f
|
||||||
|
.if backwards
|
||||||
|
pld [tmp, #-32*(prefetch_distance+1)]
|
||||||
|
81:
|
||||||
|
pld [tmp, #-32*prefetch_distance]
|
||||||
|
.else
|
||||||
|
pld [tmp, #32*(prefetch_distance+2)]
|
||||||
|
81:
|
||||||
|
pld [tmp, #32*(prefetch_distance+1)]
|
||||||
|
.endif
|
||||||
|
82:
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* preload_all backwards, narrow_case, shift, base, remain, tmp0, tmp1
 *
 * Preload every 32-byte line touched by a buffer that starts at base and
 * extends for (remain << shift) bytes.
 *   backwards   - assembly-time flag, non-zero for descending addresses
 *   narrow_case - assembly-time flag; when non-zero no loop is emitted and
 *                 at most one further pld (for the far end) is issued
 *   shift       - constant shift applied to remain to obtain a byte count
 *   base        - register holding the buffer address (only read)
 *   remain      - register holding the length before shifting (only read)
 *   tmp0, tmp1  - scratch registers, overwritten
 * The condition flags are changed and local labels 91 and 92 are defined.
 */
.macro preload_all backwards, narrow_case, shift, base, remain, tmp0, tmp1
|
||||||
|
/* tmp0 = line holding the near end (preloaded at once),
 * tmp1 = line holding the far end */
.if backwards
|
||||||
|
sub tmp0, base, #1
|
||||||
|
bic tmp0, tmp0, #31
|
||||||
|
pld [tmp0]
|
||||||
|
sub tmp1, base, remain, lsl #shift
|
||||||
|
.else
|
||||||
|
bic tmp0, base, #31
|
||||||
|
pld [tmp0]
|
||||||
|
add tmp1, base, remain, lsl #shift
|
||||||
|
sub tmp1, tmp1, #1
|
||||||
|
.endif
|
||||||
|
bic tmp1, tmp1, #31
|
||||||
|
cmp tmp1, tmp0
|
||||||
|
/* Both ends are in the same line: nothing more to preload */
beq 92f
|
||||||
|
.if narrow_case
|
||||||
|
/* In this case, all the data fits in either 1 or 2 cache lines */
|
||||||
|
pld [tmp1]
|
||||||
|
.else
|
||||||
|
/* Step tmp0 one line at a time, preloading each, until it equals tmp1.
 * pld sits between cmp and bne, so the final line is preloaded too. */
91:
|
||||||
|
.if backwards
|
||||||
|
sub tmp0, tmp0, #32
|
||||||
|
.else
|
||||||
|
add tmp0, tmp0, #32
|
||||||
|
.endif
|
||||||
|
cmp tmp0, tmp1
|
||||||
|
pld [tmp0]
|
||||||
|
bne 91b
|
||||||
|
.endif
|
||||||
|
92:
|
||||||
|
.endm
|
||||||
@@ -107,7 +107,8 @@
|
|||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
ENTRY(arm_copy_from_user)
|
ENTRY(__copy_from_user_std)
|
||||||
|
WEAK(arm_copy_from_user)
|
||||||
#ifdef CONFIG_CPU_SPECTRE
|
#ifdef CONFIG_CPU_SPECTRE
|
||||||
get_thread_info r3
|
get_thread_info r3
|
||||||
ldr r3, [r3, #TI_ADDR_LIMIT]
|
ldr r3, [r3, #TI_ADDR_LIMIT]
|
||||||
@@ -117,6 +118,7 @@ ENTRY(arm_copy_from_user)
|
|||||||
#include "copy_template.S"
|
#include "copy_template.S"
|
||||||
|
|
||||||
ENDPROC(arm_copy_from_user)
|
ENDPROC(arm_copy_from_user)
|
||||||
|
ENDPROC(__copy_from_user_std)
|
||||||
|
|
||||||
.pushsection .fixup,"ax"
|
.pushsection .fixup,"ax"
|
||||||
.align 0
|
.align 0
|
||||||
|
|||||||
37
arch/arm/lib/exports_rpi.c
Normal file
37
arch/arm/lib/exports_rpi.c
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2014, Raspberry Pi (Trading) Ltd.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions, and the following disclaimer,
|
||||||
|
* without modification.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. The names of the above-listed copyright holders may not be used
|
||||||
|
* to endorse or promote products derived from this software without
|
||||||
|
* specific prior written permission.
|
||||||
|
*
|
||||||
|
* ALTERNATIVELY, this software may be distributed under the terms of the
|
||||||
|
* GNU General Public License ("GPL") version 2, as published by the Free
|
||||||
|
* Software Foundation.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||||
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/kernel.h>
|
||||||
|
#include <linux/module.h>
|
||||||
|
|
||||||
|
/* Export the memcmp symbol so that loadable modules can link against it. */
EXPORT_SYMBOL(memcmp);
|
||||||
285
arch/arm/lib/memcmp_rpi.S
Normal file
285
arch/arm/lib/memcmp_rpi.S
Normal file
@@ -0,0 +1,285 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2013, Raspberry Pi Foundation
|
||||||
|
Copyright (c) 2013, RISC OS Open Ltd
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holder nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/linkage.h>
|
||||||
|
#include "arm-mem.h"
|
||||||
|
|
||||||
|
/* Prevent the stack from becoming executable */
|
||||||
|
#if defined(__linux__) && defined(__ELF__)
|
||||||
|
.section .note.GNU-stack,"",%progbits
|
||||||
|
#endif
|
||||||
|
|
||||||
|
.text
|
||||||
|
.arch armv6
|
||||||
|
.object_arch armv4
|
||||||
|
.arm
|
||||||
|
.altmacro
|
||||||
|
.p2align 2
|
||||||
|
|
||||||
|
/* memcmp_process_head unaligned
 *
 * Fetch the next 16 bytes of each buffer: DAT0-DAT3 from S_1 and
 * DAT4-DAT7 from S_2, advancing both pointers by 16.
 *   unaligned - assembly-time flag; when non-zero S_1 is read with four
 *               single-word ldr instructions instead of one ldmia
 *               (presumably because S_1 may not be word aligned then -
 *               memcmp picks this variant when S_1 & 3 is non-zero)
 * S_1, S_2 and DATn are the .req register aliases set up in memcmp.
 */
.macro memcmp_process_head unaligned
|
||||||
|
.if unaligned
|
||||||
|
ldr DAT0, [S_1], #4
|
||||||
|
ldr DAT1, [S_1], #4
|
||||||
|
ldr DAT2, [S_1], #4
|
||||||
|
ldr DAT3, [S_1], #4
|
||||||
|
.else
|
||||||
|
ldmia S_1!, {DAT0, DAT1, DAT2, DAT3}
|
||||||
|
.endif
|
||||||
|
ldmia S_2!, {DAT4, DAT5, DAT6, DAT7}
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* memcmp_process_tail
 *
 * Compare the four word pairs loaded by memcmp_process_head, lowest
 * address first, and branch to label 200 if any pair differs. Each cmpeq
 * only executes while everything so far was equal, so on arrival at 200
 * the flags are those of the first differing pair.
 */
.macro memcmp_process_tail
|
||||||
|
cmp DAT0, DAT4
|
||||||
|
cmpeq DAT1, DAT5
|
||||||
|
cmpeq DAT2, DAT6
|
||||||
|
cmpeq DAT3, DAT7
|
||||||
|
bne 200f
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* memcmp_leading_31bytes
 *
 * Compare the leading bytes selected by the low five bits of OFF:
 *   bit 0 - 1 byte, bit 1 - 2 bytes, bit 2 - 4 bytes,
 *   bit 3 - 8 bytes, bit 4 - 16 bytes.
 * Both pointers advance past what was read and N is reduced by the same
 * amount. A register whose load was skipped is zeroed on both sides so
 * that it compares equal. Branches to label 200 on a mismatch, otherwise
 * falls through. Overwrites the DATn registers and the condition flags;
 * defines local label 105.
 * ldrmib, ldrcsh and ldmcsia are pre-UAL spellings (condition before the
 * size/mode suffix).
 */
.macro memcmp_leading_31bytes
|
||||||
|
/* After this shift: N flag = OFF bit 0, C flag = OFF bit 1. None of the
 * following loads, moves or subtractions alter the flags. */
movs DAT0, OFF, lsl #31
|
||||||
|
ldrmib DAT0, [S_1], #1
|
||||||
|
ldrcsh DAT1, [S_1], #2
|
||||||
|
ldrmib DAT4, [S_2], #1
|
||||||
|
ldrcsh DAT5, [S_2], #2
|
||||||
|
movpl DAT0, #0
|
||||||
|
movcc DAT1, #0
|
||||||
|
movpl DAT4, #0
|
||||||
|
movcc DAT5, #0
|
||||||
|
submi N, N, #1
|
||||||
|
subcs N, N, #2
|
||||||
|
cmp DAT0, DAT4
|
||||||
|
cmpeq DAT1, DAT5
|
||||||
|
bne 200f
|
||||||
|
/* After this shift: N flag = OFF bit 2, C flag = OFF bit 3 */
movs DAT0, OFF, lsl #29
|
||||||
|
ldrmi DAT0, [S_1], #4
|
||||||
|
ldrcs DAT1, [S_1], #4
|
||||||
|
ldrcs DAT2, [S_1], #4
|
||||||
|
ldrmi DAT4, [S_2], #4
|
||||||
|
ldmcsia S_2!, {DAT5, DAT6}
|
||||||
|
movpl DAT0, #0
|
||||||
|
movcc DAT1, #0
|
||||||
|
movcc DAT2, #0
|
||||||
|
movpl DAT4, #0
|
||||||
|
movcc DAT5, #0
|
||||||
|
movcc DAT6, #0
|
||||||
|
submi N, N, #4
|
||||||
|
subcs N, N, #8
|
||||||
|
cmp DAT0, DAT4
|
||||||
|
cmpeq DAT1, DAT5
|
||||||
|
cmpeq DAT2, DAT6
|
||||||
|
bne 200f
|
||||||
|
/* OFF bit 4: one full 16-byte step, S_1 read with single-word loads */
tst OFF, #16
|
||||||
|
beq 105f
|
||||||
|
memcmp_process_head 1
|
||||||
|
sub N, N, #16
|
||||||
|
memcmp_process_tail
|
||||||
|
105:
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* memcmp_trailing_15bytes unaligned
 *
 * Compare the final bytes selected by the low four bits of N:
 *   bit 3 - 8 bytes, bit 2 - 4 bytes, bit 1 - 2 bytes, bit 0 - 1 byte.
 * N is destroyed (it is shifted in place to move those bits into the
 * flags). A register whose load was skipped is zeroed on both sides so
 * that it compares equal. Branches to label 200 on a mismatch, otherwise
 * falls through.
 *   unaligned - assembly-time flag; when non-zero the 8-byte read from
 *               S_1 uses two single-word ldr instead of ldmcsia
 */
.macro memcmp_trailing_15bytes unaligned
|
||||||
|
/* After this shift: C flag = N bit 3, N flag = N bit 2 */
movs N, N, lsl #29
|
||||||
|
.if unaligned
|
||||||
|
ldrcs DAT0, [S_1], #4
|
||||||
|
ldrcs DAT1, [S_1], #4
|
||||||
|
.else
|
||||||
|
ldmcsia S_1!, {DAT0, DAT1}
|
||||||
|
.endif
|
||||||
|
ldrmi DAT2, [S_1], #4
|
||||||
|
ldmcsia S_2!, {DAT4, DAT5}
|
||||||
|
ldrmi DAT6, [S_2], #4
|
||||||
|
movcc DAT0, #0
|
||||||
|
movcc DAT1, #0
|
||||||
|
movpl DAT2, #0
|
||||||
|
movcc DAT4, #0
|
||||||
|
movcc DAT5, #0
|
||||||
|
movpl DAT6, #0
|
||||||
|
cmp DAT0, DAT4
|
||||||
|
cmpeq DAT1, DAT5
|
||||||
|
cmpeq DAT2, DAT6
|
||||||
|
bne 200f
|
||||||
|
/* Two more places: C flag = original N bit 1, N flag = original N bit 0 */
movs N, N, lsl #2
|
||||||
|
ldrcsh DAT0, [S_1], #2
|
||||||
|
/* Last possible byte: the pointers are not advanced any further */
ldrmib DAT1, [S_1]
|
||||||
|
ldrcsh DAT4, [S_2], #2
|
||||||
|
ldrmib DAT5, [S_2]
|
||||||
|
movcc DAT0, #0
|
||||||
|
movpl DAT1, #0
|
||||||
|
movcc DAT4, #0
|
||||||
|
movpl DAT5, #0
|
||||||
|
cmp DAT0, DAT4
|
||||||
|
cmpeq DAT1, DAT5
|
||||||
|
bne 200f
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* memcmp_long_inner_loop unaligned
 *
 * Body of the long-case comparison, including its own return.
 *   110: compares 32 bytes per pass (two head/tail pairs) with one pld
 *        for each buffer, until subs takes N below zero
 *   then the trailing preloads for both buffers are issued and N is
 *        re-biased for 16-byte steps
 *   120: compares 16 bytes per pass until subs takes N below zero
 *   finally the remaining N & 15 bytes go through memcmp_trailing_15bytes
 * With no difference found it returns 0 from label 199, switching back to
 * little-endian data accesses first. Mismatches branch to label 200,
 * which memcmp defines.
 *   unaligned - passed through to the head and trailing macros
 * OFF is used as the register offset for the S_1 preload; memcmp sets it
 * up before expanding this macro.
 */
.macro memcmp_long_inner_loop unaligned
|
||||||
|
110:
|
||||||
|
memcmp_process_head unaligned
|
||||||
|
pld [S_2, #prefetch_distance*32 + 16]
|
||||||
|
memcmp_process_tail
|
||||||
|
memcmp_process_head unaligned
|
||||||
|
pld [S_1, OFF]
|
||||||
|
memcmp_process_tail
|
||||||
|
subs N, N, #32
|
||||||
|
bhs 110b
|
||||||
|
/* Just before the final (prefetch_distance+1) 32-byte blocks,
|
||||||
|
* deal with final preloads */
|
||||||
|
/* DAT0 serves as the scratch register for both expansions */
preload_trailing 0, S_1, N, DAT0
|
||||||
|
preload_trailing 0, S_2, N, DAT0
|
||||||
|
add N, N, #(prefetch_distance+2)*32 - 16
|
||||||
|
120:
|
||||||
|
memcmp_process_head unaligned
|
||||||
|
memcmp_process_tail
|
||||||
|
subs N, N, #16
|
||||||
|
bhs 120b
|
||||||
|
/* Trailing words and bytes */
|
||||||
|
tst N, #15
|
||||||
|
beq 199f
|
||||||
|
memcmp_trailing_15bytes unaligned
|
||||||
|
199: /* Reached end without detecting a difference */
|
||||||
|
mov a1, #0
|
||||||
|
setend le
|
||||||
|
pop {DAT1-DAT6, pc}
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/* memcmp_short_inner_loop unaligned
 *
 * Body of the short-case comparison, including its own return. N is
 * biased by -16 up front; label 120 then compares 16 bytes per pass until
 * subs takes N below zero, and the remaining N & 15 bytes go through
 * memcmp_trailing_15bytes. With no difference found it returns 0 from
 * label 199, switching back to little-endian data accesses first.
 * Mismatches branch to label 200, which memcmp defines.
 *   unaligned - passed through to the head and trailing macros
 */
.macro memcmp_short_inner_loop unaligned
|
||||||
|
subs N, N, #16 /* simplifies inner loop termination */
|
||||||
|
/* Fewer than 16 bytes in total: go straight to the trailing bytes */
blo 122f
|
||||||
|
120:
|
||||||
|
memcmp_process_head unaligned
|
||||||
|
memcmp_process_tail
|
||||||
|
subs N, N, #16
|
||||||
|
bhs 120b
|
||||||
|
122: /* Trailing words and bytes */
|
||||||
|
tst N, #15
|
||||||
|
beq 199f
|
||||||
|
memcmp_trailing_15bytes unaligned
|
||||||
|
199: /* Reached end without detecting a difference */
|
||||||
|
mov a1, #0
|
||||||
|
setend le
|
||||||
|
pop {DAT1-DAT6, pc}
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
|
||||||
|
* int memcmp(const void *s1, const void *s2, size_t n);
|
||||||
|
* On entry:
|
||||||
|
* a1 = pointer to buffer 1
|
||||||
|
* a2 = pointer to buffer 2
|
||||||
|
* a3 = number of bytes to compare (as unsigned chars)
|
||||||
|
* On exit:
|
||||||
|
* a1 = >0/=0/<0 if s1 >/=/< s2
|
||||||
|
*/
|
||||||
|
|
||||||
|
.set prefetch_distance, 2
|
||||||
|
|
||||||
|
/* memcmp entry point. The .req aliases below name the registers used by
 * this routine and by the memcmp_* macros it expands. */
ENTRY(memcmp)
|
||||||
|
S_1 .req a1
|
||||||
|
S_2 .req a2
|
||||||
|
N .req a3
|
||||||
|
DAT0 .req a4
|
||||||
|
DAT1 .req v1
|
||||||
|
DAT2 .req v2
|
||||||
|
DAT3 .req v3
|
||||||
|
DAT4 .req v4
|
||||||
|
DAT5 .req v5
|
||||||
|
DAT6 .req v6
|
||||||
|
DAT7 .req ip
|
||||||
|
OFF .req lr
|
||||||
|
|
||||||
|
/* lr is saved as well because it is reused as OFF; every exit pops the
 * saved value straight into pc */
push {DAT1-DAT6, lr}
|
||||||
|
setend be /* lowest-addressed bytes are most significant */
|
||||||
|
|
||||||
|
/* To preload ahead as we go, we need at least (prefetch_distance+2) 32-byte blocks */
|
||||||
|
/* NOTE(review): the threshold equals (prefetch_distance+2) blocks plus 31,
 * presumably allowing for up to 31 leading bytes consumed while S_2 is
 * being aligned - confirm */
cmp N, #(prefetch_distance+3)*32 - 1
|
||||||
|
blo 170f
|
||||||
|
|
||||||
|
/* Long case */
|
||||||
|
/* Adjust N so that the decrement instruction can also test for
|
||||||
|
* inner loop termination. We want it to stop when there are
|
||||||
|
* (prefetch_distance+1) complete blocks to go. */
|
||||||
|
sub N, N, #(prefetch_distance+2)*32
|
||||||
|
preload_leading_step1 0, DAT0, S_1
|
||||||
|
preload_leading_step1 0, DAT1, S_2
|
||||||
|
tst S_2, #31
|
||||||
|
beq 154f
|
||||||
|
rsb OFF, S_2, #0 /* no need to AND with 31 here: the macros below only look at the low 5 bits */
|
||||||
|
preload_leading_step2 0, DAT0, S_1, OFF, DAT2
|
||||||
|
preload_leading_step2 0, DAT1, S_2, OFF, DAT2
|
||||||
|
memcmp_leading_31bytes
|
||||||
|
154: /* Second source now cacheline (32-byte) aligned; we have at
|
||||||
|
* least one prefetch to go. */
|
||||||
|
/* Prefetch offset is best selected such that it lies in the
|
||||||
|
* first 8 of each 32 bytes - but it's just as easy to aim for
|
||||||
|
* the first one */
|
||||||
|
and OFF, S_1, #31
|
||||||
|
rsb OFF, OFF, #32*prefetch_distance
|
||||||
|
/* Choose the loop variant by the word alignment of S_1. Local label 140
 * is defined twice in this routine; each 140f binds to the next one. */
tst S_1, #3
|
||||||
|
bne 140f
|
||||||
|
memcmp_long_inner_loop 0
|
||||||
|
140: memcmp_long_inner_loop 1
|
||||||
|
|
||||||
|
170: /* Short case */
|
||||||
|
teq N, #0
|
||||||
|
/* 199f binds to the return-zero exit inside the next
 * memcmp_short_inner_loop expansion below */
beq 199f
|
||||||
|
preload_all 0, 0, 0, S_1, N, DAT0, DAT1
|
||||||
|
preload_all 0, 0, 0, S_2, N, DAT0, DAT1
|
||||||
|
tst S_2, #3
|
||||||
|
beq 174f
|
||||||
|
/* Compare one byte at a time until S_2 is word aligned or N runs out */
172: subs N, N, #1
|
||||||
|
blo 199f
|
||||||
|
ldrb DAT0, [S_1], #1
|
||||||
|
ldrb DAT4, [S_2], #1
|
||||||
|
cmp DAT0, DAT4
|
||||||
|
bne 200f
|
||||||
|
tst S_2, #3
|
||||||
|
bne 172b
|
||||||
|
174: /* Second source now 4-byte aligned; we have 0 or more bytes to go */
|
||||||
|
tst S_1, #3
|
||||||
|
bne 140f
|
||||||
|
memcmp_short_inner_loop 0
|
||||||
|
140: memcmp_short_inner_loop 1
|
||||||
|
|
||||||
|
200: /* Difference found: determine sign. */
|
||||||
|
/* The flags come from the cmp that saw the mismatch (first operand from
 * S_1, second from S_2); hi and lo are the unsigned conditions */
movhi a1, #1
|
||||||
|
movlo a1, #-1
|
||||||
|
setend le
|
||||||
|
pop {DAT1-DAT6, pc}
|
||||||
|
|
||||||
|
/* Release the register aliases */
.unreq S_1
|
||||||
|
.unreq S_2
|
||||||
|
.unreq N
|
||||||
|
.unreq DAT0
|
||||||
|
.unreq DAT1
|
||||||
|
.unreq DAT2
|
||||||
|
.unreq DAT3
|
||||||
|
.unreq DAT4
|
||||||
|
.unreq DAT5
|
||||||
|
.unreq DAT6
|
||||||
|
.unreq DAT7
|
||||||
|
.unreq OFF
|
||||||
|
ENDPROC(memcmp)
|
||||||
61
arch/arm/lib/memcpy_rpi.S
Normal file
61
arch/arm/lib/memcpy_rpi.S
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2013, Raspberry Pi Foundation
|
||||||
|
Copyright (c) 2013, RISC OS Open Ltd
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holder nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/linkage.h>
|
||||||
|
#include "arm-mem.h"
|
||||||
|
#include "memcpymove.h"
|
||||||
|
|
||||||
|
/* Prevent the stack from becoming executable */
|
||||||
|
#if defined(__linux__) && defined(__ELF__)
|
||||||
|
.section .note.GNU-stack,"",%progbits
|
||||||
|
#endif
|
||||||
|
|
||||||
|
.text
|
||||||
|
.arch armv6
|
||||||
|
.object_arch armv4
|
||||||
|
.arm
|
||||||
|
.altmacro
|
||||||
|
.p2align 2
|
||||||
|
|
||||||
|
/*
|
||||||
|
* void *memcpy(void * restrict s1, const void * restrict s2, size_t n);
|
||||||
|
* On entry:
|
||||||
|
* a1 = pointer to destination
|
||||||
|
* a2 = pointer to source
|
||||||
|
* a3 = number of bytes to copy
|
||||||
|
* On exit:
|
||||||
|
* a1 preserved
|
||||||
|
*/
|
||||||
|
|
||||||
|
.set prefetch_distance, 3
|
||||||
|
|
||||||
|
/* mmiocpy and memcpy are two entry points onto the same code.
 * NOTE(review): the memcpy macro expanded here is presumably provided by
 * memcpymove.h, and its argument 0 presumably selects the forward
 * (non-overlapping) copy - the macro is not visible here, confirm. */
ENTRY(mmiocpy)
|
||||||
|
ENTRY(memcpy)
|
||||||
|
memcpy 0
|
||||||
|
ENDPROC(memcpy)
|
||||||
|
ENDPROC(mmiocpy)
|
||||||
506
arch/arm/lib/memcpymove.h
Normal file
506
arch/arm/lib/memcpymove.h
Normal file
@@ -0,0 +1,506 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2013, Raspberry Pi Foundation
|
||||||
|
Copyright (c) 2013, RISC OS Open Ltd
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holder nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * unaligned_words: copy `words` (1, 2, 4 or 8) 32-bit words from S to D
 * when the source data starts `align` bytes past a word boundary.
 *
 * Source words are loaded whole and every output word is built from two
 * neighbouring loaded words, shifted by align*8 and 32-align*8 bits and
 * ORed together. One loaded word is carried from one invocation to the
 * next: when copying forwards it is expected in the highest-numbered
 * register argument used (r1/r2/r4/r8 for 1/2/4/8 words), when copying
 * backwards in r0, and the same register holds the most recently loaded
 * word on exit.
 *
 *   backwards - non-zero: pre-decrement S and D instead of incrementing
 *   use_pld   - non-zero: issue "pld [S, OFF]" (only in the 8-word variant)
 *   r0..r8    - names of the registers used for the data
 */
.macro unaligned_words backwards, align, use_pld, words, r0, r1, r2, r3, r4, r5, r6, r7, r8
|
||||||
|
.if words == 1
|
||||||
|
.if backwards
|
||||||
|
mov r1, r0, lsl #32-align*8
|
||||||
|
ldr r0, [S, #-4]!
|
||||||
|
orr r1, r1, r0, lsr #align*8
|
||||||
|
str r1, [D, #-4]!
|
||||||
|
.else
|
||||||
|
mov r0, r1, lsr #align*8
|
||||||
|
ldr r1, [S, #4]!
|
||||||
|
orr r0, r0, r1, lsl #32-align*8
|
||||||
|
str r0, [D], #4
|
||||||
|
.endif
|
||||||
|
.elseif words == 2
|
||||||
|
.if backwards
|
||||||
|
ldr r1, [S, #-4]!
|
||||||
|
mov r2, r0, lsl #32-align*8
|
||||||
|
ldr r0, [S, #-4]!
|
||||||
|
orr r2, r2, r1, lsr #align*8
|
||||||
|
mov r1, r1, lsl #32-align*8
|
||||||
|
orr r1, r1, r0, lsr #align*8
|
||||||
|
stmdb D!, {r1, r2}
|
||||||
|
.else
|
||||||
|
ldr r1, [S, #4]!
|
||||||
|
mov r0, r2, lsr #align*8
|
||||||
|
ldr r2, [S, #4]!
|
||||||
|
orr r0, r0, r1, lsl #32-align*8
|
||||||
|
mov r1, r1, lsr #align*8
|
||||||
|
orr r1, r1, r2, lsl #32-align*8
|
||||||
|
stmia D!, {r0, r1}
|
||||||
|
.endif
|
||||||
|
.elseif words == 4
|
||||||
|
.if backwards
|
||||||
|
ldmdb S!, {r2, r3}
|
||||||
|
mov r4, r0, lsl #32-align*8
|
||||||
|
ldmdb S!, {r0, r1}
|
||||||
|
orr r4, r4, r3, lsr #align*8
|
||||||
|
mov r3, r3, lsl #32-align*8
|
||||||
|
orr r3, r3, r2, lsr #align*8
|
||||||
|
mov r2, r2, lsl #32-align*8
|
||||||
|
orr r2, r2, r1, lsr #align*8
|
||||||
|
mov r1, r1, lsl #32-align*8
|
||||||
|
orr r1, r1, r0, lsr #align*8
|
||||||
|
stmdb D!, {r1, r2, r3, r4}
|
||||||
|
.else
|
||||||
|
ldmib S!, {r1, r2}
|
||||||
|
mov r0, r4, lsr #align*8
|
||||||
|
ldmib S!, {r3, r4}
|
||||||
|
orr r0, r0, r1, lsl #32-align*8
|
||||||
|
mov r1, r1, lsr #align*8
|
||||||
|
orr r1, r1, r2, lsl #32-align*8
|
||||||
|
mov r2, r2, lsr #align*8
|
||||||
|
orr r2, r2, r3, lsl #32-align*8
|
||||||
|
mov r3, r3, lsr #align*8
|
||||||
|
orr r3, r3, r4, lsl #32-align*8
|
||||||
|
stmia D!, {r0, r1, r2, r3}
|
||||||
|
.endif
|
||||||
|
.elseif words == 8
|
||||||
|
.if backwards
|
||||||
|
ldmdb S!, {r4, r5, r6, r7}
|
||||||
|
mov r8, r0, lsl #32-align*8
|
||||||
|
ldmdb S!, {r0, r1, r2, r3}
|
||||||
|
.if use_pld
|
||||||
|
pld [S, OFF]
|
||||||
|
.endif
|
||||||
|
orr r8, r8, r7, lsr #align*8
|
||||||
|
mov r7, r7, lsl #32-align*8
|
||||||
|
orr r7, r7, r6, lsr #align*8
|
||||||
|
mov r6, r6, lsl #32-align*8
|
||||||
|
orr r6, r6, r5, lsr #align*8
|
||||||
|
mov r5, r5, lsl #32-align*8
|
||||||
|
orr r5, r5, r4, lsr #align*8
|
||||||
|
mov r4, r4, lsl #32-align*8
|
||||||
|
orr r4, r4, r3, lsr #align*8
|
||||||
|
mov r3, r3, lsl #32-align*8
|
||||||
|
orr r3, r3, r2, lsr #align*8
|
||||||
|
mov r2, r2, lsl #32-align*8
|
||||||
|
orr r2, r2, r1, lsr #align*8
|
||||||
|
mov r1, r1, lsl #32-align*8
|
||||||
|
orr r1, r1, r0, lsr #align*8
|
||||||
|
stmdb D!, {r5, r6, r7, r8}
|
||||||
|
stmdb D!, {r1, r2, r3, r4}
|
||||||
|
.else
|
||||||
|
ldmib S!, {r1, r2, r3, r4}
|
||||||
|
mov r0, r8, lsr #align*8
|
||||||
|
ldmib S!, {r5, r6, r7, r8}
|
||||||
|
.if use_pld
|
||||||
|
pld [S, OFF]
|
||||||
|
.endif
|
||||||
|
orr r0, r0, r1, lsl #32-align*8
|
||||||
|
mov r1, r1, lsr #align*8
|
||||||
|
orr r1, r1, r2, lsl #32-align*8
|
||||||
|
mov r2, r2, lsr #align*8
|
||||||
|
orr r2, r2, r3, lsl #32-align*8
|
||||||
|
mov r3, r3, lsr #align*8
|
||||||
|
orr r3, r3, r4, lsl #32-align*8
|
||||||
|
mov r4, r4, lsr #align*8
|
||||||
|
orr r4, r4, r5, lsl #32-align*8
|
||||||
|
mov r5, r5, lsr #align*8
|
||||||
|
orr r5, r5, r6, lsl #32-align*8
|
||||||
|
mov r6, r6, lsr #align*8
|
||||||
|
orr r6, r6, r7, lsl #32-align*8
|
||||||
|
mov r7, r7, lsr #align*8
|
||||||
|
orr r7, r7, r8, lsl #32-align*8
|
||||||
|
stmia D!, {r0, r1, r2, r3}
|
||||||
|
stmia D!, {r4, r5, r6, r7}
|
||||||
|
.endif
|
||||||
|
.endif
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
 * memcpy_leading_15bytes: copy the leading DAT2 bytes and subtract them
 * from N.
 *
 * The low four bits of DAT2 select a 1-, 2-, 4- and 8-byte transfer. Each
 * "movs DAT1, DAT2, lsl #k" moves two of those bits into the N and C flags
 * (bits 0/1 for k = 31, bits 2/3 for k = 29), and the conditional (mi/cs)
 * loads and stores that follow act on them.
 * When align is non-zero the 8-byte load is performed as two single-word
 * loads instead of a load-multiple.
 * DAT0 and DAT1 are scratch; DAT2 is overwritten if the 8-byte transfer runs.
 */
.macro memcpy_leading_15bytes backwards, align
|
||||||
|
movs DAT1, DAT2, lsl #31
|
||||||
|
sub N, N, DAT2
|
||||||
|
.if backwards
|
||||||
|
ldrmib DAT0, [S, #-1]!
|
||||||
|
ldrcsh DAT1, [S, #-2]!
|
||||||
|
strmib DAT0, [D, #-1]!
|
||||||
|
strcsh DAT1, [D, #-2]!
|
||||||
|
.else
|
||||||
|
ldrmib DAT0, [S], #1
|
||||||
|
ldrcsh DAT1, [S], #2
|
||||||
|
strmib DAT0, [D], #1
|
||||||
|
strcsh DAT1, [D], #2
|
||||||
|
.endif
|
||||||
|
movs DAT1, DAT2, lsl #29
|
||||||
|
.if backwards
|
||||||
|
ldrmi DAT0, [S, #-4]!
|
||||||
|
.if align == 0
|
||||||
|
ldmcsdb S!, {DAT1, DAT2}
|
||||||
|
.else
|
||||||
|
ldrcs DAT2, [S, #-4]!
|
||||||
|
ldrcs DAT1, [S, #-4]!
|
||||||
|
.endif
|
||||||
|
strmi DAT0, [D, #-4]!
|
||||||
|
stmcsdb D!, {DAT1, DAT2}
|
||||||
|
.else
|
||||||
|
ldrmi DAT0, [S], #4
|
||||||
|
.if align == 0
|
||||||
|
ldmcsia S!, {DAT1, DAT2}
|
||||||
|
.else
|
||||||
|
ldrcs DAT1, [S], #4
|
||||||
|
ldrcs DAT2, [S], #4
|
||||||
|
.endif
|
||||||
|
strmi DAT0, [D], #4
|
||||||
|
stmcsia D!, {DAT1, DAT2}
|
||||||
|
.endif
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
 * memcpy_trailing_15bytes: copy the final (N & 15) bytes.
 *
 * N is shifted left so that its low four bits pass through the flags:
 * "lsl #29" puts bit 3 in C (8-byte transfer) and bit 2 in N (4-byte
 * transfer); the further "lsl #2" puts bit 1 in C (2 bytes) and bit 0 in
 * N (1 byte). N is destroyed in the process.
 * When align is non-zero the 8-byte load is performed as two single-word
 * loads instead of a load-multiple.
 * The final single-byte transfer does not write back S or D.
 */
.macro memcpy_trailing_15bytes backwards, align
|
||||||
|
movs N, N, lsl #29
|
||||||
|
.if backwards
|
||||||
|
.if align == 0
|
||||||
|
ldmcsdb S!, {DAT0, DAT1}
|
||||||
|
.else
|
||||||
|
ldrcs DAT1, [S, #-4]!
|
||||||
|
ldrcs DAT0, [S, #-4]!
|
||||||
|
.endif
|
||||||
|
ldrmi DAT2, [S, #-4]!
|
||||||
|
stmcsdb D!, {DAT0, DAT1}
|
||||||
|
strmi DAT2, [D, #-4]!
|
||||||
|
.else
|
||||||
|
.if align == 0
|
||||||
|
ldmcsia S!, {DAT0, DAT1}
|
||||||
|
.else
|
||||||
|
ldrcs DAT0, [S], #4
|
||||||
|
ldrcs DAT1, [S], #4
|
||||||
|
.endif
|
||||||
|
ldrmi DAT2, [S], #4
|
||||||
|
stmcsia D!, {DAT0, DAT1}
|
||||||
|
strmi DAT2, [D], #4
|
||||||
|
.endif
|
||||||
|
movs N, N, lsl #2
|
||||||
|
.if backwards
|
||||||
|
ldrcsh DAT0, [S, #-2]!
|
||||||
|
ldrmib DAT1, [S, #-1]
|
||||||
|
strcsh DAT0, [D, #-2]!
|
||||||
|
strmib DAT1, [D, #-1]
|
||||||
|
.else
|
||||||
|
ldrcsh DAT0, [S], #2
|
||||||
|
ldrmib DAT1, [S]
|
||||||
|
strcsh DAT0, [D], #2
|
||||||
|
strmib DAT1, [D]
|
||||||
|
.endif
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
 * memcpy_long_inner_loop: main body of the long-copy path for a source that
 * is `align` bytes off a word boundary (align == 0: word-aligned source).
 *
 * For align != 0, S is first moved back by `align` bytes and one word is
 * loaded to prime unaligned_words. Loop 110 then copies 32 bytes per
 * iteration with a preload; after preload_trailing, loop 120 copies the
 * remaining 32-byte blocks without preloading. A further 16 bytes are copied
 * if bit 4 of N is set, then the trailing (N & 15) bytes (with S moved
 * forward again by `align`). The macro ends by popping the registers saved
 * by its caller, loading pc, so execution does not fall through.
 */
.macro memcpy_long_inner_loop backwards, align
|
||||||
|
.if align != 0
|
||||||
|
.if backwards
|
||||||
|
ldr DAT0, [S, #-align]!
|
||||||
|
.else
|
||||||
|
ldr LAST, [S, #-align]!
|
||||||
|
.endif
|
||||||
|
.endif
|
||||||
|
110:
|
||||||
|
.if align == 0
|
||||||
|
.if backwards
|
||||||
|
ldmdb S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
|
||||||
|
pld [S, OFF]
|
||||||
|
stmdb D!, {DAT4, DAT5, DAT6, LAST}
|
||||||
|
stmdb D!, {DAT0, DAT1, DAT2, DAT3}
|
||||||
|
.else
|
||||||
|
ldmia S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
|
||||||
|
pld [S, OFF]
|
||||||
|
stmia D!, {DAT0, DAT1, DAT2, DAT3}
|
||||||
|
stmia D!, {DAT4, DAT5, DAT6, LAST}
|
||||||
|
.endif
|
||||||
|
.else
|
||||||
|
unaligned_words backwards, align, 1, 8, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7, LAST
|
||||||
|
.endif
|
||||||
|
subs N, N, #32
|
||||||
|
bhs 110b
|
||||||
|
/* Just before the final (prefetch_distance+1) 32-byte blocks, deal with final preloads */
|
||||||
|
preload_trailing backwards, S, N, OFF
|
||||||
|
add N, N, #(prefetch_distance+2)*32 - 32
|
||||||
|
120:
|
||||||
|
.if align == 0
|
||||||
|
.if backwards
|
||||||
|
ldmdb S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
|
||||||
|
stmdb D!, {DAT4, DAT5, DAT6, LAST}
|
||||||
|
stmdb D!, {DAT0, DAT1, DAT2, DAT3}
|
||||||
|
.else
|
||||||
|
ldmia S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
|
||||||
|
stmia D!, {DAT0, DAT1, DAT2, DAT3}
|
||||||
|
stmia D!, {DAT4, DAT5, DAT6, LAST}
|
||||||
|
.endif
|
||||||
|
.else
|
||||||
|
unaligned_words backwards, align, 0, 8, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7, LAST
|
||||||
|
.endif
|
||||||
|
subs N, N, #32
|
||||||
|
bhs 120b
|
||||||
|
tst N, #16
|
||||||
|
.if align == 0
|
||||||
|
.if backwards
|
||||||
|
ldmnedb S!, {DAT0, DAT1, DAT2, LAST}
|
||||||
|
stmnedb D!, {DAT0, DAT1, DAT2, LAST}
|
||||||
|
.else
|
||||||
|
ldmneia S!, {DAT0, DAT1, DAT2, LAST}
|
||||||
|
stmneia D!, {DAT0, DAT1, DAT2, LAST}
|
||||||
|
.endif
|
||||||
|
.else
|
||||||
|
beq 130f
|
||||||
|
unaligned_words backwards, align, 0, 4, DAT0, DAT1, DAT2, DAT3, LAST
|
||||||
|
130:
|
||||||
|
.endif
|
||||||
|
/* Trailing words and bytes */
|
||||||
|
tst N, #15
|
||||||
|
beq 199f
|
||||||
|
.if align != 0
|
||||||
|
add S, S, #align
|
||||||
|
.endif
|
||||||
|
memcpy_trailing_15bytes backwards, align
|
||||||
|
199:
|
||||||
|
pop {DAT3, DAT4, DAT5, DAT6, DAT7}
|
||||||
|
pop {D, DAT1, DAT2, pc}
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
 * memcpy_medium_inner_loop: copy 16 bytes per iteration (loop 120) until
 * "subs N, N, #16" borrows, then copy the trailing (N & 15) bytes and return
 * by popping {D, DAT1, DAT2, pc}.
 * With align == 0 the source is read with a load-multiple; otherwise with
 * four single-word loads.
 */
.macro memcpy_medium_inner_loop backwards, align
|
||||||
|
120:
|
||||||
|
.if backwards
|
||||||
|
.if align == 0
|
||||||
|
ldmdb S!, {DAT0, DAT1, DAT2, LAST}
|
||||||
|
.else
|
||||||
|
ldr LAST, [S, #-4]!
|
||||||
|
ldr DAT2, [S, #-4]!
|
||||||
|
ldr DAT1, [S, #-4]!
|
||||||
|
ldr DAT0, [S, #-4]!
|
||||||
|
.endif
|
||||||
|
stmdb D!, {DAT0, DAT1, DAT2, LAST}
|
||||||
|
.else
|
||||||
|
.if align == 0
|
||||||
|
ldmia S!, {DAT0, DAT1, DAT2, LAST}
|
||||||
|
.else
|
||||||
|
ldr DAT0, [S], #4
|
||||||
|
ldr DAT1, [S], #4
|
||||||
|
ldr DAT2, [S], #4
|
||||||
|
ldr LAST, [S], #4
|
||||||
|
.endif
|
||||||
|
stmia D!, {DAT0, DAT1, DAT2, LAST}
|
||||||
|
.endif
|
||||||
|
subs N, N, #16
|
||||||
|
bhs 120b
|
||||||
|
/* Trailing words and bytes */
|
||||||
|
tst N, #15
|
||||||
|
beq 199f
|
||||||
|
memcpy_trailing_15bytes backwards, align
|
||||||
|
199:
|
||||||
|
pop {D, DAT1, DAT2, pc}
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
 * memcpy_short_inner_loop: tail of the short-copy path. Copies one 16-byte
 * block if bit 4 of N is set (the ne-conditional loads/stores after
 * "tst N, #16"), then the trailing (N & 15) bytes, and returns by popping
 * {D, DAT1, DAT2, pc}.
 * With align == 0 the source is read with a load-multiple; otherwise with
 * four single-word loads.
 */
.macro memcpy_short_inner_loop backwards, align
|
||||||
|
tst N, #16
|
||||||
|
.if backwards
|
||||||
|
.if align == 0
|
||||||
|
ldmnedb S!, {DAT0, DAT1, DAT2, LAST}
|
||||||
|
.else
|
||||||
|
ldrne LAST, [S, #-4]!
|
||||||
|
ldrne DAT2, [S, #-4]!
|
||||||
|
ldrne DAT1, [S, #-4]!
|
||||||
|
ldrne DAT0, [S, #-4]!
|
||||||
|
.endif
|
||||||
|
stmnedb D!, {DAT0, DAT1, DAT2, LAST}
|
||||||
|
.else
|
||||||
|
.if align == 0
|
||||||
|
ldmneia S!, {DAT0, DAT1, DAT2, LAST}
|
||||||
|
.else
|
||||||
|
ldrne DAT0, [S], #4
|
||||||
|
ldrne DAT1, [S], #4
|
||||||
|
ldrne DAT2, [S], #4
|
||||||
|
ldrne LAST, [S], #4
|
||||||
|
.endif
|
||||||
|
stmneia D!, {DAT0, DAT1, DAT2, LAST}
|
||||||
|
.endif
|
||||||
|
memcpy_trailing_15bytes backwards, align
|
||||||
|
199:
|
||||||
|
pop {D, DAT1, DAT2, pc}
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
 * memcpy: complete copy routine body, expanded once per entry point.
 *
 *   backwards = 0: copy from the start of the buffers upwards.
 *   backwards = 1: first advance D and S by N, then copy downwards.
 *
 * Registers: D = a1 (destination), S = a2 (source), N = a3 (byte count).
 * D, DAT1, DAT2 and lr are pushed on entry; every exit pops them with pc in
 * place of lr, so a1 is returned unchanged.
 *
 * Three paths are selected on N:
 *   N < 31                            -> short case  (label 170)
 *   N < (prefetch_distance+3)*32 - 1  -> medium case (label 160)
 *   otherwise                         -> long case, which additionally
 *                                        saves DAT3-DAT7 and preloads ahead
 * Each path ends in one of the *_inner_loop macros, which return to the
 * caller themselves.
 */
.macro memcpy backwards
|
||||||
|
D .req a1
|
||||||
|
S .req a2
|
||||||
|
N .req a3
|
||||||
|
DAT0 .req a4
|
||||||
|
DAT1 .req v1
|
||||||
|
DAT2 .req v2
|
||||||
|
DAT3 .req v3
|
||||||
|
DAT4 .req v4
|
||||||
|
DAT5 .req v5
|
||||||
|
DAT6 .req v6
|
||||||
|
DAT7 .req sl
|
||||||
|
LAST .req ip
|
||||||
|
OFF .req lr
|
||||||
|
|
||||||
|
.cfi_startproc
|
||||||
|
|
||||||
|
push {D, DAT1, DAT2, lr}
|
||||||
|
|
||||||
|
.cfi_def_cfa_offset 16
|
||||||
|
.cfi_rel_offset D, 0
|
||||||
|
.cfi_undefined S
|
||||||
|
.cfi_undefined N
|
||||||
|
.cfi_undefined DAT0
|
||||||
|
.cfi_rel_offset DAT1, 4
|
||||||
|
.cfi_rel_offset DAT2, 8
|
||||||
|
.cfi_undefined LAST
|
||||||
|
.cfi_rel_offset lr, 12
|
||||||
|
|
||||||
|
.if backwards
|
||||||
|
add D, D, N
|
||||||
|
add S, S, N
|
||||||
|
.endif
|
||||||
|
|
||||||
|
/* See if we're guaranteed to have at least one 16-byte aligned 16-byte write */
|
||||||
|
cmp N, #31
|
||||||
|
blo 170f
|
||||||
|
/* To preload ahead as we go, we need at least (prefetch_distance+2) 32-byte blocks */
|
||||||
|
cmp N, #(prefetch_distance+3)*32 - 1
|
||||||
|
blo 160f
|
||||||
|
|
||||||
|
/* Long case */
|
||||||
|
push {DAT3, DAT4, DAT5, DAT6, DAT7}
|
||||||
|
|
||||||
|
.cfi_def_cfa_offset 36
|
||||||
|
.cfi_rel_offset D, 20
|
||||||
|
.cfi_rel_offset DAT1, 24
|
||||||
|
.cfi_rel_offset DAT2, 28
|
||||||
|
.cfi_rel_offset DAT3, 0
|
||||||
|
.cfi_rel_offset DAT4, 4
|
||||||
|
.cfi_rel_offset DAT5, 8
|
||||||
|
.cfi_rel_offset DAT6, 12
|
||||||
|
.cfi_rel_offset DAT7, 16
|
||||||
|
.cfi_rel_offset lr, 32
|
||||||
|
|
||||||
|
/* Adjust N so that the decrement instruction can also test for
|
||||||
|
* inner loop termination. We want it to stop when there are
|
||||||
|
* (prefetch_distance+1) complete blocks to go. */
|
||||||
|
sub N, N, #(prefetch_distance+2)*32
|
||||||
|
preload_leading_step1 backwards, DAT0, S
|
||||||
|
.if backwards
|
||||||
|
/* Bug in GAS: it accepts, but mis-assembles the instruction
|
||||||
|
* ands DAT2, D, #60, 2
|
||||||
|
* which sets DAT2 to the number of leading bytes until destination is aligned and also clears C (sets borrow)
|
||||||
|
*/
|
||||||
|
.word 0xE210513C
|
||||||
|
beq 154f
|
||||||
|
.else
|
||||||
|
ands DAT2, D, #15
|
||||||
|
beq 154f
|
||||||
|
rsb DAT2, DAT2, #16 /* number of leading bytes until destination aligned */
|
||||||
|
.endif
|
||||||
|
preload_leading_step2 backwards, DAT0, S, DAT2, OFF
|
||||||
|
memcpy_leading_15bytes backwards, 1
|
||||||
|
154: /* Destination now 16-byte aligned; we have at least one prefetch as well as at least one 16-byte output block */
|
||||||
|
/* Prefetch offset is best selected such that it lies in the first 8 of each 32 bytes - but it's just as easy to aim for the first one */
|
||||||
|
.if backwards
|
||||||
|
rsb OFF, S, #3
|
||||||
|
and OFF, OFF, #28
|
||||||
|
sub OFF, OFF, #32*(prefetch_distance+1)
|
||||||
|
.else
|
||||||
|
and OFF, S, #28
|
||||||
|
rsb OFF, OFF, #32*prefetch_distance
|
||||||
|
.endif
|
||||||
|
movs DAT0, S, lsl #31
|
||||||
|
bhi 157f
|
||||||
|
bcs 156f
|
||||||
|
bmi 155f
|
||||||
|
memcpy_long_inner_loop backwards, 0
|
||||||
|
155: memcpy_long_inner_loop backwards, 1
|
||||||
|
156: memcpy_long_inner_loop backwards, 2
|
||||||
|
157: memcpy_long_inner_loop backwards, 3
|
||||||
|
|
||||||
|
.cfi_def_cfa_offset 16
|
||||||
|
.cfi_rel_offset D, 0
|
||||||
|
.cfi_rel_offset DAT1, 4
|
||||||
|
.cfi_rel_offset DAT2, 8
|
||||||
|
.cfi_same_value DAT3
|
||||||
|
.cfi_same_value DAT4
|
||||||
|
.cfi_same_value DAT5
|
||||||
|
.cfi_same_value DAT6
|
||||||
|
.cfi_same_value DAT7
|
||||||
|
.cfi_rel_offset lr, 12
|
||||||
|
|
||||||
|
160: /* Medium case */
|
||||||
|
preload_all backwards, 0, 0, S, N, DAT2, OFF
|
||||||
|
sub N, N, #16 /* simplifies inner loop termination */
|
||||||
|
.if backwards
|
||||||
|
ands DAT2, D, #15
|
||||||
|
beq 164f
|
||||||
|
.else
|
||||||
|
ands DAT2, D, #15
|
||||||
|
beq 164f
|
||||||
|
rsb DAT2, DAT2, #16
|
||||||
|
.endif
|
||||||
|
/* NOTE(review): `align` is not a parameter of this macro (the long case passes a literal 1); confirm what the second argument expands to here. */
memcpy_leading_15bytes backwards, align
|
||||||
|
164: /* Destination now 16-byte aligned; we have at least one 16-byte output block */
|
||||||
|
tst S, #3
|
||||||
|
bne 140f
|
||||||
|
memcpy_medium_inner_loop backwards, 0
|
||||||
|
140: memcpy_medium_inner_loop backwards, 1
|
||||||
|
|
||||||
|
170: /* Short case, less than 31 bytes, so no guarantee of at least one 16-byte block */
|
||||||
|
teq N, #0
|
||||||
|
beq 199f
|
||||||
|
preload_all backwards, 1, 0, S, N, DAT2, LAST
|
||||||
|
tst D, #3
|
||||||
|
beq 174f
|
||||||
|
172: subs N, N, #1
|
||||||
|
blo 199f
|
||||||
|
.if backwards
|
||||||
|
ldrb DAT0, [S, #-1]!
|
||||||
|
strb DAT0, [D, #-1]!
|
||||||
|
.else
|
||||||
|
ldrb DAT0, [S], #1
|
||||||
|
strb DAT0, [D], #1
|
||||||
|
.endif
|
||||||
|
tst D, #3
|
||||||
|
bne 172b
|
||||||
|
174: /* Destination now 4-byte aligned; we have 0 or more output bytes to go */
|
||||||
|
tst S, #3
|
||||||
|
bne 140f
|
||||||
|
memcpy_short_inner_loop backwards, 0
|
||||||
|
140: memcpy_short_inner_loop backwards, 1
|
||||||
|
|
||||||
|
.cfi_endproc
|
||||||
|
|
||||||
|
.unreq D
|
||||||
|
.unreq S
|
||||||
|
.unreq N
|
||||||
|
.unreq DAT0
|
||||||
|
.unreq DAT1
|
||||||
|
.unreq DAT2
|
||||||
|
.unreq DAT3
|
||||||
|
.unreq DAT4
|
||||||
|
.unreq DAT5
|
||||||
|
.unreq DAT6
|
||||||
|
.unreq DAT7
|
||||||
|
.unreq LAST
|
||||||
|
.unreq OFF
|
||||||
|
.endm
|
||||||
61
arch/arm/lib/memmove_rpi.S
Normal file
61
arch/arm/lib/memmove_rpi.S
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2013, Raspberry Pi Foundation
|
||||||
|
Copyright (c) 2013, RISC OS Open Ltd
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holder nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/linkage.h>
|
||||||
|
#include "arm-mem.h"
|
||||||
|
#include "memcpymove.h"
|
||||||
|
|
||||||
|
/* Prevent the stack from becoming executable */
|
||||||
|
#if defined(__linux__) && defined(__ELF__)
|
||||||
|
.section .note.GNU-stack,"",%progbits
|
||||||
|
#endif
|
||||||
|
|
||||||
|
.text
|
||||||
|
.arch armv6
|
||||||
|
.object_arch armv4
|
||||||
|
.arm
|
||||||
|
.altmacro
|
||||||
|
.p2align 2
|
||||||
|
|
||||||
|
/*
|
||||||
|
* void *memmove(void *s1, const void *s2, size_t n);
|
||||||
|
* On entry:
|
||||||
|
* a1 = pointer to destination
|
||||||
|
* a2 = pointer to source
|
||||||
|
* a3 = number of bytes to copy
|
||||||
|
* On exit:
|
||||||
|
* a1 preserved
|
||||||
|
*/
|
||||||
|
|
||||||
|
.set prefetch_distance, 3
|
||||||
|
|
||||||
|
/*
 * memmove entry point. Compares the source (a2) with the destination (a1):
 * when a2 - a1 is non-negative the copy is handed to memcpy (ascending
 * copy); otherwise execution continues into the descending-copy expansion
 * of the memcpy macro (backwards = 1).
 */
ENTRY(memmove)
|
||||||
|
cmp a2, a1
|
||||||
|
bpl memcpy /* pl works even over -1 - 0 and 0x7fffffff - 0x80000000 boundaries */
|
||||||
|
memcpy 1
|
||||||
|
ENDPROC(memmove)
|
||||||
128
arch/arm/lib/memset_rpi.S
Normal file
128
arch/arm/lib/memset_rpi.S
Normal file
@@ -0,0 +1,128 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2013, Raspberry Pi Foundation
|
||||||
|
Copyright (c) 2013, RISC OS Open Ltd
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holder nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/linkage.h>
|
||||||
|
#include "arm-mem.h"
|
||||||
|
|
||||||
|
/* Prevent the stack from becoming executable */
|
||||||
|
#if defined(__linux__) && defined(__ELF__)
|
||||||
|
.section .note.GNU-stack,"",%progbits
|
||||||
|
#endif
|
||||||
|
|
||||||
|
.text
|
||||||
|
.arch armv6
|
||||||
|
.object_arch armv4
|
||||||
|
.arm
|
||||||
|
.altmacro
|
||||||
|
.p2align 2
|
||||||
|
|
||||||
|
/*
|
||||||
|
* void *memset(void *s, int c, size_t n);
|
||||||
|
* On entry:
|
||||||
|
* a1 = pointer to buffer to fill
|
||||||
|
* a2 = byte pattern to fill with (caller-narrowed)
|
||||||
|
* a3 = number of bytes to fill
|
||||||
|
* On exit:
|
||||||
|
* a1 preserved
|
||||||
|
*/
|
||||||
|
/*
 * Fill routine shared by four entry points. The fill value in a2 is
 * replicated into all four bytes of DAT0 (and copied to DAT1), then the
 * buffer is written with progressively wider stores: leading bytes up to
 * 16-byte alignment, 16-byte blocks, and finally the trailing bytes.
 * The original a1 is pushed on entry and popped on exit.
 *
 * NOTE(review): __memset32 and __memset64 enter at the same address as
 * memset, so their value also goes through the "orr ... lsl #8 / lsl #16"
 * replication and a4 is overwritten by "mov DAT1, DAT0". Confirm this is
 * intended for patterns whose bytes are not all equal.
 */
ENTRY(mmioset)
|
||||||
|
ENTRY(memset)
|
||||||
|
ENTRY(__memset32)
|
||||||
|
ENTRY(__memset64)
|
||||||
|
|
||||||
|
S .req a1
|
||||||
|
DAT0 .req a2
|
||||||
|
N .req a3
|
||||||
|
DAT1 .req a4
|
||||||
|
DAT2 .req ip
|
||||||
|
DAT3 .req lr
|
||||||
|
|
||||||
|
orr DAT0, DAT0, DAT0, lsl #8
|
||||||
|
push {S, lr}
|
||||||
|
orr DAT0, DAT0, DAT0, lsl #16
|
||||||
|
mov DAT1, DAT0
|
||||||
|
|
||||||
|
/* See if we're guaranteed to have at least one 16-byte aligned 16-byte write */
|
||||||
|
cmp N, #31
|
||||||
|
blo 170f
|
||||||
|
|
||||||
|
161: sub N, N, #16 /* simplifies inner loop termination */
|
||||||
|
/* Leading words and bytes */
|
||||||
|
tst S, #15
|
||||||
|
beq 164f
|
||||||
|
rsb DAT3, S, #0 /* bits 0-3 = number of leading bytes until aligned */
|
||||||
|
/* lsl #31: bit 0 of DAT3 -> N flag (1-byte store), bit 1 -> C flag (2-byte store) */
movs DAT2, DAT3, lsl #31
|
||||||
|
submi N, N, #1
|
||||||
|
strmib DAT0, [S], #1
|
||||||
|
subcs N, N, #2
|
||||||
|
strcsh DAT0, [S], #2
|
||||||
|
/* lsl #29: bit 2 of DAT3 -> N flag (4-byte store), bit 3 -> C flag (8-byte store) */
movs DAT2, DAT3, lsl #29
|
||||||
|
submi N, N, #4
|
||||||
|
strmi DAT0, [S], #4
|
||||||
|
subcs N, N, #8
|
||||||
|
stmcsia S!, {DAT0, DAT1}
|
||||||
|
164: /* Delayed set up of DAT2 and DAT3 so we could use them as scratch registers above */
|
||||||
|
mov DAT2, DAT0
|
||||||
|
mov DAT3, DAT0
|
||||||
|
/* Now the inner loop of 16-byte stores */
|
||||||
|
165: stmia S!, {DAT0, DAT1, DAT2, DAT3}
|
||||||
|
subs N, N, #16
|
||||||
|
bhs 165b
|
||||||
|
166: /* Trailing words and bytes */
|
||||||
|
movs N, N, lsl #29
|
||||||
|
stmcsia S!, {DAT0, DAT1}
|
||||||
|
strmi DAT0, [S], #4
|
||||||
|
movs N, N, lsl #2
|
||||||
|
strcsh DAT0, [S], #2
|
||||||
|
strmib DAT0, [S]
|
||||||
|
199: pop {S, pc}
|
||||||
|
|
||||||
|
170: /* Short case */
|
||||||
|
mov DAT2, DAT0
|
||||||
|
mov DAT3, DAT0
|
||||||
|
tst S, #3
|
||||||
|
beq 174f
|
||||||
|
172: subs N, N, #1
|
||||||
|
blo 199b
|
||||||
|
strb DAT0, [S], #1
|
||||||
|
tst S, #3
|
||||||
|
bne 172b
|
||||||
|
174: tst N, #16
|
||||||
|
stmneia S!, {DAT0, DAT1, DAT2, DAT3}
|
||||||
|
b 166b
|
||||||
|
|
||||||
|
.unreq S
|
||||||
|
.unreq DAT0
|
||||||
|
.unreq N
|
||||||
|
.unreq DAT1
|
||||||
|
.unreq DAT2
|
||||||
|
.unreq DAT3
|
||||||
|
ENDPROC(__memset64)
|
||||||
|
ENDPROC(__memset32)
|
||||||
|
ENDPROC(memset)
|
||||||
|
ENDPROC(mmioset)
|
||||||
@@ -19,6 +19,14 @@
|
|||||||
#include <asm/current.h>
|
#include <asm/current.h>
|
||||||
#include <asm/page.h>
|
#include <asm/page.h>
|
||||||
|
|
||||||
|
#ifndef COPY_FROM_USER_THRESHOLD
|
||||||
|
#define COPY_FROM_USER_THRESHOLD 64
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef COPY_TO_USER_THRESHOLD
|
||||||
|
#define COPY_TO_USER_THRESHOLD 64
|
||||||
|
#endif
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
|
pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
|
||||||
{
|
{
|
||||||
@@ -81,7 +89,44 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned long noinline
|
static int
|
||||||
|
pin_page_for_read(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
|
||||||
|
{
|
||||||
|
unsigned long addr = (unsigned long)_addr;
|
||||||
|
pgd_t *pgd;
|
||||||
|
pmd_t *pmd;
|
||||||
|
pte_t *pte;
|
||||||
|
pud_t *pud;
|
||||||
|
spinlock_t *ptl;
|
||||||
|
|
||||||
|
pgd = pgd_offset(current->mm, addr);
|
||||||
|
if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
pud = pud_offset(pgd, addr);
|
||||||
|
if (unlikely(pud_none(*pud) || pud_bad(*pud)))
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
pmd = pmd_offset(pud, addr);
|
||||||
|
if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
|
||||||
|
if (unlikely(!pte_present(*pte) || !pte_young(*pte))) {
|
||||||
|
pte_unmap_unlock(pte, ptl);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
*ptep = pte;
|
||||||
|
*ptlp = ptl;
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned long noinline
|
||||||
__copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
|
__copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
|
||||||
{
|
{
|
||||||
unsigned long ua_flags;
|
unsigned long ua_flags;
|
||||||
@@ -134,6 +179,57 @@ out:
|
|||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned long noinline
|
||||||
|
__copy_from_user_memcpy(void *to, const void __user *from, unsigned long n)
|
||||||
|
{
|
||||||
|
unsigned long ua_flags;
|
||||||
|
int atomic;
|
||||||
|
|
||||||
|
if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
|
||||||
|
memcpy(to, (const void *)from, n);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* the mmap semaphore is taken only if not in an atomic context */
|
||||||
|
atomic = in_atomic();
|
||||||
|
|
||||||
|
if (!atomic)
|
||||||
|
down_read(¤t->mm->mmap_sem);
|
||||||
|
while (n) {
|
||||||
|
pte_t *pte;
|
||||||
|
spinlock_t *ptl;
|
||||||
|
int tocopy;
|
||||||
|
|
||||||
|
while (!pin_page_for_read(from, &pte, &ptl)) {
|
||||||
|
char temp;
|
||||||
|
if (!atomic)
|
||||||
|
up_read(¤t->mm->mmap_sem);
|
||||||
|
if (__get_user(temp, (char __user *)from))
|
||||||
|
goto out;
|
||||||
|
if (!atomic)
|
||||||
|
down_read(¤t->mm->mmap_sem);
|
||||||
|
}
|
||||||
|
|
||||||
|
tocopy = (~(unsigned long)from & ~PAGE_MASK) + 1;
|
||||||
|
if (tocopy > n)
|
||||||
|
tocopy = n;
|
||||||
|
|
||||||
|
ua_flags = uaccess_save_and_enable();
|
||||||
|
memcpy(to, (const void *)from, tocopy);
|
||||||
|
uaccess_restore(ua_flags);
|
||||||
|
to += tocopy;
|
||||||
|
from += tocopy;
|
||||||
|
n -= tocopy;
|
||||||
|
|
||||||
|
pte_unmap_unlock(pte, ptl);
|
||||||
|
}
|
||||||
|
if (!atomic)
|
||||||
|
up_read(¤t->mm->mmap_sem);
|
||||||
|
|
||||||
|
out:
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
unsigned long
|
unsigned long
|
||||||
arm_copy_to_user(void __user *to, const void *from, unsigned long n)
|
arm_copy_to_user(void __user *to, const void *from, unsigned long n)
|
||||||
{
|
{
|
||||||
@@ -144,7 +240,7 @@ arm_copy_to_user(void __user *to, const void *from, unsigned long n)
|
|||||||
* With frame pointer disabled, tail call optimization kicks in
|
* With frame pointer disabled, tail call optimization kicks in
|
||||||
* as well making this test almost invisible.
|
* as well making this test almost invisible.
|
||||||
*/
|
*/
|
||||||
if (n < 64) {
|
if (n < COPY_TO_USER_THRESHOLD) {
|
||||||
unsigned long ua_flags = uaccess_save_and_enable();
|
unsigned long ua_flags = uaccess_save_and_enable();
|
||||||
n = __copy_to_user_std(to, from, n);
|
n = __copy_to_user_std(to, from, n);
|
||||||
uaccess_restore(ua_flags);
|
uaccess_restore(ua_flags);
|
||||||
@@ -155,6 +251,26 @@ arm_copy_to_user(void __user *to, const void *from, unsigned long n)
|
|||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned long __must_check
|
||||||
|
arm_copy_from_user(void *to, const void __user *from, unsigned long n)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* This test is stubbed out of the main function above to keep
|
||||||
|
* the overhead for small copies low by avoiding a large
|
||||||
|
* register dump on the stack just to reload them right away.
|
||||||
|
* With frame pointer disabled, tail call optimization kicks in
|
||||||
|
* as well making this test almost invisible.
|
||||||
|
*/
|
||||||
|
if (n < COPY_TO_USER_THRESHOLD) {
|
||||||
|
unsigned long ua_flags = uaccess_save_and_enable();
|
||||||
|
n = __copy_from_user_std(to, from, n);
|
||||||
|
uaccess_restore(ua_flags);
|
||||||
|
} else {
|
||||||
|
n = __copy_from_user_memcpy(to, from, n);
|
||||||
|
}
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
static unsigned long noinline
|
static unsigned long noinline
|
||||||
__clear_user_memset(void __user *addr, unsigned long n)
|
__clear_user_memset(void __user *addr, unsigned long n)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -188,6 +188,13 @@ config ARCH_BCM_53573
|
|||||||
The base chip is BCM53573 and there are some packaging modifications
|
The base chip is BCM53573 and there are some packaging modifications
|
||||||
like BCM47189 and BCM47452.
|
like BCM47189 and BCM47452.
|
||||||
|
|
||||||
|
config BCM2835_FAST_MEMCPY
|
||||||
|
bool "Enable optimized __copy_to_user and __copy_from_user"
|
||||||
|
depends on ARCH_BCM2835 && ARCH_MULTI_V6
|
||||||
|
default y
|
||||||
|
help
|
||||||
|
Optimized versions of __copy_to_user and __copy_from_user for Pi1.
|
||||||
|
|
||||||
config ARCH_BCM_63XX
|
config ARCH_BCM_63XX
|
||||||
bool "Broadcom BCM63xx DSL SoC"
|
bool "Broadcom BCM63xx DSL SoC"
|
||||||
depends on ARCH_MULTI_V7
|
depends on ARCH_MULTI_V7
|
||||||
|
|||||||
Reference in New Issue
Block a user