mirror of
https://git.FreeBSD.org/src.git
synced 2025-01-04 12:52:15 +00:00
953 lines
22 KiB
ArmAsm
953 lines
22 KiB
ArmAsm
/* $NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $ */
|
|
|
|
/*-
|
|
* Copyright 2003 Wasabi Systems, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* Written by Steve C. Woodford for Wasabi Systems, Inc.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed for the NetBSD Project by
|
|
* Wasabi Systems, Inc.
|
|
* 4. The name of Wasabi Systems, Inc. may not be used to endorse
|
|
* or promote products derived from this software without specific prior
|
|
* written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
|
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <machine/asm.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
/*
 * Section setup and the literal word used by copyin/copyout to find the
 * current PCB: cpu_info (indexed per CPU) when MULTIPROCESSOR is defined,
 * otherwise the address __pcpu + PC_CURPCB, which is dereferenced once.
 * NOTE(review): GNU as documents `.align 0` on ARM as padding to a 4-byte
 * boundary -- confirm for the assembler in use.
 */
.text
|
|
.align 0
|
|
|
|
#ifdef MULTIPROCESSOR
|
|
.Lcpu_info:
|
|
.word _C_LABEL(cpu_info)
|
|
#else
|
|
.Lcurpcb:
|
|
.word _C_LABEL(__pcpu) + PC_CURPCB
|
|
#endif
|
|
|
|
/*
 * copyin -- copy bytes from user space to kernel space.
 *
 * r0 = user space address (source)
 * r1 = kernel space address (destination)
 * r2 = length
 *
 * r0 is set to 0 when the length is <= 0 (signed compare) and when the
 * _arm_memcpy hook reports success; otherwise control falls through to
 * the generic path at .Lnormal.
 */
|
|
ENTRY(copyin)
|
|
cmp r2, #0x00
|
|
movle r0, #0x00
|
|
movle pc, lr /* Bail early if length is <= 0 */
|
|
|
|
/*
 * Optional hook: taken only if the pointer word reached through
 * .L_arm_memcpy is non-NULL and the length is not below the value
 * reached through .L_min_memcpy_size (signed compare).
 */
ldr r3, .L_arm_memcpy
|
|
ldr r3, [r3]
|
|
cmp r3, #0
|
|
beq .Lnormal
|
|
ldr r3, .L_min_memcpy_size
|
|
ldr r3, [r3]
|
|
cmp r2, r3
|
|
blt .Lnormal
|
|
/*
 * Save the original arguments, then call the hook with
 * r0 = kernel dst, r1 = user src, r2 = length, r3 = 2.
 * NOTE(review): five words are pushed, so sp is not 8-byte aligned at
 * the call if it was on entry -- confirm the ABI in use tolerates this.
 */
stmfd sp!, {r0-r2, r4, lr}
|
|
mov r3, r0
|
|
mov r0, r1
|
|
mov r1, r3
|
|
mov r3, #2 /* SRC_IS_USER */
|
|
ldr r4, .L_arm_memcpy
|
|
mov lr, pc
|
|
ldr pc, [r4]
|
|
/*
 * The ldmfd does not alter the flags, so moveq/RETeq still test the
 * hook's return value.  A non-zero result falls through to .Lnormal
 * with the original r0-r2 restored.
 */
cmp r0, #0
|
|
ldmfd sp!, {r0-r2, r4, lr}
|
|
moveq r0, #0
|
|
RETeq
|
|
|
|
/*
 * Generic copyin path.
 *
 * Saves r10, r11 and lr, loads the current PCB pointer into r10, stashes
 * the previous PCB_ONFAULT value in r11 and installs .Lcopyin_fault as
 * the fault handler for the duration of the copy.  r3 starts at 0 and is
 * updated by .Lcopyin_guts to tell the fault handler which registers
 * have been pushed.  On a clean return from .Lcopyin_guts the previous
 * handler is restored and 0 is returned in r0.
 */
.Lnormal:
	stmfd	sp!, {r10-r11, lr}

#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r2}
	bl	_C_LABEL(cpu_number)
	ldr	r10, .Lcpu_info
	/*
	 * Index cpu_info[] with the value returned by cpu_number() before
	 * the arguments are restored: the ldmfd below overwrites r0, and
	 * the original ordering indexed the table with the user address.
	 */
	ldr	r10, [r10, r0, lsl #2]
	ldmfd	sp!, {r0-r2}
	ldr	r10, [r10, #CI_CURPCB]
#else
	ldr	r10, .Lcurpcb
	ldr	r10, [r10]
#endif

	mov	r3, #0x00			/* Nothing pushed by the guts yet */
	adr	ip, .Lcopyin_fault
	ldr	r11, [r10, #PCB_ONFAULT]	/* Remember previous handler */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyin_guts
	str	r11, [r10, #PCB_ONFAULT]	/* Restore previous handler */
	mov	r0, #0x00			/* Success */
	ldmfd	sp!, {r10-r11, pc}
|
|
|
|
/*
 * Fault landing pad installed in PCB_ONFAULT by .Lnormal.  Restores the
 * previous handler from r11, pops whatever the copy code pushed (r3
 * tracks it: 0 = nothing, > 0 = r4-r7, < 0 = r4-r9) and returns to
 * copyin's caller.  r0 is not written here; presumably the trap handler
 * has already loaded the error code -- TODO confirm.
 */
.Lcopyin_fault:
|
|
str r11, [r10, #PCB_ONFAULT]
|
|
cmp r3, #0x00
|
|
ldmgtfd sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
|
|
ldmltfd sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
|
|
ldmfd sp!, {r10-r11, pc}
|
|
|
|
/*
 * Copy engine for copyin: r0 = user src, r1 = kernel dst, r2 = length
 * (> 0).  User-side loads use the T forms (ldrt/ldrbt); kernel-side
 * stores are plain.  This first stage copies 1-3 bytes so that the
 * destination becomes word aligned.
 */
.Lcopyin_guts:
|
|
pld [r0]
|
|
/* Word-align the destination buffer */
|
|
ands ip, r1, #0x03 /* Already word aligned? */
|
|
beq .Lcopyin_wordaligned /* Yup */
|
|
rsb ip, ip, #0x04
|
|
cmp r2, ip /* Enough bytes left to align it? */
|
|
blt .Lcopyin_l4_2 /* Nope. Just copy bytewise */
|
|
sub r2, r2, ip
|
|
/*
 * ip = 3 - (bytes needed) = number of byte copies to skip.  Each
 * ldrbt/strb pair is 8 bytes of code, hence lsl #3.  When ip is 0 the
 * add is not executed and all three pairs run.  The nop pads for the
 * PC read-ahead; do not add or remove instructions in this table.
 */
rsbs ip, ip, #0x03
|
|
addne pc, pc, ip, lsl #3
|
|
nop
|
|
ldrbt ip, [r0], #0x01
|
|
strb ip, [r1], #0x01
|
|
ldrbt ip, [r0], #0x01
|
|
strb ip, [r1], #0x01
|
|
ldrbt ip, [r0], #0x01
|
|
strb ip, [r1], #0x01
|
|
cmp r2, #0x00 /* All done? */
|
|
RETeq
|
|
|
|
/* Destination buffer is now word aligned */
|
|
.Lcopyin_wordaligned:
|
|
/* ip keeps (src & 3); .Lcopyin_bad_align dispatches on it */
ands ip, r0, #0x03 /* Is src also word-aligned? */
|
|
bne .Lcopyin_bad_align /* Nope. Things just got bad */
|
|
cmp r2, #0x08 /* Less than 8 bytes remaining? */
|
|
blt .Lcopyin_w_less_than8
|
|
|
|
/* Quad-align the destination buffer */
|
|
/* (8-byte alignment of r1: the loops below store with strd) */
tst r1, #0x07 /* Already quad aligned? */
|
|
ldrnet ip, [r0], #0x04
|
|
strne ip, [r1], #0x04
|
|
subne r2, r2, #0x04
|
|
/* r3 is set only after the push, so a fault above pops nothing extra */
stmfd sp!, {r4-r9} /* Free up some registers */
|
|
mov r3, #-1 /* Signal restore r4-r9 */
|
|
|
|
/* Destination buffer quad aligned, source is word aligned */
|
|
/* r2 is biased by -0x80 while the 128-byte loop runs */
subs r2, r2, #0x80
|
|
blt .Lcopyin_w_lessthan128
|
|
|
|
/* Copy 128 bytes at a time */
|
|
/*
 * Software-pipelined: word loads from user space (ldrt) are interleaved
 * with doubleword stores (strd) of register pairs r4/r5, r6/r7, r8/r9.
 * The LD:/ST: notes give the byte offsets within the 128-byte chunk.
 * The subs near the end sets the flags for the closing bge; the strd
 * after it does not change them.
 */
.Lcopyin_w_loop128:
|
|
ldrt r4, [r0], #0x04 /* LD:00-03 */
|
|
ldrt r5, [r0], #0x04 /* LD:04-07 */
|
|
pld [r0, #0x18] /* Prefetch 0x20 */
|
|
ldrt r6, [r0], #0x04 /* LD:08-0b */
|
|
ldrt r7, [r0], #0x04 /* LD:0c-0f */
|
|
ldrt r8, [r0], #0x04 /* LD:10-13 */
|
|
ldrt r9, [r0], #0x04 /* LD:14-17 */
|
|
strd r4, [r1], #0x08 /* ST:00-07 */
|
|
ldrt r4, [r0], #0x04 /* LD:18-1b */
|
|
ldrt r5, [r0], #0x04 /* LD:1c-1f */
|
|
strd r6, [r1], #0x08 /* ST:08-0f */
|
|
ldrt r6, [r0], #0x04 /* LD:20-23 */
|
|
ldrt r7, [r0], #0x04 /* LD:24-27 */
|
|
pld [r0, #0x18] /* Prefetch 0x40 */
|
|
strd r8, [r1], #0x08 /* ST:10-17 */
|
|
ldrt r8, [r0], #0x04 /* LD:28-2b */
|
|
ldrt r9, [r0], #0x04 /* LD:2c-2f */
|
|
strd r4, [r1], #0x08 /* ST:18-1f */
|
|
ldrt r4, [r0], #0x04 /* LD:30-33 */
|
|
ldrt r5, [r0], #0x04 /* LD:34-37 */
|
|
strd r6, [r1], #0x08 /* ST:20-27 */
|
|
ldrt r6, [r0], #0x04 /* LD:38-3b */
|
|
ldrt r7, [r0], #0x04 /* LD:3c-3f */
|
|
strd r8, [r1], #0x08 /* ST:28-2f */
|
|
ldrt r8, [r0], #0x04 /* LD:40-43 */
|
|
ldrt r9, [r0], #0x04 /* LD:44-47 */
|
|
pld [r0, #0x18] /* Prefetch 0x60 */
|
|
strd r4, [r1], #0x08 /* ST:30-37 */
|
|
ldrt r4, [r0], #0x04 /* LD:48-4b */
|
|
ldrt r5, [r0], #0x04 /* LD:4c-4f */
|
|
strd r6, [r1], #0x08 /* ST:38-3f */
|
|
ldrt r6, [r0], #0x04 /* LD:50-53 */
|
|
ldrt r7, [r0], #0x04 /* LD:54-57 */
|
|
strd r8, [r1], #0x08 /* ST:40-47 */
|
|
ldrt r8, [r0], #0x04 /* LD:58-5b */
|
|
ldrt r9, [r0], #0x04 /* LD:5c-5f */
|
|
strd r4, [r1], #0x08 /* ST:48-4f */
|
|
ldrt r4, [r0], #0x04 /* LD:60-63 */
|
|
ldrt r5, [r0], #0x04 /* LD:64-67 */
|
|
pld [r0, #0x18] /* Prefetch 0x80 */
|
|
strd r6, [r1], #0x08 /* ST:50-57 */
|
|
ldrt r6, [r0], #0x04 /* LD:68-6b */
|
|
ldrt r7, [r0], #0x04 /* LD:6c-6f */
|
|
strd r8, [r1], #0x08 /* ST:58-5f */
|
|
ldrt r8, [r0], #0x04 /* LD:70-73 */
|
|
ldrt r9, [r0], #0x04 /* LD:74-77 */
|
|
strd r4, [r1], #0x08 /* ST:60-67 */
|
|
ldrt r4, [r0], #0x04 /* LD:78-7b */
|
|
ldrt r5, [r0], #0x04 /* LD:7c-7f */
|
|
strd r6, [r1], #0x08 /* ST:68-6f */
|
|
strd r8, [r1], #0x08 /* ST:70-77 */
|
|
subs r2, r2, #0x80
|
|
strd r4, [r1], #0x08 /* ST:78-7f */
|
|
bge .Lcopyin_w_loop128
|
|
|
|
/*
 * Fewer than 128 bytes remain.  Undo the -0x80 bias; if nothing is
 * left, restore r4-r9 and return.  Otherwise copy 32-byte chunks with
 * r2 biased by -0x20 while the loop runs.
 */
.Lcopyin_w_lessthan128:
|
|
adds r2, r2, #0x80 /* Adjust for extra sub */
|
|
ldmeqfd sp!, {r4-r9}
|
|
RETeq
|
|
subs r2, r2, #0x20
|
|
blt .Lcopyin_w_lessthan32
|
|
|
|
/* Copy 32 bytes at a time */
|
|
.Lcopyin_w_loop32:
|
|
ldrt r4, [r0], #0x04
|
|
ldrt r5, [r0], #0x04
|
|
pld [r0, #0x18]
|
|
ldrt r6, [r0], #0x04
|
|
ldrt r7, [r0], #0x04
|
|
ldrt r8, [r0], #0x04
|
|
ldrt r9, [r0], #0x04
|
|
strd r4, [r1], #0x08
|
|
ldrt r4, [r0], #0x04
|
|
ldrt r5, [r0], #0x04
|
|
strd r6, [r1], #0x08
|
|
strd r8, [r1], #0x08
|
|
/* Flags from this subs feed the bge; the strd leaves them alone */
subs r2, r2, #0x20
|
|
strd r4, [r1], #0x08
|
|
bge .Lcopyin_w_loop32
|
|
|
|
/*
 * Fewer than 32 bytes remain.  After the early return r2 is 1..31.
 * r4 = bytes that can move as whole 8-byte units (0, 8, 16 or 24).  The
 * subs leaves Z set when nothing remains after those units, and no
 * instruction between it and the final RETeq changes the flags.
 * Each 8-byte step below is exactly four instructions (16 bytes of
 * code), so r5 << 1 = (0x18 - r4) * 2 is the code offset that skips
 * the steps that are not needed.  The nops are padding that keeps this
 * layout; do not add or remove instructions in the table.
 */
.Lcopyin_w_lessthan32:
|
|
adds r2, r2, #0x20 /* Adjust for extra sub */
|
|
ldmeqfd sp!, {r4-r9}
|
|
RETeq /* Return now if done */
|
|
|
|
and r4, r2, #0x18
|
|
rsb r5, r4, #0x18
|
|
subs r2, r2, r4
|
|
add pc, pc, r5, lsl #1
|
|
nop
|
|
|
|
/* At least 24 bytes remaining */
|
|
ldrt r4, [r0], #0x04
|
|
ldrt r5, [r0], #0x04
|
|
nop
|
|
strd r4, [r1], #0x08
|
|
|
|
/* At least 16 bytes remaining */
|
|
ldrt r4, [r0], #0x04
|
|
ldrt r5, [r0], #0x04
|
|
nop
|
|
strd r4, [r1], #0x08
|
|
|
|
/* At least 8 bytes remaining */
|
|
ldrt r4, [r0], #0x04
|
|
ldrt r5, [r0], #0x04
|
|
nop
|
|
strd r4, [r1], #0x08
|
|
|
|
/* Less than 8 bytes remaining */
|
|
ldmfd sp!, {r4-r9}
|
|
RETeq /* Return now if done */
|
|
/* r4-r9 are restored; tell the fault handler nothing is pushed */
mov r3, #0x00
|
|
|
|
/*
 * Both buffers are word aligned and 1..7 bytes remain.  Copy one word
 * if at least 4 are left, then finish the last 1-3 bytes.  In the byte
 * tail r2 is reused as a data register once the count has been
 * compared: ge means at least 2 bytes, gt means 3.
 */
.Lcopyin_w_less_than8:
|
|
subs r2, r2, #0x04
|
|
ldrget ip, [r0], #0x04
|
|
strge ip, [r1], #0x04
|
|
RETeq /* Return now if done */
|
|
addlt r2, r2, #0x04
|
|
ldrbt ip, [r0], #0x01
|
|
cmp r2, #0x02
|
|
ldrgebt r2, [r0], #0x01
|
|
strb ip, [r1], #0x01
|
|
ldrgtbt ip, [r0]
|
|
strgeb r2, [r1], #0x01
|
|
strgtb ip, [r1]
|
|
RET
|
|
|
|
/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * On entry ip = src & 3 (1, 2 or 3).  r4-r7 are pushed and r3 is set
 * to 1 so the fault handler knows to pop them.  r0 is rounded down to
 * a word boundary and the first aligned word is loaded into ip; the
 * load does not change the flags, so the branches below still
 * dispatch on the original misalignment.
 */
|
|
.Lcopyin_bad_align:
|
|
stmfd sp!, {r4-r7}
|
|
mov r3, #0x01
|
|
bic r0, r0, #0x03
|
|
cmp ip, #2
|
|
ldrt ip, [r0], #0x04
|
|
bgt .Lcopyin_bad3
|
|
beq .Lcopyin_bad2
|
|
b .Lcopyin_bad1
|
|
|
|
/*
 * Source is 1 byte past a word boundary.  ip always holds the most
 * recently loaded aligned source word, of which 3 bytes are still
 * unconsumed.  Each output word combines the rest of one source word
 * with the start of the next; shift directions swap under __ARMEB__.
 * Entry is at .Lcopyin_bad1, which is also the loop test.
 */
.Lcopyin_bad1_loop16:
|
|
#ifdef __ARMEB__
|
|
mov r4, ip, lsl #8
|
|
#else
|
|
mov r4, ip, lsr #8
|
|
#endif
|
|
ldrt r5, [r0], #0x04
|
|
pld [r0, #0x018]
|
|
ldrt r6, [r0], #0x04
|
|
ldrt r7, [r0], #0x04
|
|
ldrt ip, [r0], #0x04
|
|
#ifdef __ARMEB__
|
|
orr r4, r4, r5, lsr #24
|
|
mov r5, r5, lsl #8
|
|
orr r5, r5, r6, lsr #24
|
|
mov r6, r6, lsl #8
|
|
orr r6, r6, r7, lsr #24
|
|
mov r7, r7, lsl #8
|
|
orr r7, r7, ip, lsr #24
|
|
#else
|
|
orr r4, r4, r5, lsl #24
|
|
mov r5, r5, lsr #8
|
|
orr r5, r5, r6, lsl #24
|
|
mov r6, r6, lsr #8
|
|
orr r6, r6, r7, lsl #24
|
|
mov r7, r7, lsr #8
|
|
orr r7, r7, ip, lsl #24
|
|
#endif
|
|
str r4, [r1], #0x04
|
|
str r5, [r1], #0x04
|
|
str r6, [r1], #0x04
|
|
str r7, [r1], #0x04
|
|
.Lcopyin_bad1:
|
|
subs r2, r2, #0x10
|
|
bge .Lcopyin_bad1_loop16
|
|
|
|
adds r2, r2, #0x10
|
|
ldmeqfd sp!, {r4-r7}
|
|
RETeq /* Return now if done */
|
|
subs r2, r2, #0x04
|
|
/* Rewind r0 to the first unconsumed byte before the byte tail */
sublt r0, r0, #0x03
|
|
blt .Lcopyin_l4
|
|
|
|
/* Same merge, one output word per iteration */
.Lcopyin_bad1_loop4:
|
|
#ifdef __ARMEB__
|
|
mov r4, ip, lsl #8
|
|
#else
|
|
mov r4, ip, lsr #8
|
|
#endif
|
|
ldrt ip, [r0], #0x04
|
|
subs r2, r2, #0x04
|
|
#ifdef __ARMEB__
|
|
orr r4, r4, ip, lsr #24
|
|
#else
|
|
orr r4, r4, ip, lsl #24
|
|
#endif
|
|
str r4, [r1], #0x04
|
|
bge .Lcopyin_bad1_loop4
|
|
sub r0, r0, #0x03
|
|
b .Lcopyin_l4
|
|
|
|
/*
 * Source is 2 bytes past a word boundary.  ip always holds the most
 * recently loaded aligned source word, of which 2 bytes are still
 * unconsumed.  Each output word is half of one source word joined to
 * half of the next; shift directions swap under __ARMEB__.
 * Entry is at .Lcopyin_bad2, which is also the loop test.
 */
.Lcopyin_bad2_loop16:
|
|
#ifdef __ARMEB__
|
|
mov r4, ip, lsl #16
|
|
#else
|
|
mov r4, ip, lsr #16
|
|
#endif
|
|
ldrt r5, [r0], #0x04
|
|
pld [r0, #0x018]
|
|
ldrt r6, [r0], #0x04
|
|
ldrt r7, [r0], #0x04
|
|
ldrt ip, [r0], #0x04
|
|
#ifdef __ARMEB__
|
|
orr r4, r4, r5, lsr #16
|
|
mov r5, r5, lsl #16
|
|
orr r5, r5, r6, lsr #16
|
|
mov r6, r6, lsl #16
|
|
orr r6, r6, r7, lsr #16
|
|
mov r7, r7, lsl #16
|
|
orr r7, r7, ip, lsr #16
|
|
#else
|
|
orr r4, r4, r5, lsl #16
|
|
mov r5, r5, lsr #16
|
|
orr r5, r5, r6, lsl #16
|
|
mov r6, r6, lsr #16
|
|
orr r6, r6, r7, lsl #16
|
|
mov r7, r7, lsr #16
|
|
orr r7, r7, ip, lsl #16
|
|
#endif
|
|
str r4, [r1], #0x04
|
|
str r5, [r1], #0x04
|
|
str r6, [r1], #0x04
|
|
str r7, [r1], #0x04
|
|
.Lcopyin_bad2:
|
|
subs r2, r2, #0x10
|
|
bge .Lcopyin_bad2_loop16
|
|
|
|
adds r2, r2, #0x10
|
|
ldmeqfd sp!, {r4-r7}
|
|
RETeq /* Return now if done */
|
|
subs r2, r2, #0x04
|
|
/* Rewind r0 to the first unconsumed byte before the byte tail */
sublt r0, r0, #0x02
|
|
blt .Lcopyin_l4
|
|
|
|
/* Same merge, one output word per iteration */
.Lcopyin_bad2_loop4:
|
|
#ifdef __ARMEB__
|
|
mov r4, ip, lsl #16
|
|
#else
|
|
mov r4, ip, lsr #16
|
|
#endif
|
|
ldrt ip, [r0], #0x04
|
|
subs r2, r2, #0x04
|
|
#ifdef __ARMEB__
|
|
orr r4, r4, ip, lsr #16
|
|
#else
|
|
orr r4, r4, ip, lsl #16
|
|
#endif
|
|
str r4, [r1], #0x04
|
|
bge .Lcopyin_bad2_loop4
|
|
sub r0, r0, #0x02
|
|
b .Lcopyin_l4
|
|
|
|
/*
 * Source is 3 bytes past a word boundary.  ip always holds the most
 * recently loaded aligned source word, of which 1 byte is still
 * unconsumed.  Each output word is that byte joined to three bytes of
 * the next source word; shift directions swap under __ARMEB__.
 * Entry is at .Lcopyin_bad3, which is also the loop test.  The end of
 * this block falls through into .Lcopyin_l4.
 */
.Lcopyin_bad3_loop16:
|
|
#ifdef __ARMEB__
|
|
mov r4, ip, lsl #24
|
|
#else
|
|
mov r4, ip, lsr #24
|
|
#endif
|
|
ldrt r5, [r0], #0x04
|
|
pld [r0, #0x018]
|
|
ldrt r6, [r0], #0x04
|
|
ldrt r7, [r0], #0x04
|
|
ldrt ip, [r0], #0x04
|
|
#ifdef __ARMEB__
|
|
orr r4, r4, r5, lsr #8
|
|
mov r5, r5, lsl #24
|
|
orr r5, r5, r6, lsr #8
|
|
mov r6, r6, lsl #24
|
|
orr r6, r6, r7, lsr #8
|
|
mov r7, r7, lsl #24
|
|
orr r7, r7, ip, lsr #8
|
|
#else
|
|
orr r4, r4, r5, lsl #8
|
|
mov r5, r5, lsr #24
|
|
orr r5, r5, r6, lsl #8
|
|
mov r6, r6, lsr #24
|
|
orr r6, r6, r7, lsl #8
|
|
mov r7, r7, lsr #24
|
|
orr r7, r7, ip, lsl #8
|
|
#endif
|
|
str r4, [r1], #0x04
|
|
str r5, [r1], #0x04
|
|
str r6, [r1], #0x04
|
|
str r7, [r1], #0x04
|
|
.Lcopyin_bad3:
|
|
subs r2, r2, #0x10
|
|
bge .Lcopyin_bad3_loop16
|
|
|
|
adds r2, r2, #0x10
|
|
ldmeqfd sp!, {r4-r7}
|
|
RETeq /* Return now if done */
|
|
subs r2, r2, #0x04
|
|
/* Rewind r0 to the first unconsumed byte before the byte tail */
sublt r0, r0, #0x01
|
|
blt .Lcopyin_l4
|
|
|
|
/* Same merge, one output word per iteration */
.Lcopyin_bad3_loop4:
|
|
#ifdef __ARMEB__
|
|
mov r4, ip, lsl #24
|
|
#else
|
|
mov r4, ip, lsr #24
|
|
#endif
|
|
ldrt ip, [r0], #0x04
|
|
subs r2, r2, #0x04
|
|
#ifdef __ARMEB__
|
|
orr r4, r4, ip, lsr #8
|
|
#else
|
|
orr r4, r4, ip, lsl #8
|
|
#endif
|
|
str r4, [r1], #0x04
|
|
bge .Lcopyin_bad3_loop4
|
|
sub r0, r0, #0x01
|
|
|
|
/*
 * Byte tail.  .Lcopyin_l4 is reached from the misaligned-source paths
 * with r4-r7 still pushed and r2 biased by -4: pop them, clear r3 for
 * the fault handler, undo the bias and return if nothing is left.
 * .Lcopyin_l4_2 then copies the remaining 1-3 bytes.
 */
.Lcopyin_l4:
|
|
ldmfd sp!, {r4-r7}
|
|
mov r3, #0x00
|
|
adds r2, r2, #0x04
|
|
RETeq
|
|
.Lcopyin_l4_2:
|
|
/*
 * r2 = 3 - remaining = number of byte copies to skip; each
 * ldrbt/strb pair is 8 bytes of code, hence lsl #3.  When r2 becomes
 * 0 the add is not executed and all three pairs run.  Do not add or
 * remove instructions in this table.
 */
rsbs r2, r2, #0x03
|
|
addne pc, pc, r2, lsl #3
|
|
nop
|
|
ldrbt ip, [r0], #0x01
|
|
strb ip, [r1], #0x01
|
|
ldrbt ip, [r0], #0x01
|
|
strb ip, [r1], #0x01
|
|
ldrbt ip, [r0]
|
|
strb ip, [r1]
|
|
RET
|
|
|
|
|
|
/*
 * copyout -- copy bytes from kernel space to user space.
 *
 * r0 = kernel space address (source)
 * r1 = user space address (destination)
 * r2 = length
 *
 * r0 is set to 0 when the length is <= 0 (signed compare) and when the
 * _arm_memcpy hook reports success; otherwise control falls through to
 * the generic path at .Lnormale.
 */
|
|
ENTRY(copyout)
|
|
cmp r2, #0x00
|
|
movle r0, #0x00
|
|
movle pc, lr /* Bail early if length is <= 0 */
|
|
|
|
/*
 * Optional hook: taken only if the pointer word reached through
 * .L_arm_memcpy is non-NULL and the length is not below the value
 * reached through .L_min_memcpy_size (signed compare).
 */
ldr r3, .L_arm_memcpy
|
|
ldr r3, [r3]
|
|
cmp r3, #0
|
|
beq .Lnormale
|
|
ldr r3, .L_min_memcpy_size
|
|
ldr r3, [r3]
|
|
cmp r2, r3
|
|
blt .Lnormale
|
|
/*
 * Save the original arguments, then call the hook with the first two
 * arguments swapped (r0 = user dst, r1 = kernel src), r2 = length and
 * r3 = 1.
 * NOTE(review): five words are pushed, so sp is not 8-byte aligned at
 * the call if it was on entry -- confirm the ABI in use tolerates this.
 */
stmfd sp!, {r0-r2, r4, lr}
|
|
mov r3, r0
|
|
mov r0, r1
|
|
mov r1, r3
|
|
mov r3, #1 /* DST_IS_USER */
|
|
ldr r4, .L_arm_memcpy
|
|
mov lr, pc
|
|
ldr pc, [r4]
|
|
/*
 * The ldmfd does not alter the flags, so moveq/RETeq still test the
 * hook's return value.  A non-zero result falls through to .Lnormale
 * with the original r0-r2 restored.
 */
cmp r0, #0
|
|
ldmfd sp!, {r0-r2, r4, lr}
|
|
moveq r0, #0
|
|
RETeq
|
|
|
|
/*
 * Generic copyout path.
 *
 * Saves r10, r11 and lr, loads the current PCB pointer into r10, stashes
 * the previous PCB_ONFAULT value in r11 and installs .Lcopyout_fault as
 * the fault handler for the duration of the copy.  r3 starts at 0 and is
 * updated by .Lcopyout_guts to tell the fault handler which registers
 * have been pushed.  On a clean return from .Lcopyout_guts the previous
 * handler is restored and 0 is returned in r0.
 */
.Lnormale:
	stmfd	sp!, {r10-r11, lr}

#ifdef MULTIPROCESSOR
	/* XXX Probably not appropriate for non-Hydra SMPs */
	stmfd	sp!, {r0-r2}
	bl	_C_LABEL(cpu_number)
	ldr	r10, .Lcpu_info
	/*
	 * Index cpu_info[] with the value returned by cpu_number() before
	 * the arguments are restored: the ldmfd below overwrites r0, and
	 * the original ordering indexed the table with the kernel source
	 * address.
	 */
	ldr	r10, [r10, r0, lsl #2]
	ldmfd	sp!, {r0-r2}
	ldr	r10, [r10, #CI_CURPCB]
#else
	ldr	r10, .Lcurpcb
	ldr	r10, [r10]
#endif

	mov	r3, #0x00			/* Nothing pushed by the guts yet */
	adr	ip, .Lcopyout_fault
	ldr	r11, [r10, #PCB_ONFAULT]	/* Remember previous handler */
	str	ip, [r10, #PCB_ONFAULT]
	bl	.Lcopyout_guts
	str	r11, [r10, #PCB_ONFAULT]	/* Restore previous handler */
	mov	r0, #0x00			/* Success */
	ldmfd	sp!, {r10-r11, pc}
|
|
|
|
/*
 * Fault landing pad installed in PCB_ONFAULT by .Lnormale.  Restores
 * the previous handler from r11, pops whatever the copy code pushed
 * (r3 tracks it: 0 = nothing, > 0 = r4-r7, < 0 = r4-r9) and returns to
 * copyout's caller.  r0 is not written here; presumably the trap
 * handler has already loaded the error code -- TODO confirm.
 */
.Lcopyout_fault:
|
|
str r11, [r10, #PCB_ONFAULT]
|
|
cmp r3, #0x00
|
|
ldmgtfd sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
|
|
ldmltfd sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
|
|
ldmfd sp!, {r10-r11, pc}
|
|
|
|
/*
 * Copy engine for copyout: r0 = kernel src, r1 = user dst, r2 = length
 * (> 0).  Kernel-side loads are plain; user-side stores use the T forms
 * (strt/strbt).  This first stage copies 1-3 bytes so that the
 * destination becomes word aligned.
 */
.Lcopyout_guts:
|
|
pld [r0]
|
|
/* Word-align the destination buffer */
|
|
ands ip, r1, #0x03 /* Already word aligned? */
|
|
beq .Lcopyout_wordaligned /* Yup */
|
|
rsb ip, ip, #0x04
|
|
cmp r2, ip /* Enough bytes left to align it? */
|
|
blt .Lcopyout_l4_2 /* Nope. Just copy bytewise */
|
|
sub r2, r2, ip
|
|
/*
 * ip = 3 - (bytes needed) = number of byte copies to skip.  Each
 * ldrb/strbt pair is 8 bytes of code, hence lsl #3.  When ip is 0 the
 * add is not executed and all three pairs run.  The nop pads for the
 * PC read-ahead; do not add or remove instructions in this table.
 */
rsbs ip, ip, #0x03
|
|
addne pc, pc, ip, lsl #3
|
|
nop
|
|
ldrb ip, [r0], #0x01
|
|
strbt ip, [r1], #0x01
|
|
ldrb ip, [r0], #0x01
|
|
strbt ip, [r1], #0x01
|
|
ldrb ip, [r0], #0x01
|
|
strbt ip, [r1], #0x01
|
|
cmp r2, #0x00 /* All done? */
|
|
RETeq
|
|
|
|
/* Destination buffer is now word aligned */
|
|
.Lcopyout_wordaligned:
|
|
/* ip keeps (src & 3); .Lcopyout_bad_align dispatches on it */
ands ip, r0, #0x03 /* Is src also word-aligned? */
|
|
bne .Lcopyout_bad_align /* Nope. Things just got bad */
|
|
cmp r2, #0x08 /* Less than 8 bytes remaining? */
|
|
blt .Lcopyout_w_less_than8
|
|
|
|
/* Quad-align the source buffer */
|
|
/* (8-byte alignment of r0: the loops below load with ldrd) */
tst r0, #0x07 /* Already quad aligned? */
|
|
ldrne ip, [r0], #0x04
|
|
subne r2, r2, #0x04
|
|
strnet ip, [r1], #0x04
|
|
|
|
/* r3 is set only after the push, so a fault above pops nothing extra */
stmfd sp!, {r4-r9} /* Free up some registers */
|
|
mov r3, #-1 /* Signal restore r4-r9 */
|
|
|
|
/* Destination buffer word aligned, source is quad aligned */
|
|
/* r2 is biased by -0x80 while the 128-byte loop runs */
subs r2, r2, #0x80
|
|
blt .Lcopyout_w_lessthan128
|
|
|
|
/* Copy 128 bytes at a time */
|
|
/*
 * Software-pipelined: doubleword loads from the kernel buffer (ldrd)
 * into register pairs r4/r5, r6/r7, r8/r9 are interleaved with word
 * stores to user space (strt).  The LD:/ST: notes give the byte
 * offsets within the 128-byte chunk.  The subs near the end sets the
 * flags for the closing bge; the strt instructions after it do not
 * change them.
 */
.Lcopyout_w_loop128:
|
|
ldrd r4, [r0], #0x08 /* LD:00-07 */
|
|
pld [r0, #0x18] /* Prefetch 0x20 */
|
|
ldrd r6, [r0], #0x08 /* LD:08-0f */
|
|
ldrd r8, [r0], #0x08 /* LD:10-17 */
|
|
strt r4, [r1], #0x04 /* ST:00-03 */
|
|
strt r5, [r1], #0x04 /* ST:04-07 */
|
|
ldrd r4, [r0], #0x08 /* LD:18-1f */
|
|
strt r6, [r1], #0x04 /* ST:08-0b */
|
|
strt r7, [r1], #0x04 /* ST:0c-0f */
|
|
ldrd r6, [r0], #0x08 /* LD:20-27 */
|
|
pld [r0, #0x18] /* Prefetch 0x40 */
|
|
strt r8, [r1], #0x04 /* ST:10-13 */
|
|
strt r9, [r1], #0x04 /* ST:14-17 */
|
|
ldrd r8, [r0], #0x08 /* LD:28-2f */
|
|
strt r4, [r1], #0x04 /* ST:18-1b */
|
|
strt r5, [r1], #0x04 /* ST:1c-1f */
|
|
ldrd r4, [r0], #0x08 /* LD:30-37 */
|
|
strt r6, [r1], #0x04 /* ST:20-23 */
|
|
strt r7, [r1], #0x04 /* ST:24-27 */
|
|
ldrd r6, [r0], #0x08 /* LD:38-3f */
|
|
strt r8, [r1], #0x04 /* ST:28-2b */
|
|
strt r9, [r1], #0x04 /* ST:2c-2f */
|
|
ldrd r8, [r0], #0x08 /* LD:40-47 */
|
|
pld [r0, #0x18] /* Prefetch 0x60 */
|
|
strt r4, [r1], #0x04 /* ST:30-33 */
|
|
strt r5, [r1], #0x04 /* ST:34-37 */
|
|
ldrd r4, [r0], #0x08 /* LD:48-4f */
|
|
strt r6, [r1], #0x04 /* ST:38-3b */
|
|
strt r7, [r1], #0x04 /* ST:3c-3f */
|
|
ldrd r6, [r0], #0x08 /* LD:50-57 */
|
|
strt r8, [r1], #0x04 /* ST:40-43 */
|
|
strt r9, [r1], #0x04 /* ST:44-47 */
|
|
ldrd r8, [r0], #0x08 /* LD:58-5f */
|
|
strt r4, [r1], #0x04 /* ST:48-4b */
|
|
strt r5, [r1], #0x04 /* ST:4c-4f */
|
|
ldrd r4, [r0], #0x08 /* LD:60-67 */
|
|
pld [r0, #0x18] /* Prefetch 0x80 */
|
|
strt r6, [r1], #0x04 /* ST:50-53 */
|
|
strt r7, [r1], #0x04 /* ST:54-57 */
|
|
ldrd r6, [r0], #0x08 /* LD:68-6f */
|
|
strt r8, [r1], #0x04 /* ST:58-5b */
|
|
strt r9, [r1], #0x04 /* ST:5c-5f */
|
|
ldrd r8, [r0], #0x08 /* LD:70-77 */
|
|
strt r4, [r1], #0x04 /* ST:60-63 */
|
|
strt r5, [r1], #0x04 /* ST:64-67 */
|
|
ldrd r4, [r0], #0x08 /* LD:78-7f */
|
|
strt r6, [r1], #0x04 /* ST:68-6b */
|
|
strt r7, [r1], #0x04 /* ST:6c-6f */
|
|
strt r8, [r1], #0x04 /* ST:70-73 */
|
|
strt r9, [r1], #0x04 /* ST:74-77 */
|
|
subs r2, r2, #0x80
|
|
strt r4, [r1], #0x04 /* ST:78-7b */
|
|
strt r5, [r1], #0x04 /* ST:7c-7f */
|
|
bge .Lcopyout_w_loop128
|
|
|
|
/*
 * Fewer than 128 bytes remain.  Undo the -0x80 bias; if nothing is
 * left, restore r4-r9 and return.  Otherwise copy 32-byte chunks with
 * r2 biased by -0x20 while the loop runs.
 */
.Lcopyout_w_lessthan128:
|
|
adds r2, r2, #0x80 /* Adjust for extra sub */
|
|
ldmeqfd sp!, {r4-r9}
|
|
RETeq /* Return now if done */
|
|
subs r2, r2, #0x20
|
|
blt .Lcopyout_w_lessthan32
|
|
|
|
/* Copy 32 bytes at a time */
|
|
.Lcopyout_w_loop32:
|
|
ldrd r4, [r0], #0x08
|
|
pld [r0, #0x18]
|
|
ldrd r6, [r0], #0x08
|
|
ldrd r8, [r0], #0x08
|
|
strt r4, [r1], #0x04
|
|
strt r5, [r1], #0x04
|
|
ldrd r4, [r0], #0x08
|
|
strt r6, [r1], #0x04
|
|
strt r7, [r1], #0x04
|
|
strt r8, [r1], #0x04
|
|
strt r9, [r1], #0x04
|
|
/* Flags from this subs feed the bge; the strt pair leaves them alone */
subs r2, r2, #0x20
|
|
strt r4, [r1], #0x04
|
|
strt r5, [r1], #0x04
|
|
bge .Lcopyout_w_loop32
|
|
|
|
/*
 * Fewer than 32 bytes remain.  After the early return r2 is 1..31.
 * r4 = bytes that can move as whole 8-byte units (0, 8, 16 or 24).  The
 * subs leaves Z set when nothing remains after those units, and no
 * instruction between it and the final RETeq changes the flags.
 * Each 8-byte step below is exactly four instructions (16 bytes of
 * code), so r5 << 1 = (0x18 - r4) * 2 is the code offset that skips
 * the steps that are not needed.  The nops are padding that keeps this
 * layout; do not add or remove instructions in the table.
 */
.Lcopyout_w_lessthan32:
|
|
adds r2, r2, #0x20 /* Adjust for extra sub */
|
|
ldmeqfd sp!, {r4-r9}
|
|
RETeq /* Return now if done */
|
|
|
|
and r4, r2, #0x18
|
|
rsb r5, r4, #0x18
|
|
subs r2, r2, r4
|
|
add pc, pc, r5, lsl #1
|
|
nop
|
|
|
|
/* At least 24 bytes remaining */
|
|
ldrd r4, [r0], #0x08
|
|
strt r4, [r1], #0x04
|
|
strt r5, [r1], #0x04
|
|
nop
|
|
|
|
/* At least 16 bytes remaining */
|
|
ldrd r4, [r0], #0x08
|
|
strt r4, [r1], #0x04
|
|
strt r5, [r1], #0x04
|
|
nop
|
|
|
|
/* At least 8 bytes remaining */
|
|
ldrd r4, [r0], #0x08
|
|
strt r4, [r1], #0x04
|
|
strt r5, [r1], #0x04
|
|
nop
|
|
|
|
/* Less than 8 bytes remaining */
|
|
ldmfd sp!, {r4-r9}
|
|
RETeq /* Return now if done */
|
|
/* r4-r9 are restored; tell the fault handler nothing is pushed */
mov r3, #0x00
|
|
|
|
/*
 * Both buffers are word aligned and 1..7 bytes remain.  Copy one word
 * if at least 4 are left, then finish the last 1-3 bytes.  In the byte
 * tail r2 is reused as a data register once the count has been
 * compared: ge means at least 2 bytes, gt means 3.
 */
.Lcopyout_w_less_than8:
|
|
subs r2, r2, #0x04
|
|
ldrge ip, [r0], #0x04
|
|
strget ip, [r1], #0x04
|
|
RETeq /* Return now if done */
|
|
addlt r2, r2, #0x04
|
|
ldrb ip, [r0], #0x01
|
|
cmp r2, #0x02
|
|
ldrgeb r2, [r0], #0x01
|
|
strbt ip, [r1], #0x01
|
|
ldrgtb ip, [r0]
|
|
strgebt r2, [r1], #0x01
|
|
strgtbt ip, [r1]
|
|
RET
|
|
|
|
/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer (r1) is word aligned, but the source buffer
 * (r0) is not.
 *
 * On entry ip = src & 3 (1, 2 or 3).  r4-r7 are pushed and r3 is set
 * to 1 so the fault handler knows to pop them.  r0 is rounded down to
 * a word boundary and the first aligned word is loaded into ip; the
 * load does not change the flags, so the branches below still
 * dispatch on the original misalignment.
 */
|
|
.Lcopyout_bad_align:
|
|
stmfd sp!, {r4-r7}
|
|
mov r3, #0x01
|
|
bic r0, r0, #0x03
|
|
cmp ip, #2
|
|
ldr ip, [r0], #0x04
|
|
bgt .Lcopyout_bad3
|
|
beq .Lcopyout_bad2
|
|
b .Lcopyout_bad1
|
|
|
|
/*
 * Source is 1 byte past a word boundary.  ip always holds the most
 * recently loaded aligned source word, of which 3 bytes are still
 * unconsumed.  Each output word combines the rest of one source word
 * with the start of the next; shift directions swap under __ARMEB__.
 * Entry is at .Lcopyout_bad1, which is also the loop test.
 */
.Lcopyout_bad1_loop16:
|
|
#ifdef __ARMEB__
|
|
mov r4, ip, lsl #8
|
|
#else
|
|
mov r4, ip, lsr #8
|
|
#endif
|
|
ldr r5, [r0], #0x04
|
|
pld [r0, #0x018]
|
|
ldr r6, [r0], #0x04
|
|
ldr r7, [r0], #0x04
|
|
ldr ip, [r0], #0x04
|
|
#ifdef __ARMEB__
|
|
orr r4, r4, r5, lsr #24
|
|
mov r5, r5, lsl #8
|
|
orr r5, r5, r6, lsr #24
|
|
mov r6, r6, lsl #8
|
|
orr r6, r6, r7, lsr #24
|
|
mov r7, r7, lsl #8
|
|
orr r7, r7, ip, lsr #24
|
|
#else
|
|
orr r4, r4, r5, lsl #24
|
|
mov r5, r5, lsr #8
|
|
orr r5, r5, r6, lsl #24
|
|
mov r6, r6, lsr #8
|
|
orr r6, r6, r7, lsl #24
|
|
mov r7, r7, lsr #8
|
|
orr r7, r7, ip, lsl #24
|
|
#endif
|
|
strt r4, [r1], #0x04
|
|
strt r5, [r1], #0x04
|
|
strt r6, [r1], #0x04
|
|
strt r7, [r1], #0x04
|
|
.Lcopyout_bad1:
|
|
subs r2, r2, #0x10
|
|
bge .Lcopyout_bad1_loop16
|
|
|
|
adds r2, r2, #0x10
|
|
ldmeqfd sp!, {r4-r7}
|
|
RETeq /* Return now if done */
|
|
subs r2, r2, #0x04
|
|
/* Rewind r0 to the first unconsumed byte before the byte tail */
sublt r0, r0, #0x03
|
|
blt .Lcopyout_l4
|
|
|
|
/* Same merge, one output word per iteration */
.Lcopyout_bad1_loop4:
|
|
#ifdef __ARMEB__
|
|
mov r4, ip, lsl #8
|
|
#else
|
|
mov r4, ip, lsr #8
|
|
#endif
|
|
ldr ip, [r0], #0x04
|
|
subs r2, r2, #0x04
|
|
#ifdef __ARMEB__
|
|
orr r4, r4, ip, lsr #24
|
|
#else
|
|
orr r4, r4, ip, lsl #24
|
|
#endif
|
|
strt r4, [r1], #0x04
|
|
bge .Lcopyout_bad1_loop4
|
|
sub r0, r0, #0x03
|
|
b .Lcopyout_l4
|
|
|
|
/*
 * Source is 2 bytes past a word boundary.  ip always holds the most
 * recently loaded aligned source word, of which 2 bytes are still
 * unconsumed.  Each output word is half of one source word joined to
 * half of the next; shift directions swap under __ARMEB__.
 * Entry is at .Lcopyout_bad2, which is also the loop test.
 */
.Lcopyout_bad2_loop16:
|
|
#ifdef __ARMEB__
|
|
mov r4, ip, lsl #16
|
|
#else
|
|
mov r4, ip, lsr #16
|
|
#endif
|
|
ldr r5, [r0], #0x04
|
|
pld [r0, #0x018]
|
|
ldr r6, [r0], #0x04
|
|
ldr r7, [r0], #0x04
|
|
ldr ip, [r0], #0x04
|
|
#ifdef __ARMEB__
|
|
orr r4, r4, r5, lsr #16
|
|
mov r5, r5, lsl #16
|
|
orr r5, r5, r6, lsr #16
|
|
mov r6, r6, lsl #16
|
|
orr r6, r6, r7, lsr #16
|
|
mov r7, r7, lsl #16
|
|
orr r7, r7, ip, lsr #16
|
|
#else
|
|
orr r4, r4, r5, lsl #16
|
|
mov r5, r5, lsr #16
|
|
orr r5, r5, r6, lsl #16
|
|
mov r6, r6, lsr #16
|
|
orr r6, r6, r7, lsl #16
|
|
mov r7, r7, lsr #16
|
|
orr r7, r7, ip, lsl #16
|
|
#endif
|
|
strt r4, [r1], #0x04
|
|
strt r5, [r1], #0x04
|
|
strt r6, [r1], #0x04
|
|
strt r7, [r1], #0x04
|
|
.Lcopyout_bad2:
|
|
subs r2, r2, #0x10
|
|
bge .Lcopyout_bad2_loop16
|
|
|
|
adds r2, r2, #0x10
|
|
ldmeqfd sp!, {r4-r7}
|
|
RETeq /* Return now if done */
|
|
subs r2, r2, #0x04
|
|
/* Rewind r0 to the first unconsumed byte before the byte tail */
sublt r0, r0, #0x02
|
|
blt .Lcopyout_l4
|
|
|
|
/* Same merge, one output word per iteration */
.Lcopyout_bad2_loop4:
|
|
#ifdef __ARMEB__
|
|
mov r4, ip, lsl #16
|
|
#else
|
|
mov r4, ip, lsr #16
|
|
#endif
|
|
ldr ip, [r0], #0x04
|
|
subs r2, r2, #0x04
|
|
#ifdef __ARMEB__
|
|
orr r4, r4, ip, lsr #16
|
|
#else
|
|
orr r4, r4, ip, lsl #16
|
|
#endif
|
|
strt r4, [r1], #0x04
|
|
bge .Lcopyout_bad2_loop4
|
|
sub r0, r0, #0x02
|
|
b .Lcopyout_l4
|
|
|
|
/*
 * Source is 3 bytes past a word boundary.  ip always holds the most
 * recently loaded aligned source word, of which 1 byte is still
 * unconsumed.  Each output word is that byte joined to three bytes of
 * the next source word; shift directions swap under __ARMEB__.
 * Entry is at .Lcopyout_bad3, which is also the loop test.  The end of
 * this block falls through into .Lcopyout_l4.
 */
.Lcopyout_bad3_loop16:
|
|
#ifdef __ARMEB__
|
|
mov r4, ip, lsl #24
|
|
#else
|
|
mov r4, ip, lsr #24
|
|
#endif
|
|
ldr r5, [r0], #0x04
|
|
pld [r0, #0x018]
|
|
ldr r6, [r0], #0x04
|
|
ldr r7, [r0], #0x04
|
|
ldr ip, [r0], #0x04
|
|
#ifdef __ARMEB__
|
|
orr r4, r4, r5, lsr #8
|
|
mov r5, r5, lsl #24
|
|
orr r5, r5, r6, lsr #8
|
|
mov r6, r6, lsl #24
|
|
orr r6, r6, r7, lsr #8
|
|
mov r7, r7, lsl #24
|
|
orr r7, r7, ip, lsr #8
|
|
#else
|
|
orr r4, r4, r5, lsl #8
|
|
mov r5, r5, lsr #24
|
|
orr r5, r5, r6, lsl #8
|
|
mov r6, r6, lsr #24
|
|
orr r6, r6, r7, lsl #8
|
|
mov r7, r7, lsr #24
|
|
orr r7, r7, ip, lsl #8
|
|
#endif
|
|
strt r4, [r1], #0x04
|
|
strt r5, [r1], #0x04
|
|
strt r6, [r1], #0x04
|
|
strt r7, [r1], #0x04
|
|
.Lcopyout_bad3:
|
|
subs r2, r2, #0x10
|
|
bge .Lcopyout_bad3_loop16
|
|
|
|
adds r2, r2, #0x10
|
|
ldmeqfd sp!, {r4-r7}
|
|
RETeq /* Return now if done */
|
|
subs r2, r2, #0x04
|
|
/* Rewind r0 to the first unconsumed byte before the byte tail */
sublt r0, r0, #0x01
|
|
blt .Lcopyout_l4
|
|
|
|
/* Same merge, one output word per iteration */
.Lcopyout_bad3_loop4:
|
|
#ifdef __ARMEB__
|
|
mov r4, ip, lsl #24
|
|
#else
|
|
mov r4, ip, lsr #24
|
|
#endif
|
|
ldr ip, [r0], #0x04
|
|
subs r2, r2, #0x04
|
|
#ifdef __ARMEB__
|
|
orr r4, r4, ip, lsr #8
|
|
#else
|
|
orr r4, r4, ip, lsl #8
|
|
#endif
|
|
strt r4, [r1], #0x04
|
|
bge .Lcopyout_bad3_loop4
|
|
sub r0, r0, #0x01
|
|
|
|
/*
 * Byte tail.  .Lcopyout_l4 is reached from the misaligned-source paths
 * with r4-r7 still pushed and r2 biased by -4: pop them, clear r3 for
 * the fault handler, undo the bias and return if nothing is left.
 * .Lcopyout_l4_2 then copies the remaining 1-3 bytes.
 */
.Lcopyout_l4:
|
|
ldmfd sp!, {r4-r7}
|
|
mov r3, #0x00
|
|
adds r2, r2, #0x04
|
|
RETeq
|
|
.Lcopyout_l4_2:
|
|
/*
 * r2 = 3 - remaining = number of byte copies to skip; each
 * ldrb/strbt pair is 8 bytes of code, hence lsl #3.  When r2 becomes
 * 0 the add is not executed and all three pairs run.  Do not add or
 * remove instructions in this table.
 */
rsbs r2, r2, #0x03
|
|
addne pc, pc, r2, lsl #3
|
|
nop
|
|
ldrb ip, [r0], #0x01
|
|
strbt ip, [r1], #0x01
|
|
ldrb ip, [r0], #0x01
|
|
strbt ip, [r1], #0x01
|
|
ldrb ip, [r0]
|
|
strbt ip, [r1]
|
|
RET
|