Skip to content

Commit 41db876

Browse files
committed
Made ROM padding code ~42% faster.
1 parent 3a70cdc commit 41db876

File tree

3 files changed

+98
-10
lines changed

3 files changed

+98
-10
lines changed

include/arm11/fast_rom_padding.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#pragma once
2+
3+
/*
4+
* This file is part of open_agb_firm
5+
* Copyright (C) 2024 profi200
6+
*
7+
* This program is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* This program is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
19+
*/
20+
21+
#include "types.h"
22+
23+
24+
25+
#ifdef __cplusplus
26+
extern "C"
27+
{
28+
#endif
29+
30+
/**
 * @brief      Fills the area from romEnd up to address 0x22000000 with the
 *             fake "open bus" pattern: each 32-bit word stored at address A
 *             holds (A>>1) & 0xFFFF in its lower half and ((A>>1) + 1) & 0xFFFF
 *             in its upper half.
 *
 * @param      romEnd  First address to fill. The implementation writes 16 bytes
 *                     per iteration and only stops when exactly
 *                     (0x22000000 - romEnd) bytes have been written, so that
 *                     distance must be a non-zero multiple of 16.
 */
void makeOpenBusPaddingFast(u32 *romEnd);
31+
32+
#ifdef __cplusplus
33+
} // extern "C"
34+
#endif

source/arm11/fast_rom_padding.s

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
@ This file is part of open_agb_firm
2+
@ Copyright (C) 2024 profi200
3+
@
4+
@ This program is free software: you can redistribute it and/or modify
5+
@ it under the terms of the GNU General Public License as published by
6+
@ the Free Software Foundation, either version 3 of the License, or
7+
@ (at your option) any later version.
8+
@
9+
@ This program is distributed in the hope that it will be useful,
10+
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
@ GNU General Public License for more details.
13+
@
14+
@ You should have received a copy of the GNU General Public License
15+
@ along with this program. If not, see <http://www.gnu.org/licenses/>.
16+
17+
#include "asm_macros.h"
18+
19+
.syntax unified
20+
.cpu mpcore
21+
.fpu vfpv2
22+
23+
24+
25+
@ void makeOpenBusPaddingFast(u32 *romEnd);
@
@ Fills memory from romEnd (r0) up to 0x22000000 with a pattern derived from
@ the address: every 32-bit word holds two 16-bit halves, the lower one being
@ (address>>1) & 0xFFFF and the upper one the lower one plus 1.
@ Four consecutive pattern words are kept in r2, r3, r4 and lr so that a single
@ stmia can store 16 bytes per loop iteration.
@
@ In:       r0 = romEnd (first address to fill).
@ Clobbers: r0-r3, r12 and flags. r4 and lr are saved and restored on the stack.
@
@ NOTE(review): The loop stores before it tests and only exits when the
@ remaining size becomes exactly 0. If r0 == 0x22000000 (size 0) or the size
@ is not a multiple of 16 the loop will not terminate at the intended end.
@ Confirm that no caller can pass such a value.
26+
BEGIN_ASM_FUNC makeOpenBusPaddingFast
27+
@ Save registers and calculate size from start and highest ROM address.
28+
stmfd sp!, {r4, lr} @ Save registers.
29+
rsb r1, r0, #0x22000000 @ r1 = 0x22000000 - r0;
30+
31+
@ Generate pattern halves from address.
32+
lsr r2, r0, #1 @ r2 = r0>>1;
33+
add r3, r2, #1 @ r3 = r2 + 1;
34+
35+
@ Generate constant for incrementing the pattern halves.
36+
mov r12, #2 @ r12 = 2;
37+
add r12, r12, #0x20000 @ r12 += 0x20000;
38+
39+
@ Join pattern halves and precalculate the next 3 patterns.
40+
pkhbt r2, r2, r3, lsl #16 @ r2 = (r2 & 0xFFFF) | r3<<16;
41+
uadd16 r3, r2, r12 @ r3 = ((r2 + 0x20000) & 0xFFFF0000) | ((r2 + 2) & 0xFFFF); // r12 is 0x20002.
42+
uadd16 r4, r3, r12 @ r4 = ((r3 + 0x20000) & 0xFFFF0000) | ((r3 + 2) & 0xFFFF); // r12 is 0x20002.
43+
uadd16 lr, r4, r12 @ lr = ((r4 + 0x20000) & 0xFFFF0000) | ((r4 + 2) & 0xFFFF); // r12 is 0x20002.
44+
45+
@ Adjust constant for unrolled loop. 0x20002 --> 0x80008.
46+
lsl r12, r12, #2 @ r12 <<= 2;
47+
makeOpenBusPaddingFast_blk_lp:
48+
@ Store 16 pattern bytes at a time and decrement size.
49+
stmia r0!, {r2-r4, lr} @ *((_16BytesBlock*)r0) = r2_to_r4_lr; r0 += 16;
50+
subs r1, r1, #16 @ r1 -= 16; // Updates flags.
51+
52+
@ Increment patterns and jump back if we are not done yet.
@ The bne below still tests the flags set by subs above. The uadd16
@ instructions in between are expected to leave the Z flag untouched.
53+
uadd16 r2, r2, r12 @ r2 = ((r2 + 0x80000) & 0xFFFF0000) | ((r2 + 8) & 0xFFFF); // r12 is 0x80008.
54+
uadd16 r3, r3, r12 @ r3 = ((r3 + 0x80000) & 0xFFFF0000) | ((r3 + 8) & 0xFFFF); // r12 is 0x80008.
55+
uadd16 r4, r4, r12 @ r4 = ((r4 + 0x80000) & 0xFFFF0000) | ((r4 + 8) & 0xFFFF); // r12 is 0x80008.
56+
uadd16 lr, lr, r12 @ lr = ((lr + 0x80000) & 0xFFFF0000) | ((lr + 8) & 0xFFFF); // r12 is 0x80008.
57+
bne makeOpenBusPaddingFast_blk_lp @ if(r1 != 0) goto makeOpenBusPaddingFast_blk_lp;
58+
59+
ldmfd sp!, {r4, pc} @ Restore registers and return.
60+
END_ASM_FUNC

source/arm11/open_agb_firm.c

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
#include <string.h>
2121
#include "types.h"
2222
#include "util.h"
23-
#include "arm_intrinsic.h"
23+
#include "arm11/fast_rom_padding.h"
2424
#include "oaf_error_codes.h"
2525
#include "fs.h"
2626
#include "arm11/fmt.h"
@@ -49,9 +49,9 @@ static u32 fixRomPadding(const u32 romFileSize)
4949
// Pad unused ROM area with 0xFFs (trimmed ROMs).
5050
// Smallest retail ROM chip is 8 Mbit (1 MiB).
5151
u32 romSize = nextPow2(romFileSize);
52-
if(romSize < 0x100000) romSize = 0x100000;
52+
romSize = (romSize < 0x100000 ? 0x100000 : romSize);
5353
const uintptr_t romLoc = LGY_ROM_LOC;
54-
memset((void*)(romLoc + romFileSize), 0xFFFFFFFF, romSize - romFileSize);
54+
memset((void*)(romLoc + romFileSize), 0xFF, romSize - romFileSize);
5555

5656
u32 mirroredSize = romSize;
5757
if(romSize == 0x100000) // 1 MiB.
@@ -69,13 +69,7 @@ static u32 fixRomPadding(const u32 romFileSize)
6969
}
7070

7171
// Fake "open bus" padding.
72-
u32 padding = (romLoc + mirroredSize) / 2;
73-
padding = __pkhbt(padding, padding + 1, 16); // Copy lower half + 1 to upper half.
74-
for(uintptr_t i = romLoc + mirroredSize; i < romLoc + LGY_MAX_ROM_SIZE; i += 4)
75-
{
76-
*(u32*)i = padding;
77-
padding = __uadd16(padding, 0x20002); // Unsigned parallel halfword-wise addition.
78-
}
72+
makeOpenBusPaddingFast((u32*)(romLoc + mirroredSize));
7973

8074
// We don't return the mirrored size because the db hashes are over unmirrored dumps.
8175
return romSize;

0 commit comments

Comments
 (0)