Skip to content

Commit

Permalink
Made ROM padding code ~42% faster.
Browse files Browse the repository at this point in the history
  • Loading branch information
profi200 committed Jul 26, 2024
1 parent 3a70cdc commit 41db876
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 10 deletions.
34 changes: 34 additions & 0 deletions include/arm11/fast_rom_padding.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#pragma once

/*
* This file is part of open_agb_firm
* Copyright (C) 2024 profi200
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include "types.h"



#ifdef __cplusplus
extern "C"
{
#endif

// Fills the memory from romEnd up to the end address hard-coded in the
// assembly implementation (0x22000000) with fake "open bus" padding.
// Each 32-bit word written holds (address / 2) in its lower half and
// (address / 2) + 1 in its upper half, both truncated to 16 bits.
// The implementation stores 16 bytes per loop iteration, so the distance
// from romEnd to the end address is expected to be a multiple of 16 bytes.
void makeOpenBusPaddingFast(u32 *romEnd);

#ifdef __cplusplus
} // extern "C"
#endif
60 changes: 60 additions & 0 deletions source/arm11/fast_rom_padding.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
@ This file is part of open_agb_firm
@ Copyright (C) 2024 profi200
@
@ This program is free software: you can redistribute it and/or modify
@ it under the terms of the GNU General Public License as published by
@ the Free Software Foundation, either version 3 of the License, or
@ (at your option) any later version.
@
@ This program is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@ GNU General Public License for more details.
@
@ You should have received a copy of the GNU General Public License
@ along with this program. If not, see <http://www.gnu.org/licenses/>.

#include "asm_macros.h"

.syntax unified
.cpu mpcore
.fpu vfpv2



@ void makeOpenBusPaddingFast(u32 *romEnd);
@
@ Fills the memory from romEnd up to (but not including) 0x22000000 with
@ fake "open bus" padding. Each 32-bit word written holds (address >> 1)
@ in its lower half and (address >> 1) + 1 in its upper half, both
@ truncated to 16 bits.
@
@ In:       r0 = romEnd. The distance from romEnd to 0x22000000 must be a
@                multiple of 16 bytes because the loop stores 16 bytes per
@                iteration and only stops when the remaining size hits 0.
@ Out:      Nothing.
@ Clobbers: r0-r3, r12 and the flags. r4 and lr are saved and restored.
@
@ If romEnd is at or above 0x22000000 there is nothing to pad and the
@ function returns without touching memory or the stack.
BEGIN_ASM_FUNC makeOpenBusPaddingFast
    @ Calculate size from start and highest ROM address.
    @ The loop below is post-tested: it always stores one block before it
    @ checks the remaining size. Without this early exit a size of 0 would
    @ write past the end and then count down from 0xFFFFFFF0.
    rsbs r1, r0, #0x22000000          @ r1 = 0x22000000 - r0; // Updates flags.
    bxls lr                           @ if(r0 >= 0x22000000) return; // Nothing to pad.

    stmfd sp!, {r4, lr}               @ Save registers.

    @ Generate pattern halves from address.
    lsr r2, r0, #1                    @ r2 = r0>>1;
    add r3, r2, #1                    @ r3 = r2 + 1;

    @ Generate constant for incrementing the pattern halves.
    @ 0x20002 is built in 2 steps instead of using a single mov.
    mov r12, #2                       @ r12 = 2;
    add r12, r12, #0x20000            @ r12 += 0x20000;

    @ Join pattern halves and precalculate the next 3 patterns.
    pkhbt r2, r2, r3, lsl #16         @ r2 = (r2 & 0xFFFF) | r3<<16;
    uadd16 r3, r2, r12                @ r3 = ((r2 + 0x20000) & 0xFFFF0000) | ((r2 + 2) & 0xFFFF); // r12 is 0x20002.
    uadd16 r4, r3, r12                @ r4 = ((r3 + 0x20000) & 0xFFFF0000) | ((r3 + 2) & 0xFFFF); // r12 is 0x20002.
    uadd16 lr, r4, r12                @ lr = ((r4 + 0x20000) & 0xFFFF0000) | ((r4 + 2) & 0xFFFF); // r12 is 0x20002.

    @ Adjust constant for unrolled loop. 0x20002 --> 0x80008.
    @ Each register is stored once per 16 bytes so it must advance by 4 words (8 halfwords).
    lsl r12, r12, #2                  @ r12 <<= 2;
makeOpenBusPaddingFast_blk_lp:
    @ Store 16 pattern bytes at a time and decrement size.
    stmia r0!, {r2-r4, lr}            @ *((_16BytesBlock*)r0) = r2_to_r4_lr; r0 += 16;
    subs r1, r1, #16                  @ r1 -= 16; // Updates flags.

    @ Increment patterns and jump back if we are not done yet.
    @ uadd16 only updates the GE bits so the Z flag from subs is still valid at the branch.
    uadd16 r2, r2, r12                @ r2 = ((r2 + 0x80000) & 0xFFFF0000) | ((r2 + 8) & 0xFFFF); // r12 is 0x80008.
    uadd16 r3, r3, r12                @ r3 = ((r3 + 0x80000) & 0xFFFF0000) | ((r3 + 8) & 0xFFFF); // r12 is 0x80008.
    uadd16 r4, r4, r12                @ r4 = ((r4 + 0x80000) & 0xFFFF0000) | ((r4 + 8) & 0xFFFF); // r12 is 0x80008.
    uadd16 lr, lr, r12                @ lr = ((lr + 0x80000) & 0xFFFF0000) | ((lr + 8) & 0xFFFF); // r12 is 0x80008.
    bne makeOpenBusPaddingFast_blk_lp @ if(r1 != 0) goto makeOpenBusPaddingFast_blk_lp;

    ldmfd sp!, {r4, pc}               @ Restore registers and return.
END_ASM_FUNC
14 changes: 4 additions & 10 deletions source/arm11/open_agb_firm.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#include <string.h>
#include "types.h"
#include "util.h"
#include "arm_intrinsic.h"
#include "arm11/fast_rom_padding.h"
#include "oaf_error_codes.h"
#include "fs.h"
#include "arm11/fmt.h"
Expand Down Expand Up @@ -49,9 +49,9 @@ static u32 fixRomPadding(const u32 romFileSize)
// Pad unused ROM area with 0xFFs (trimmed ROMs).
// Smallest retail ROM chip is 8 Mbit (1 MiB).
u32 romSize = nextPow2(romFileSize);
if(romSize < 0x100000) romSize = 0x100000;
romSize = (romSize < 0x100000 ? 0x100000 : romSize);
const uintptr_t romLoc = LGY_ROM_LOC;
memset((void*)(romLoc + romFileSize), 0xFFFFFFFF, romSize - romFileSize);
memset((void*)(romLoc + romFileSize), 0xFF, romSize - romFileSize);

u32 mirroredSize = romSize;
if(romSize == 0x100000) // 1 MiB.
Expand All @@ -69,13 +69,7 @@ static u32 fixRomPadding(const u32 romFileSize)
}

// Fake "open bus" padding.
u32 padding = (romLoc + mirroredSize) / 2;
padding = __pkhbt(padding, padding + 1, 16); // Copy lower half + 1 to upper half.
for(uintptr_t i = romLoc + mirroredSize; i < romLoc + LGY_MAX_ROM_SIZE; i += 4)
{
*(u32*)i = padding;
padding = __uadd16(padding, 0x20002); // Unsigned parallel halfword-wise addition.
}
makeOpenBusPaddingFast((u32*)(romLoc + mirroredSize));

// We don't return the mirrored size because the db hashes are over unmirrored dumps.
return romSize;
Expand Down

0 comments on commit 41db876

Please sign in to comment.