Skip to content

Commit 235e52e

Browse files
committed
c18n: Introduce fast path in trampolines
These fast paths check whether the compartment transition is a self-transition and, if so, jump directly to the target without modifying the trusted stack. Calls through the PLT continue to use the old slow path, as self-transitions can already be ruled out when relocating jump slots. Introduce the LD_COMPARTMENT_NO_FAST_PATH environment variable, which forces all trampolines to use the slow path.
1 parent 1e8ec53 commit 235e52e

File tree

7 files changed

+127
-30
lines changed

7 files changed

+127
-30
lines changed

libexec/rtld-elf/aarch64/rtld_c18n_asm.S

Lines changed: 79 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ ENTRY(create_untrusted_stk)
104104
/*
105105
* NON-STANDARD CALLING CONVENTION
106106
*
107-
* w19: Callee's compartment ID
107+
* w13: Callee's compartment ID
108108
* c26: Callee to be tail-called
109109
*
110110
* The function resolves the callee's stack, installs it, and tail-calls
@@ -123,7 +123,7 @@ ENTRY(create_untrusted_stk)
123123

124124
save_arguments
125125

126-
mov w0, w19
126+
mov w0, w13
127127
bl resolve_untrusted_stk_impl
128128
mov c10, c0
129129

@@ -166,16 +166,27 @@ ENTRY(tramp_hook)
166166
*
167167
* c10-c11 hold the first two arguments of tramp_hook.
168168
*
169-
* All argument registers and callee-saved registers must be preserved.
169+
* All argument registers, callee-saved registers, and arguments used
170+
* by the stack resolver must be preserved.
170171
*/
171172

172173
save_arguments
173174

175+
/*
176+
* Save arguments used by the stack resolver.
177+
*/
178+
mov w19, w13
179+
174180
mov c0, c10
175181
mov c1, c11
176182
mrs c2, TRUSTED_STACK
177183
bl tramp_hook_impl
178184

185+
/*
186+
* Restore arguments used by the stack resolver.
187+
*/
188+
mov w13, w19
189+
179190
restore_arguments
180191

181192
#ifdef __ARM_MORELLO_PURECAP_BENCHMARK_ABI
@@ -217,13 +228,54 @@ TRAMP(tramp_push_frame)
217228
*/
218229
ldp x10, x11, [TRUSTED_STACK_C, #TRUSTED_FRAME_CALLER]
219230
/*
220-
* Get the stack lookup table.
231+
* Extract the caller's compartment ID.
221232
*/
222-
mrs STACK_TABLE_C, STACK_TABLE
233+
ubfx x12, x10, #32, #16
234+
/*
235+
* Get the callee's compartment ID.
236+
*/
237+
1: movz w13, #0 /* To be patched at runtime */
238+
239+
/*
240+
* Tail-call the target if the caller's and callee's compartment IDs
241+
* match. Otherwise, go to the middle of the slow path.
242+
*/
243+
cmp w12, w13
244+
b.ne 5f
245+
/*
246+
* Load the target capability and overwrite c18.
247+
*/
248+
2: ldr c18, #0 /* To be patched at runtime */
249+
#ifdef __ARM_MORELLO_PURECAP_BENCHMARK_ABI
250+
br x18
251+
#else
252+
brr c18
253+
#endif
254+
255+
/*
256+
* Entry point of the slow path.
257+
*/
258+
3: mrs TRUSTED_STACK_C, TRUSTED_STACK
259+
260+
/*
261+
* Load the caller's compartment ID and the landing address from the
262+
* previous trusted frame.
263+
*/
264+
ldp x10, x11, [TRUSTED_STACK_C, #TRUSTED_FRAME_CALLER]
223265
/*
224266
* Extract the caller's compartment ID.
225267
*/
226268
ubfx x12, x10, #32, #16
269+
/*
270+
* Get the callee's compartment ID.
271+
*/
272+
4: movz w13, #0 /* To be patched at runtime */
273+
274+
5:
275+
/*
276+
* Get the stack lookup table.
277+
*/
278+
mrs STACK_TABLE_C, STACK_TABLE
227279
/*
228280
* Load the caller's old stack top from the stack lookup table.
229281
*/
@@ -239,21 +291,17 @@ TRAMP(tramp_push_frame)
239291
/*
240292
* Get the length of the stack lookup table.
241293
*/
242-
gclen x13, STACK_TABLE_C
294+
gclen x14, STACK_TABLE_C
243295

244296
stp c19, c20, [TRUSTED_STACK_C, #(-CAP_WIDTH * TRUSTED_FRAME_SIZE + CAP_WIDTH * 2)]
245-
/*
246-
* Get the callee's compartment ID.
247-
*/
248-
1: movz w19, #0 /* To be patched at runtime */
249297
/*
250298
* Use subs instead of cmp to clear a capability tag.
251299
*/
252-
subs x14, x13, x19
300+
subs x19, x14, x13
253301
/*
254302
* If the stack lookup table index is out-of-bounds, set it to zero.
255303
*/
256-
csel w20, w19, wzr, hi
304+
csel w20, w13, wzr, hi
257305
/*
258306
* Load the callee's stack if the stack lookup table index is within
259307
* bounds. Otherwise the resolver will be loaded.
@@ -288,18 +336,18 @@ TRAMP(tramp_push_frame)
288336
/*
289337
* Get the landing address.
290338
*/
291-
2: adr c24, #0 /* To be patched at runtime */
339+
6: adr c24, #0 /* To be patched at runtime */
292340

293341
stp c25, c26, [TRUSTED_STACK_C, #(-CAP_WIDTH * TRUSTED_FRAME_SIZE + CAP_WIDTH * 8)]
294342
/*
295343
* Compute the number of return value registers. If the call is a tail-
296344
* call, it is the minimum of that of the caller and the callee.
297345
*/
298-
3: ubfm x25, x23, #48, #0 /* To be patched at runtime */
346+
7: ubfm x25, x23, #48, #0 /* To be patched at runtime */
299347
/*
300348
* Load the target capability.
301349
*/
302-
4: ldr c26, #0 /* To be patched at runtime */
350+
8: ldr c26, #0 /* To be patched at runtime */
303351

304352
/*
305353
* Save the caller's current stack top and old stack top.
@@ -347,10 +395,10 @@ TRAMP(tramp_push_frame)
347395
*/
348396
str x24, [TRUSTED_STACK_C, #TRUSTED_FRAME_LANDING]
349397
/*
350-
* Combine the caller's compartment ID and the number of return value
398+
* Combine the callee's compartment ID and the number of return value
351399
* registers.
352400
*/
353-
orr w24, w19, w25, lsl #16
401+
orr w24, w13, w25, lsl #16
354402
/*
355403
* Save the callee's compartment ID and the number of return value
356404
* registers.
@@ -369,10 +417,20 @@ TRAMP(tramp_push_frame)
369417
set_untrusted_stk c15
370418
TRAMPEND(tramp_push_frame)
371419

372-
PATCH_POINT(tramp_push_frame, cid, 1b)
373-
PATCH_POINT(tramp_push_frame, landing, 2b)
374-
PATCH_POINT(tramp_push_frame, n_rets, 3b)
375-
PATCH_POINT(tramp_push_frame, target, 4b)
420+
PATCH_POINT(tramp_push_frame, cid_fast, 1b)
421+
PATCH_POINT(tramp_push_frame, target_fast, 2b)
422+
PATCH_POINT(tramp_push_frame, cid, 4b)
423+
PATCH_POINT(tramp_push_frame, landing, 6b)
424+
PATCH_POINT(tramp_push_frame, n_rets, 7b)
425+
PATCH_POINT(tramp_push_frame, target, 8b)
426+
427+
.section .rodata
428+
.globl c18n_tramp_entry_slow_offset
429+
.balign 8
430+
.type c18n_tramp_entry_slow_offset,%object
431+
c18n_tramp_entry_slow_offset:
432+
.quad 3b - tramp_push_frame
433+
.size c18n_tramp_entry_slow_offset, . - c18n_tramp_entry_slow_offset
376434

377435
/*
378436
* Save the address of the current frame to c29 so that unwinders can locate it.

libexec/rtld-elf/aarch64/rtld_c18n_machdep.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ tramp_compile(char **entry, const struct tramp_data *data)
147147
callee = compart_id_for_address(data->defobj, (ptraddr_t)data->target);
148148

149149
COPY(push_frame);
150+
PATCH_MOV(push_frame, cid_fast, cid_to_index(callee).val);
151+
PATCH_LDR_IMM(push_frame, target_fast, target_off);
150152
PATCH_MOV(push_frame, cid, cid_to_index(callee).val);
151153
landing_off = PATCH_OFF(push_frame, landing);
152154
/*

libexec/rtld-elf/aarch64/rtld_c18n_machdep.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@
3939

4040
#define SIG_FRAME_SIZE 1360
4141

42+
#define INST_ALIGN 4
43+
4244
#ifdef __ARM_MORELLO_PURECAP_BENCHMARK_ABI
4345
#define TRUSTED_STACK rddc_el0
4446
#define UNTRUSTED_STACK csp

libexec/rtld-elf/rtld.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,7 @@ static struct ld_env_var_desc ld_env_vars[] = {
441441
LD_ENV_DESC(COMPARTMENT_UNWIND, false),
442442
LD_ENV_DESC(COMPARTMENT_STATS, false),
443443
LD_ENV_DESC(COMPARTMENT_SWITCH_COUNT, false),
444+
LD_ENV_DESC(COMPARTMENT_NO_FAST_PATH, false),
444445
#endif
445446
};
446447

@@ -852,6 +853,7 @@ _rtld(Elf_Addr *sp, func_ptr_type *exit_proc, Obj_Entry **objp)
852853
ld_compartment_unwind = ld_get_env_var(LD_COMPARTMENT_UNWIND);
853854
ld_compartment_stats = ld_get_env_var(LD_COMPARTMENT_STATS);
854855
ld_compartment_switch_count = ld_get_env_var(LD_COMPARTMENT_SWITCH_COUNT);
856+
ld_compartment_no_fast_path = ld_get_env_var(LD_COMPARTMENT_NO_FAST_PATH);
855857
/*
856858
* DISABLE takes precedence over ENABLE.
857859
*/

libexec/rtld-elf/rtld.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,7 @@ enum {
510510
LD_COMPARTMENT_UNWIND,
511511
LD_COMPARTMENT_STATS,
512512
LD_COMPARTMENT_SWITCH_COUNT,
513+
LD_COMPARTMENT_NO_FAST_PATH,
513514
#endif
514515
};
515516

libexec/rtld-elf/rtld_c18n.c

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -145,10 +145,16 @@ const char *ld_compartment_stats;
145145
/* Export count of compartment switches to statistics */
146146
const char *ld_compartment_switch_count;
147147

148+
/* Do not use the fast paths in trampolines for self-transitions */
149+
const char *ld_compartment_no_fast_path;
150+
148151
/* Compartmentalisation information exported to the kernel */
149152
static struct cheri_c18n_info *c18n_info;
150153
struct rtld_c18n_stats *c18n_stats;
151154

155+
/* Offset of a trampoline's slow path entry point relative to the fast path */
156+
extern const size_t c18n_tramp_entry_slow_offset;
157+
152158
#define INC_NUM_COMPART (c18n_stats->rcs_compart++, comparts.size++)
153159
#define INC_NUM_BYTES(n) \
154160
atomic_fetch_add_explicit(&c18n_stats->rcs_bytes_total, (n), \
@@ -1438,10 +1444,12 @@ tramp_create(const struct tramp_data *data)
14381444
}
14391445

14401446
static void *
1441-
tramp_make_entry(struct tramp_header *header)
1447+
tramp_make_entry(struct tramp_header *header, bool slow)
14421448
{
1443-
void *entry = header->entry;
1449+
uint8_t *entry = header->entry;
14441450

1451+
if (ld_compartment_no_fast_path != NULL || slow)
1452+
entry += c18n_tramp_entry_slow_offset;
14451453
entry = cheri_clearperm(entry, FUNC_PTR_REMOVE_PERMS);
14461454
#ifndef __ARM_MORELLO_PURECAP_BENCHMARK_ABI
14471455
entry = cheri_capmode(entry);
@@ -1597,7 +1605,7 @@ tramp_intern(const Plt_Entry *plt, compart_id_t caller,
15971605
* Defensive programming: if the requester supplies an untagged target
15981606
* capability, return an untagged trampoline.
15991607
*/
1600-
tramp_entry = tramp_make_entry(header);
1608+
tramp_entry = tramp_make_entry(header, plt != NULL);
16011609
if (!cheri_gettag(data->target))
16021610
tramp_entry = cheri_cleartag(tramp_entry);
16031611

@@ -1636,18 +1644,36 @@ tramp_reflect(const void *data)
16361644
#ifndef __ARM_MORELLO_PURECAP_BENCHMARK_ABI
16371645
data = (const char *)data - 1;
16381646
#endif
1647+
/*
1648+
* INVARIANT: The pointer being reflected never points to before the
1649+
* function pointer entry point of the trampoline.
1650+
*
1651+
* When the fast path is enabled, the function pointer entry point is
1652+
* the first instruction of the trampoline. Otherwise, the function
1653+
* pointer entry point is `c18n_tramp_entry_slow_offset` bytes after
1654+
* the first instruction of the trampoline, and the fast path is never
1655+
* exposed. The return entry point is after either function pointer
1656+
* entry point.
1657+
*/
1658+
if (ld_compartment_no_fast_path != NULL)
1659+
data = (const char *)data - c18n_tramp_entry_slow_offset;
16391660
data = __containerof(data, struct tramp_header, entry);
16401661

16411662
for (page = atomic_load_explicit(&tramp_pgs.head, memory_order_acquire);
1642-
page != NULL; page = SLIST_NEXT(page, link)) {
1663+
page != NULL; page = SLIST_NEXT(page, link)) {
16431664
ret = cheri_buildcap(page, (uintptr_t)data);
16441665
if (!cheri_gettag(ret))
16451666
continue;
1646-
if (cheri_gettag(ret->defobj))
1667+
/*
1668+
* INVARIANT: The rederived pointer never points to before the
1669+
* actual trampoline header.
1670+
*/
1671+
if (__builtin_is_aligned(ret, _Alignof(typeof(*ret))) &&
1672+
cheri_gettag(ret->defobj))
16471673
/*
1648-
* At this point, the provided data must have been (a)
1649-
* tagged and (b) pointing to the entry point of a
1650-
* trampoline.
1674+
* If the rederived pointer is correctly aligned and
1675+
* the `defobj` field is tagged, then it must point to
1676+
* the actual trampoline header.
16511677
*/
16521678
return (ret);
16531679
else {

libexec/rtld-elf/rtld_c18n.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ extern const char *ld_compartment_sig;
4343
extern const char *ld_compartment_unwind;
4444
extern const char *ld_compartment_stats;
4545
extern const char *ld_compartment_switch_count;
46+
extern const char *ld_compartment_no_fast_path;
4647
extern struct rtld_c18n_stats *c18n_stats;
4748

4849
/*
@@ -207,10 +208,15 @@ struct tramp_header {
207208
* that the tagged value is visible to the trampoline when it is run.
208209
*/
209210
_Atomic(void *) target;
211+
/*
212+
* INVARIANT: This field must be the last tagged member of the
213+
* trampoline. Trampoline reflection relies on this to locate the
214+
* header.
215+
*/
210216
const Obj_Entry *defobj;
211217
size_t symnum;
212218
struct func_sig sig;
213-
uint32_t entry[];
219+
_Alignas(INST_ALIGN) uint8_t entry[];
214220
};
215221

216222
/*

0 commit comments

Comments
 (0)