Skip to content

Commit 11741f9

Browse files
committed
Add new opcodes: MOVE_LAST, CAR_LAST, CDR_LAST, CONS_MOVE
Implement optimize_moves() pass in Chunk that walks code backward. Track jump targets to be conservative about control flow. Add Value::take() for O(1) move semantics. Add take_car()/take_cdr() for cons cell destructuring.
1 parent deaa8ec commit 11741f9

File tree

4 files changed

+528
-0
lines changed

4 files changed

+528
-0
lines changed

src/bytecode.rs

Lines changed: 342 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@ impl std::fmt::Debug for Op {
7171
Self::JUMP_IF_NIL => write!(f, "JumpIfNil({}, {})", self.a(), self.sbx()),
7272
Self::JUMP_IF_NOT_NIL => write!(f, "JumpIfNotNil({}, {})", self.a(), self.sbx()),
7373
Self::CONS => write!(f, "Cons({}, {}, {})", self.a(), self.b(), self.c()),
74+
Self::MOVE_LAST => write!(f, "MoveLast({}, {})", self.a(), self.b()),
75+
Self::CAR_LAST => write!(f, "CarLast({}, {})", self.a(), self.b()),
76+
Self::CDR_LAST => write!(f, "CdrLast({}, {})", self.a(), self.b()),
77+
Self::CONS_MOVE => write!(f, "ConsMove({}, {}, {})", self.a(), self.b(), self.c()),
7478
_ => write!(f, "Unknown(0x{:08x})", self.0),
7579
}
7680
}
@@ -133,6 +137,15 @@ impl Op {
133137
// Specialized cons opcode (very common in list construction)
134138
pub const CONS: u8 = 50; // ABC: dest, car, cdr - create cons cell
135139

140+
    // Move-semantics variants (last-use optimization).
    // These opcodes move the value out of the source register instead of
    // cloning it; the source register becomes nil after execution.
    pub const MOVE_LAST: u8 = 51; // AB: dest, src - move (don't clone) from src
    pub const CAR_LAST: u8 = 52; // AB: dest, src - move car (don't clone list)
    pub const CDR_LAST: u8 = 53; // AB: dest, src - move cdr (don't clone list)
    // For CONS, we use high bit of car/cdr registers to indicate move
    // If B & 0x80, move from car register (B & 0x7F); if C & 0x80, move from cdr register (C & 0x7F)
    // NOTE(review): this flag encoding means only registers 0..=127 can carry
    // a move flag; a register index >= 128 would be indistinguishable from a
    // smaller register plus the flag bit.
    pub const CONS_MOVE: u8 = 54; // ABC: dest, car|0x80?, cdr|0x80? - cons with optional moves
148+
136149
// ========== Constructors ==========
137150

138151
/// Create ABC format instruction: [opcode:8][A:8][B:8][C:8]
@@ -454,6 +467,36 @@ impl Op {
454467
Self::abc(Self::CONS, dest, car, cdr)
455468
}
456469

470+
// ========== Move-semantics variants (last-use optimization) ==========
471+
472+
    /// Build a MOVE_LAST instruction: transfer the value in `src` into `dest`.
    ///
    /// Unlike a plain MOVE (which clones), the source register becomes nil
    /// after the move. Emitted by `Chunk::optimize_moves()` when `src` is
    /// provably dead after this instruction.
    #[inline(always)]
    pub const fn move_last(dest: Reg, src: Reg) -> Self {
        Self::abc(Self::MOVE_LAST, dest, src, 0)
    }
477+
478+
    /// Build a CAR_LAST instruction: take the car of the list in `src` with
    /// move semantics (the source list is consumed rather than cloned).
    /// Emitted by `Chunk::optimize_moves()` when `src` is provably dead after
    /// this instruction.
    #[inline(always)]
    pub const fn car_last(dest: Reg, src: Reg) -> Self {
        Self::abc(Self::CAR_LAST, dest, src, 0)
    }
483+
484+
    /// Build a CDR_LAST instruction: take the cdr of the list in `src` with
    /// move semantics (the source list is consumed rather than cloned).
    /// Emitted by `Chunk::optimize_moves()` when `src` is provably dead after
    /// this instruction.
    #[inline(always)]
    pub const fn cdr_last(dest: Reg, src: Reg) -> Self {
        Self::abc(Self::CDR_LAST, dest, src, 0)
    }
489+
490+
/// Cons with optional move semantics
491+
/// move_car: if true, move from car register instead of clone
492+
/// move_cdr: if true, move from cdr register instead of clone
493+
#[inline(always)]
494+
pub const fn cons_move(dest: Reg, car: Reg, cdr: Reg, move_car: bool, move_cdr: bool) -> Self {
495+
let car_with_flag = if move_car { car | 0x80 } else { car };
496+
let cdr_with_flag = if move_cdr { cdr | 0x80 } else { cdr };
497+
Self::abc(Self::CONS_MOVE, dest, car_with_flag, cdr_with_flag)
498+
}
499+
457500
// ========== Jump patching helpers ==========
458501

459502
/// Check if this is a jump instruction (for patching)
@@ -514,6 +557,305 @@ impl Chunk {
514557
}
515558
}
516559

560+
/// Perform liveness analysis and upgrade opcodes to move variants where profitable.
561+
/// This is a backward dataflow analysis that tracks which registers are "live"
562+
/// (will be used later). When a register is used for the last time, we can
563+
/// use move semantics instead of clone.
564+
pub fn optimize_moves(&mut self) {
565+
if self.code.is_empty() {
566+
return;
567+
}
568+
569+
// First, recursively optimize nested function prototypes
570+
for proto in &mut self.protos {
571+
proto.optimize_moves();
572+
}
573+
574+
// Step 1: Find all jump targets (instructions that can be jumped to)
575+
// At jump targets, we need to be conservative because registers might
576+
// be live from a different path
577+
let mut jump_targets: Vec<bool> = vec![false; self.code.len()];
578+
for (i, op) in self.code.iter().enumerate() {
579+
let opcode = op.opcode();
580+
// Check if this is a jump instruction and mark the target
581+
if opcode == Op::JUMP || opcode == Op::JUMP_IF_FALSE || opcode == Op::JUMP_IF_TRUE
582+
|| opcode == Op::JUMP_IF_NIL || opcode == Op::JUMP_IF_NOT_NIL
583+
{
584+
let offset = op.sbx() as isize;
585+
let target = (i as isize + 1 + offset) as usize;
586+
if target < self.code.len() {
587+
jump_targets[target] = true;
588+
}
589+
} else if opcode >= Op::JUMP_IF_LT && opcode <= Op::JUMP_IF_GE_IMM {
590+
// These use i8 offset in C byte
591+
let offset = op.c() as i8 as isize;
592+
let target = (i as isize + 1 + offset) as usize;
593+
if target < self.code.len() {
594+
jump_targets[target] = true;
595+
}
596+
}
597+
}
598+
599+
// Step 2: Compute "ever_live" - registers that are ever used in the function
600+
// This is needed to be conservative at join points
601+
let mut ever_live: u128 = 0;
602+
for op in self.code.iter() {
603+
let opcode = op.opcode();
604+
// Add all registers that are read by any instruction
605+
match opcode {
606+
Op::MOVE | Op::CAR | Op::CDR | Op::NEG | Op::NOT => {
607+
ever_live |= 1u128 << op.b();
608+
}
609+
Op::ADD | Op::SUB | Op::MUL | Op::DIV | Op::MOD |
610+
Op::LT | Op::LE | Op::GT | Op::GE | Op::EQ | Op::NE |
611+
Op::CONS | Op::GET_LIST => {
612+
ever_live |= 1u128 << op.b();
613+
ever_live |= 1u128 << op.c();
614+
}
615+
Op::ADD_IMM | Op::SUB_IMM |
616+
Op::LT_IMM | Op::LE_IMM | Op::GT_IMM | Op::GE_IMM => {
617+
ever_live |= 1u128 << op.b();
618+
}
619+
Op::SET_GLOBAL | Op::RETURN => {
620+
ever_live |= 1u128 << op.a();
621+
}
622+
Op::JUMP_IF_FALSE | Op::JUMP_IF_TRUE |
623+
Op::JUMP_IF_NIL | Op::JUMP_IF_NOT_NIL => {
624+
ever_live |= 1u128 << op.a();
625+
}
626+
Op::JUMP_IF_LT | Op::JUMP_IF_LE | Op::JUMP_IF_GT | Op::JUMP_IF_GE => {
627+
ever_live |= 1u128 << op.a();
628+
ever_live |= 1u128 << op.b();
629+
}
630+
Op::JUMP_IF_LT_IMM | Op::JUMP_IF_LE_IMM |
631+
Op::JUMP_IF_GT_IMM | Op::JUMP_IF_GE_IMM => {
632+
ever_live |= 1u128 << op.a();
633+
}
634+
_ => {}
635+
}
636+
}
637+
638+
// Track which registers are live at each point (bitset for efficiency)
639+
// We use u128 to support up to 128 registers (more than enough)
640+
let mut live: u128 = 0;
641+
642+
// Walk backward through the code
643+
for i in (0..self.code.len()).rev() {
644+
// At jump targets, be conservative: all ever-used registers might be live
645+
if jump_targets[i] {
646+
live |= ever_live;
647+
}
648+
649+
let op = self.code[i];
650+
let opcode = op.opcode();
651+
652+
match opcode {
653+
// ===== AB-format instructions that can use move semantics =====
654+
655+
Op::MOVE => {
656+
let dest = op.a();
657+
let src = op.b();
658+
// If src is not live after this instruction, use move semantics
659+
let src_live_after = (live & (1u128 << src)) != 0;
660+
// Update liveness: dest is now dead (overwritten), src is now live
661+
live &= !(1u128 << dest);
662+
live |= 1u128 << src;
663+
// Upgrade to MOVE_LAST if src was not live (this is its last use)
664+
if !src_live_after && src != dest {
665+
self.code[i] = Op::move_last(dest, src);
666+
}
667+
}
668+
669+
Op::CAR => {
670+
let dest = op.a();
671+
let src = op.b();
672+
let src_live_after = (live & (1u128 << src)) != 0;
673+
live &= !(1u128 << dest);
674+
live |= 1u128 << src;
675+
if !src_live_after && src != dest {
676+
self.code[i] = Op::car_last(dest, src);
677+
}
678+
}
679+
680+
Op::CDR => {
681+
let dest = op.a();
682+
let src = op.b();
683+
let src_live_after = (live & (1u128 << src)) != 0;
684+
live &= !(1u128 << dest);
685+
live |= 1u128 << src;
686+
if !src_live_after && src != dest {
687+
self.code[i] = Op::cdr_last(dest, src);
688+
}
689+
}
690+
691+
Op::CONS => {
692+
let dest = op.a();
693+
let car = op.b();
694+
let cdr = op.c();
695+
let car_live_after = (live & (1u128 << car)) != 0;
696+
let cdr_live_after = (live & (1u128 << cdr)) != 0;
697+
live &= !(1u128 << dest);
698+
live |= 1u128 << car;
699+
live |= 1u128 << cdr;
700+
// Upgrade to CONS_MOVE if either car or cdr is last use
701+
let move_car = !car_live_after && car != dest;
702+
let move_cdr = !cdr_live_after && cdr != dest;
703+
if move_car || move_cdr {
704+
self.code[i] = Op::cons_move(dest, car, cdr, move_car, move_cdr);
705+
}
706+
}
707+
708+
// ===== Other AB-format instructions (read src, write dest) =====
709+
710+
Op::NEG | Op::NOT => {
711+
let dest = op.a();
712+
let src = op.b();
713+
live &= !(1u128 << dest);
714+
live |= 1u128 << src;
715+
}
716+
717+
// ===== ABC-format arithmetic/comparison (read B, C; write A) =====
718+
719+
Op::ADD | Op::SUB | Op::MUL | Op::DIV | Op::MOD |
720+
Op::LT | Op::LE | Op::GT | Op::GE | Op::EQ | Op::NE => {
721+
let dest = op.a();
722+
let b = op.b();
723+
let c = op.c();
724+
live &= !(1u128 << dest);
725+
live |= 1u128 << b;
726+
live |= 1u128 << c;
727+
}
728+
729+
// ===== Immediate variants (read B only) =====
730+
731+
Op::ADD_IMM | Op::SUB_IMM |
732+
Op::LT_IMM | Op::LE_IMM | Op::GT_IMM | Op::GE_IMM => {
733+
let dest = op.a();
734+
let src = op.b();
735+
live &= !(1u128 << dest);
736+
live |= 1u128 << src;
737+
}
738+
739+
// ===== Load instructions (write only) =====
740+
741+
Op::LOAD_CONST | Op::LOAD_NIL | Op::LOAD_TRUE | Op::LOAD_FALSE |
742+
Op::GET_GLOBAL | Op::CLOSURE => {
743+
let dest = op.a();
744+
live &= !(1u128 << dest);
745+
}
746+
747+
// ===== Store instructions (read only) =====
748+
749+
Op::SET_GLOBAL | Op::RETURN => {
750+
let src = op.a();
751+
live |= 1u128 << src;
752+
}
753+
754+
// ===== Jump instructions (conditionally read) =====
755+
756+
Op::JUMP => {
757+
// Unconditional jump reads nothing
758+
// Note: For proper analysis we'd need to handle control flow
759+
// This simplified version treats code as linear (conservative for loops)
760+
}
761+
762+
Op::JUMP_IF_FALSE | Op::JUMP_IF_TRUE |
763+
Op::JUMP_IF_NIL | Op::JUMP_IF_NOT_NIL => {
764+
let reg = op.a();
765+
live |= 1u128 << reg;
766+
}
767+
768+
Op::JUMP_IF_LT | Op::JUMP_IF_LE | Op::JUMP_IF_GT | Op::JUMP_IF_GE => {
769+
let left = op.a();
770+
let right = op.b();
771+
live |= 1u128 << left;
772+
live |= 1u128 << right;
773+
}
774+
775+
Op::JUMP_IF_LT_IMM | Op::JUMP_IF_LE_IMM |
776+
Op::JUMP_IF_GT_IMM | Op::JUMP_IF_GE_IMM => {
777+
let src = op.a();
778+
live |= 1u128 << src;
779+
}
780+
781+
// ===== Call instructions (complex, treat all args as live) =====
782+
783+
Op::CALL => {
784+
let dest = op.a();
785+
let func = op.b();
786+
let nargs = op.c();
787+
live &= !(1u128 << dest);
788+
live |= 1u128 << func;
789+
// Args are in consecutive registers after func
790+
for j in 0..nargs {
791+
live |= 1u128 << (func + 1 + j);
792+
}
793+
}
794+
795+
Op::CALL_GLOBAL => {
796+
let dest = op.a();
797+
let nargs = op.c();
798+
live &= !(1u128 << dest);
799+
// Args are in consecutive registers after dest
800+
for j in 0..nargs {
801+
live |= 1u128 << (dest + 1 + j);
802+
}
803+
}
804+
805+
Op::TAIL_CALL => {
806+
let func = op.a();
807+
let nargs = op.b();
808+
live |= 1u128 << func;
809+
for j in 0..nargs {
810+
live |= 1u128 << (func + 1 + j);
811+
}
812+
}
813+
814+
Op::TAIL_CALL_GLOBAL => {
815+
let first_arg = op.b();
816+
let nargs = op.c();
817+
for j in 0..nargs {
818+
live |= 1u128 << (first_arg + j);
819+
}
820+
}
821+
822+
// ===== List operations =====
823+
824+
Op::NEW_LIST => {
825+
let dest = op.a();
826+
let nargs = op.b();
827+
live &= !(1u128 << dest);
828+
for j in 0..nargs {
829+
live |= 1u128 << (dest + 1 + j);
830+
}
831+
}
832+
833+
Op::GET_LIST => {
834+
let dest = op.a();
835+
let list = op.b();
836+
let index = op.c();
837+
live &= !(1u128 << dest);
838+
live |= 1u128 << list;
839+
live |= 1u128 << index;
840+
}
841+
842+
Op::SET_LIST => {
843+
let list = op.a();
844+
let index = op.b();
845+
let value = op.c();
846+
live |= 1u128 << list;
847+
live |= 1u128 << index;
848+
live |= 1u128 << value;
849+
}
850+
851+
// Already upgraded move variants - shouldn't appear in initial code
852+
Op::MOVE_LAST | Op::CAR_LAST | Op::CDR_LAST | Op::CONS_MOVE => {}
853+
854+
_ => {}
855+
}
856+
}
857+
}
858+
517859
pub fn add_constant(&mut self, value: Value) -> ConstIdx {
518860
// Fast path: check specialized indexes for common types (O(1))
519861
if let Some(n) = value.as_int() {

src/compiler.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,8 @@ impl Compiler {
372372
compiler.compile_expr(expr, dest, true)?;
373373
compiler.emit(Op::ret(dest));
374374
compiler.chunk.num_registers = compiler.locals.len().max(1) as u8 + 16; // extra for temps
375+
// Optimize move semantics using liveness analysis
376+
compiler.chunk.optimize_moves();
375377
Ok(compiler.chunk)
376378
}
377379

@@ -394,6 +396,8 @@ impl Compiler {
394396

395397
compiler.emit(Op::ret(dest));
396398
compiler.chunk.num_registers = compiler.locals.len().max(1) as u8 + 16;
399+
// Optimize move semantics using liveness analysis
400+
compiler.chunk.optimize_moves();
397401
Ok(compiler.chunk)
398402
}
399403

@@ -410,6 +414,8 @@ impl Compiler {
410414
compiler.compile_expr(body, dest, true)?;
411415
compiler.emit(Op::ret(dest));
412416
compiler.chunk.num_registers = compiler.locals.len().max(1) as u8 + 16;
417+
// Note: Don't call optimize_moves() here - it's called recursively from
418+
// parent chunk's optimize_moves() to ensure each proto is only optimized once
413419
Ok(compiler.chunk)
414420
}
415421

0 commit comments

Comments
 (0)