Skip to content
This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

Commit

Permalink
feat: general improvement + sub/cmp/and/or...
Browse files Browse the repository at this point in the history
  • Loading branch information
julio4 committed May 14, 2024
1 parent 9cf9e4f commit 55c9af9
Show file tree
Hide file tree
Showing 15 changed files with 615 additions and 205 deletions.
2 changes: 2 additions & 0 deletions src/disassembler/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ pub enum ParseError {
CorruptedData,
InvalidOpcode(u8),
UnexpectedEOF,
InvalidModRM,
}

impl std::fmt::Display for ParseError {
Expand All @@ -13,6 +14,7 @@ impl std::fmt::Display for ParseError {
ParseError::CorruptedData => write!(f, "Corrupted data"),
ParseError::InvalidOpcode(opcode) => write!(f, "Invalid opcode: {:#04x}", opcode),
ParseError::UnexpectedEOF => write!(f, "Unexpected end of file"),
ParseError::InvalidModRM => write!(f, "Invalid ModRM byte"),
}
}
}
66 changes: 0 additions & 66 deletions src/disassembler/instruction.rs

This file was deleted.

68 changes: 68 additions & 0 deletions src/disassembler/instruction/instruction.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
use crate::disassembler::instruction::Operand;

/// Instruction forms handled by the disassembler, one variant per mnemonic.
///
/// Every variant except `Int` carries a destination operand and a source
/// operand, in that order.
#[derive(Debug, PartialEq)]
pub enum IR {
/// `mov dest, src`.
Mov { dest: Operand, src: Operand },
/// `int` instruction; `int_type` is the one-byte value attached to it.
Int { int_type: u8 },
/// `add dest, src`.
Add { dest: Operand, src: Operand },
/// `sub dest, src`.
Sub { dest: Operand, src: Operand },
/// NOTE(review): the name looks like a transposition of `SBB` (subtract with
/// borrow) — confirm before renaming, since callers reference this variant.
Ssb { dest: Operand, src: Operand },
/// `cmp dest, src`.
Cmp { dest: Operand, src: Operand },
/// `and dest, src`.
And { dest: Operand, src: Operand },
/// `or dest, src`.
Or { dest: Operand, src: Operand },
/// `xor dest, src`.
Xor { dest: Operand, src: Operand },
/// `lea dest, src`.
Lea { dest: Operand, src: Operand },
/// `lds dest, src`.
Lds { dest: Operand, src: Operand },
/// `les dest, src`.
Les { dest: Operand, src: Operand },
}

impl std::fmt::Display for IR {
    /// Render the instruction as assembly text.
    ///
    /// Two-operand instructions are written as `<mnemonic> <dest>, <src>`.
    /// `Int` is written as `int <type>` with the type as two lowercase hex
    /// digits, except for type 3, which is written as a bare `int`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `Int` has its own layout; every other variant shares one template,
        // so only the mnemonic differs per arm.
        let (mnemonic, dest, src) = match self {
            IR::Int { int_type: 3 } => return write!(f, "int"),
            IR::Int { int_type } => return write!(f, "int {:02x}", int_type),
            IR::Mov { dest, src } => ("mov", dest, src),
            IR::Add { dest, src } => ("add", dest, src),
            IR::Sub { dest, src } => ("sub", dest, src),
            // Subtract-with-borrow: the 8086 mnemonic is `sbb`, not `ssb`.
            IR::Ssb { dest, src } => ("sbb", dest, src),
            IR::Cmp { dest, src } => ("cmp", dest, src),
            IR::And { dest, src } => ("and", dest, src),
            IR::Or { dest, src } => ("or", dest, src),
            IR::Xor { dest, src } => ("xor", dest, src),
            IR::Lea { dest, src } => ("lea", dest, src),
            IR::Lds { dest, src } => ("lds", dest, src),
            IR::Les { dest, src } => ("les", dest, src),
        };
        write!(f, "{} {}, {}", mnemonic, dest, src)
    }
}

/// An instruction form together with a byte buffer.
///
/// NOTE(review): `raw` is presumably the encoded bytes the instruction was
/// decoded from — confirm against the parser that builds these values.
#[derive(Debug, PartialEq)]
pub struct Instruction {
/// The instruction form (mnemonic plus operands).
pub ir: IR,
/// Bytes printed as lowercase hex ahead of the instruction text.
pub raw: Vec<u8>,
}

impl Instruction {
pub fn new(ir: IR, raw: Vec<u8>) -> Self {
Instruction { ir, raw }
}
}

impl std::fmt::Display for Instruction {
    /// Write the raw bytes as contiguous two-digit lowercase hex, then a tab
    /// and a space, then the instruction text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Emit each byte directly; no intermediate string is needed.
        for byte in &self.raw {
            write!(f, "{:02x}", byte)?;
        }
        write!(f, "\t {}", self.ir)
    }
}
5 changes: 5 additions & 0 deletions src/disassembler/instruction/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pub mod instruction;
pub mod operand;

pub use instruction::{Instruction, IR};
pub use operand::Operand;
94 changes: 94 additions & 0 deletions src/disassembler/instruction/operand.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
use crate::disassembler::{error::ParseError, Memory, Register};

/// A single instruction operand.
#[derive(Debug, PartialEq)]
pub enum Operand {
/// A register operand.
Register(Register),
/// An 8-bit immediate value.
Immediate(u8),
/// A 16-bit immediate value.
LongImmediate(u16),
/// A memory operand.
Memory(Memory),
}

impl Operand {
    /// Decode the `mod` and `rm` fields of a ModRM byte into an [`Operand`].
    ///
    /// Returns the operand together with the number of displacement bytes
    /// (0, 1 or 2) that were read from `bytes`. Displacement bytes are taken
    /// from `bytes[1]` and `bytes[2]`; `bytes[0]` is never read here
    /// (presumably it is the ModRM byte itself — confirm against the caller).
    ///
    /// * `mod_ == 0b11` — register operand selected by `rm` and `w`.
    /// * `mod_ == 0b00` — no displacement, except `rm == 0b110`, which is a
    ///   bare two-byte displacement with no base or index register.
    /// * `mod_ == 0b01` — one displacement byte.
    /// * `mod_ == 0b10` — two displacement bytes (low, then high).
    ///
    /// # Errors
    ///
    /// * [`ParseError::UnexpectedEOF`] when `bytes` is too short to hold the
    ///   required displacement.
    /// * [`ParseError::InvalidModRM`] when `mod_` is not one of the four
    ///   two-bit values.
    pub fn parse_modrm(
        mod_: u8,
        rm: u8,
        bytes: &[u8],
        w: bool,
    ) -> Result<(Operand, usize), ParseError> {
        // `mod == 00` with `rm == 110` means "displacement only".
        let direct_address = mod_ == 0b00 && rm == 0b110;

        // How many displacement bytes this addressing mode carries.
        let disp_len: usize = match mod_ {
            0b11 => return Ok((Operand::Register(Register::from(rm, w)), 0)),
            0b00 if direct_address => 2,
            0b00 => 0,
            0b01 => 1,
            0b10 => 2,
            _ => return Err(ParseError::InvalidModRM),
        };

        // The displacement sits after `bytes[0]`, so one extra byte is needed.
        if disp_len > 0 && bytes.len() < disp_len + 1 {
            return Err(ParseError::UnexpectedEOF);
        }

        let (base, index) = if direct_address {
            (None, None)
        } else {
            (Register::get_base(rm), Register::get_index(rm))
        };
        let (disp_low, disp_high) = match disp_len {
            0 => (0, None),
            1 => (bytes[1], None),
            _ => (bytes[1], Some(bytes[2])),
        };

        Ok((
            Operand::Memory(Memory {
                base,
                index,
                disp_low,
                disp_high,
            }),
            disp_len,
        ))
    }
}

impl std::fmt::Display for Operand {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Operand::Register(r) => write!(f, "{}", r),
Operand::Immediate(i) => write!(f, "{:02x}", i),
Operand::LongImmediate(i) => write!(f, "{:04x}", i),
Operand::Memory(mem) => write!(f, "{}", mem),
}
}
}
75 changes: 14 additions & 61 deletions src/disassembler/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,63 +9,21 @@ pub struct Memory {
}

impl Memory {
pub fn from_modrm(mod_: u8, rm: u8, disp_bytes: &[u8], w: bool) -> Memory {
match mod_ {
0b11 => Memory {
base: Some(Register::from(rm, w)),
index: None,
disp_low: 0,
disp_high: None,
},
0b00 => {
// Special case *: EA = disp-high;disp-low
if rm == 0b110 {
Memory {
base: None,
index: None,
disp_low: disp_bytes[0],
disp_high: Some(disp_bytes[1]),
}
} else {
Memory {
base: Self::get_base_register(rm),
index: Self::get_index_register(rm),
disp_low: 0,
disp_high: None,
}
}
}
0b01 => Memory {
base: Self::get_base_register(rm),
index: Self::get_index_register(rm),
disp_low: disp_bytes[0],
disp_high: None,
},
0b10 => Memory {
base: Self::get_base_register(rm),
index: Self::get_index_register(rm),
disp_low: disp_bytes[0],
disp_high: Some(disp_bytes[1]),
},
_ => unreachable!(),
}
}

fn get_base_register(rm: u8) -> Option<Register> {
match rm {
0b000 | 0b001 | 0b111 => Some(Register::BX),
0b010 | 0b011 | 0b110 => Some(Register::BP),
0b100 => Some(Register::SI),
0b101 => Some(Register::DI),
_ => None,
pub fn new(base: Option<Register>, index: Option<Register>, disp_low: u8) -> Self {
Memory {
base,
index,
disp_low,
disp_high: None,
}
}

fn get_index_register(rm: u8) -> Option<Register> {
match rm {
0b000 | 0b010 => Some(Register::SI),
0b001 | 0b011 => Some(Register::DI),
_ => None,
pub fn new_with_word_disp(base: Option<Register>, index: Option<Register>, disp: u16) -> Self {
Memory {
base,
index,
disp_low: disp as u8,
disp_high: Some((disp >> 8) as u8),
}
}
}
Expand All @@ -84,13 +42,8 @@ impl std::fmt::Display for Memory {
Some(d) => (d as u16) << 8 | (self.disp_low as u16),
None => self.disp_low as u16,
};
// If only base, don't print []
return if !base.is_empty() && index.is_empty() && disp == 0 {
// TODO: should we explicitly convert to reg?
write!(f, "{}", base)
}
// If only disp, convert to EA
else if base.is_empty() && index.is_empty() && disp != 0 {
return if base.is_empty() && index.is_empty() && disp != 0 {
write!(f, "0x{:04x}", disp)
} else {
write!(f, "[{}{}{}]", base, index, {
Expand All @@ -116,7 +69,7 @@ mod tests {
index: None,
disp_high: None,
};
assert_eq!(format!("{}", memory), "bx");
assert_eq!(format!("{}", memory), "[bx]");
}

#[test]
Expand Down
7 changes: 4 additions & 3 deletions src/disassembler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ pub mod parser;
pub mod program;
pub mod register;

pub use error::ParseError;
pub use instruction::IR;
pub use parser::parse_instruction;
pub use instruction::{Instruction, IR};
pub use memory::Memory;
pub use program::Program;
pub use register::Register;
Loading

0 comments on commit 55c9af9

Please sign in to comment.