//! Disassembly support for pulley bytecode. use crate::decode::*; use crate::imms::*; use crate::regs::*; use alloc::string::String; use core::fmt::Write; /// A Pulley bytecode disassembler. /// /// This is implemented as an `OpVisitor`, where you pass a `Disassembler` to a /// `Decoder` in order to disassemble instructions from a bytecode stream. /// /// Alternatively, you can use the `Disassembler::disassemble_all` method to /// disassemble a complete bytecode stream. pub struct Disassembler<'a> { raw_bytecode: &'a [u8], bytecode: SafeBytecodeStream<'a>, disas: String, start_offset: usize, start: usize, temp: String, offsets: bool, hexdump: bool, } impl<'a> Disassembler<'a> { /// Disassemble every instruction in the given bytecode stream. pub fn disassemble_all(bytecode: &'a [u8]) -> Result { let mut disas = Self::new(bytecode); Decoder::decode_all(&mut disas)?; Ok(disas.disas) } /// Create a new `Disassembler` that can be used to incrementally /// disassemble instructions from the given bytecode stream. pub fn new(bytecode: &'a [u8]) -> Self { Self { raw_bytecode: bytecode, bytecode: SafeBytecodeStream::new(bytecode), disas: String::new(), start: 0, start_offset: 0, temp: String::new(), offsets: true, hexdump: true, } } /// Whether to prefix each instruction's disassembly with its offset. /// /// True by default. pub fn offsets(&mut self, offsets: bool) -> &mut Self { self.offsets = offsets; self } /// Whether to include a hexdump of the bytecode in the disassembly. /// /// True by default. pub fn hexdump(&mut self, hexdump: bool) -> &mut Self { self.hexdump = hexdump; self } /// Configures the offset that this function starts from, if it doesn't /// start from 0. /// /// This can possibly be useful when a single function at a time is being /// disassembled. pub fn start_offset(&mut self, offset: usize) -> &mut Self { self.start_offset = offset; self } /// Get the disassembly thus far. pub fn disas(&self) -> &str { &self.disas } fn disas_op(&mut self, mnemonic: &str, operands: &[&dyn Disas]) { write!(&mut self.temp, "{mnemonic}").unwrap(); for (i, val) in operands.iter().enumerate() { if i > 0 { write!(&mut self.temp, ",").unwrap(); } write!(&mut self.temp, " ").unwrap(); val.disas(self.start + self.start_offset, &mut self.temp); } } fn disas_br_table32(&mut self, reg: XReg, amt: u32) { self.disas_op("br_table32", &[®, &amt]); for _ in 0..amt { self.after_visit(); self.start = self.bytecode.position(); if let Ok(offset) = PcRelOffset::decode(self.bytecode()) { offset.disas(self.start + self.start_offset, &mut self.temp); } } } } /// Anything inside an instruction that can be disassembled: registers, /// immediates, etc... trait Disas { fn disas(&self, position: usize, disas: &mut String); } impl Disas for XReg { fn disas(&self, _position: usize, disas: &mut String) { write!(disas, "{self}").unwrap(); } } impl Disas for FReg { fn disas(&self, _position: usize, disas: &mut String) { write!(disas, "{self}").unwrap(); } } impl Disas for VReg { fn disas(&self, _position: usize, disas: &mut String) { write!(disas, "{self}").unwrap(); } } impl Disas for i8 { fn disas(&self, _position: usize, disas: &mut String) { write!(disas, "{self}").unwrap(); } } impl Disas for i16 { fn disas(&self, _position: usize, disas: &mut String) { write!(disas, "{self}").unwrap(); } } impl Disas for i32 { fn disas(&self, _position: usize, disas: &mut String) { write!(disas, "{self}").unwrap(); } } impl Disas for i64 { fn disas(&self, _position: usize, disas: &mut String) { write!(disas, "{self}").unwrap(); } } impl Disas for i128 { fn disas(&self, _position: usize, disas: &mut String) { write!(disas, "{self}").unwrap(); } } impl Disas for u8 { fn disas(&self, _position: usize, disas: &mut String) { write!(disas, "{self}").unwrap(); } } impl Disas for u16 { fn disas(&self, _position: usize, disas: &mut String) { write!(disas, "{self}").unwrap(); } } impl Disas for u32 { fn disas(&self, _position: usize, disas: &mut String) { write!(disas, "{self}").unwrap(); } } impl Disas for u64 { fn disas(&self, _position: usize, disas: &mut String) { write!(disas, "{self}").unwrap(); } } impl Disas for u128 { fn disas(&self, _position: usize, disas: &mut String) { write!(disas, "{self}").unwrap(); } } impl Disas for PcRelOffset { fn disas(&self, position: usize, disas: &mut String) { let offset = isize::try_from(i32::from(*self)).unwrap(); let target = position.wrapping_add(offset as usize); write!(disas, "{offset:#x} // target = {target:#x}").unwrap() } } impl Disas for U6 { fn disas(&self, _position: usize, disas: &mut String) { write!(disas, "{}", u8::from(*self)).unwrap(); } } fn disas_list(position: usize, disas: &mut String, iter: impl IntoIterator) { let mut iter = iter.into_iter(); let Some(first) = iter.next() else { return }; first.disas(position, disas); for item in iter { write!(disas, ", ").unwrap(); item.disas(position, disas); } } impl Disas for BinaryOperands where D: Reg + Disas, S1: Reg + Disas, S2: Reg + Disas, { fn disas(&self, position: usize, disas: &mut String) { self.dst.disas(position, disas); write!(disas, ", ").unwrap(); self.src1.disas(position, disas); write!(disas, ", ").unwrap(); self.src2.disas(position, disas); } } impl Disas for BinaryOperands where D: Reg + Disas, S1: Reg + Disas, { fn disas(&self, position: usize, disas: &mut String) { self.dst.disas(position, disas); write!(disas, ", ").unwrap(); self.src1.disas(position, disas); write!(disas, ", ").unwrap(); self.src2.disas(position, disas); } } impl Disas for RegSet { fn disas(&self, position: usize, disas: &mut String) { disas_list(position, disas, *self) } } macro_rules! impl_disas { ( $( $( #[$attr:meta] )* $snake_name:ident = $name:ident $( { $( $( #[$field_attr:meta] )* $field:ident : $field_ty:ty ),* } )? ; )* ) => { $( impl_disas!(@one $snake_name = $name $( { $($field: $field_ty),* } )?); )* }; // Diassembling `br_table` is a bit special as it has trailing byte after // the opcode of the branch table itself. ( @one br_table32 = BrTable32 $( { $( $field:ident : $field_ty:ty ),* } )? ) => { fn br_table32(&mut self $( $( , $field : $field_ty )* )? ) { self.disas_br_table32($($($field),*)?) } }; // All other opcodes other than `br_table` are handled in the same manner. ( @one $snake_name:ident = $name:ident $( { $( $field:ident : $field_ty:ty ),* } )? ) => { fn $snake_name(&mut self $( $( , $field : $field_ty )* )? ) { self.disas_op(stringify!($snake_name), &[$($(&$field),*)?]) } }; } impl<'a> OpVisitor for Disassembler<'a> { type BytecodeStream = SafeBytecodeStream<'a>; fn bytecode(&mut self) -> &mut Self::BytecodeStream { &mut self.bytecode } type Return = (); fn before_visit(&mut self) { self.start = self.bytecode.position(); } fn after_visit(&mut self) { if self.offsets { write!(&mut self.disas, "{:8x}: ", self.start + self.start_offset).unwrap(); } if self.hexdump { let size = self.bytecode.position() - self.start; let mut need_space = false; for byte in &self.raw_bytecode[self.start..][..size] { let space = if need_space { " " } else { "" }; write!(&mut self.disas, "{space}{byte:02x}").unwrap(); need_space = true; } for _ in 0..12_usize.saturating_sub(size) { write!(&mut self.disas, " ").unwrap(); } } self.disas.push_str(&self.temp); self.temp.clear(); self.disas.push('\n'); } for_each_op!(impl_disas); } impl ExtendedOpVisitor for Disassembler<'_> { for_each_extended_op!(impl_disas); }