add: dfs test, and code fixes

This commit is contained in:
2026-03-12 23:08:53 -07:00
parent dc9bb41cce
commit 8e36afbf67
5 changed files with 341 additions and 72 deletions

View File

@@ -220,11 +220,14 @@ pub enum VarKeyword {
/// `INZ(*named-constant)` — initialise to named constant.
InzNamed(NamedConstant),
Static,
/// `DIM(n)` — declares the variable as an array with `n` elements.
/// `DIM(n)` — declares the variable as a 1-D array with `n` elements.
Dim(Expression),
/// `DIM(rows: cols)` — declares the variable as a 2-D array.
Dim2(Expression, Expression),
Other(String),
}
// ─────────────────────────────────────────────────────────────────────────────
// Procedures
// ─────────────────────────────────────────────────────────────────────────────

View File

@@ -78,6 +78,7 @@ pub fn compile_to_object(
builder,
globals: HashMap::new(),
array_dims: HashMap::new(),
array_cols: HashMap::new(),
string_cache: HashMap::new(),
global_inits: Vec::new(),
};
@@ -141,6 +142,7 @@ pub fn emit_ir(program: &Program) -> Result<String, CodegenError> {
builder,
globals: HashMap::new(),
array_dims: HashMap::new(),
array_cols: HashMap::new(),
string_cache: HashMap::new(),
global_inits: Vec::new(),
};
@@ -182,9 +184,14 @@ struct Codegen<'ctx> {
builder: Builder<'ctx>,
/// Module-scope global variables name -> (alloca/global ptr, TypeSpec)
globals: HashMap<String, (PointerValue<'ctx>, TypeSpec)>,
/// Array dimension table: variable name -> number of elements.
/// Populated when a `DIM(n)` keyword is encountered.
/// Array dimension table: variable name -> total number of elements.
/// Populated when a `DIM(n)` or `DIM(rows:cols)` keyword is encountered.
array_dims: HashMap<String, u64>,
/// Column count for 2-D arrays: variable name -> number of columns.
/// Only present for variables declared with `DIM(rows: cols)`.
/// Used to convert a (row, col) subscript into a flat 0-based index:
/// flat = (row - 1) * cols + (col - 1)
array_cols: HashMap<String, u64>,
/// Interned string literal globals (content -> global ptr).
string_cache: HashMap<String, PointerValue<'ctx>>,
/// Global declarations that need runtime initialisation (INZ with a value).
@@ -252,7 +259,23 @@ impl<'ctx> Codegen<'ctx> {
}
});
// Generate each procedure.
// ── Pass 1: forward-declare all procedure signatures ──────────────────
// This ensures that any procedure can call any other procedure regardless
// of source order (e.g. mainline calling dfs before dfs is defined).
for proc in &program.procedures {
let fn_name = if proc.exported {
format!("rpg_{}", proc.name)
} else {
proc.name.clone()
};
// Only declare if not already in the module (runtime fns, etc.).
if self.module.get_function(&fn_name).is_none() {
let fn_ty = self.build_proc_fn_type(proc);
self.module.add_function(&fn_name, fn_ty, None);
}
}
// ── Pass 2: emit procedure bodies ────────────────────────────────────
let mut exported_name: Option<String> = None;
for proc in &program.procedures {
if proc.exported && exported_name.is_none() {
@@ -332,8 +355,35 @@ impl<'ctx> Codegen<'ctx> {
fn gen_global_decl(&mut self, decl: &Declaration) -> Result<(), CodegenError> {
match decl {
Declaration::Standalone(sd) => {
let size = sd.ty.byte_size().unwrap_or(8);
let arr_ty = self.context.i8_type().array_type(size as u32);
let elem_size = sd.ty.byte_size().unwrap_or(8);
// Check for DIM(n) or DIM(rows:cols) keywords.
let dim1 = sd.keywords.iter().find_map(|kw| {
if let VarKeyword::Dim(expr) = kw { const_int_from_expr(expr) } else { None }
});
let dim2 = sd.keywords.iter().find_map(|kw| {
if let VarKeyword::Dim2(r, c) = kw {
match (const_int_from_expr(r), const_int_from_expr(c)) {
(Some(rows), Some(cols)) => Some((rows, cols)),
_ => None,
}
} else {
None
}
});
let total_bytes = if let Some((rows, cols)) = dim2 {
self.array_dims.insert(sd.name.clone(), rows * cols);
self.array_cols.insert(sd.name.clone(), cols);
elem_size * rows * cols
} else if let Some(n) = dim1 {
self.array_dims.insert(sd.name.clone(), n);
elem_size * n
} else {
elem_size
};
let arr_ty = self.context.i8_type().array_type(total_bytes as u32);
let global = self.module.add_global(arr_ty, Some(AddressSpace::default()), &sd.name);
global.set_initializer(&arr_ty.const_zero());
let ptr = global.as_pointer_value();
@@ -375,12 +425,44 @@ impl<'ctx> Codegen<'ctx> {
proc.name.clone()
};
let function = self.module.add_function(&fn_name, fn_ty, None);
// Re-use the forward declaration emitted in pass 1 rather than adding
// a duplicate function with the same name.
let function = self.module.get_function(&fn_name)
.unwrap_or_else(|| self.module.add_function(&fn_name, fn_ty, None));
let entry_bb = self.context.append_basic_block(function, "entry");
self.builder.position_at_end(entry_bb);
let mut state = FnState::new(function);
// ── Wire PI parameters into state.locals ──────────────────────────────
// Each incoming LLVM argument gets its own alloca slot so that the body
// can read (and write) the parameter by name just like any other local.
if let Some(pi) = &proc.pi {
for (i, param) in pi.params.iter().enumerate() {
if let Some(arg_val) = function.get_nth_param(i as u32) {
// Allocate a slot of the right size in the entry block.
let ptr = self.alloca_for_type(&param.ty, &param.name);
// Store the incoming argument value.
let i64_t = self.context.i64_type();
match arg_val {
BasicValueEnum::IntValue(iv) => {
// Extend/truncate to i64, then store at the right width.
let extended = self.builder
.build_int_s_extend_or_bit_cast(iv, i64_t, "param_ext")
.unwrap_or(iv);
self.store_value(ptr, extended.into(), &param.ty);
}
other => {
// Pointer / float — store as-is.
self.store_value(ptr, other, &param.ty);
}
}
state.locals.insert(param.name.clone(), (ptr, param.ty.clone()));
}
}
}
// Allocate locals for DCL-S inside the proc.
for decl in &proc.locals {
self.gen_local_decl(decl, &mut state)?;
@@ -431,25 +513,40 @@ impl<'ctx> Codegen<'ctx> {
fn gen_local_decl(&mut self, decl: &Declaration, state: &mut FnState<'ctx>) -> Result<(), CodegenError> {
match decl {
Declaration::Standalone(sd) => {
// Check if a DIM(n) keyword is present — if so we allocate a
// contiguous block of `n * elem_size` bytes.
let dim = sd.keywords.iter().find_map(|kw| {
// Check for DIM(n) or DIM(rows:cols) keywords.
let dim1 = sd.keywords.iter().find_map(|kw| {
if let VarKeyword::Dim(expr) = kw {
const_int_from_expr(expr)
} else {
None
}
});
let dim2 = sd.keywords.iter().find_map(|kw| {
if let VarKeyword::Dim2(r, c) = kw {
match (const_int_from_expr(r), const_int_from_expr(c)) {
(Some(rows), Some(cols)) => Some((rows, cols)),
_ => None,
}
} else {
None
}
});
let ptr = if let Some(n) = dim {
// Array: allocate n elements of the element type.
let ptr = if let Some((rows, cols)) = dim2 {
// 2-D array: allocate rows*cols elements of the element type.
self.alloca_for_type_dim(&sd.ty, &sd.name, rows * cols)
} else if let Some(n) = dim1 {
// 1-D array: allocate n elements of the element type.
self.alloca_for_type_dim(&sd.ty, &sd.name, n)
} else {
self.alloca_for_type(&sd.ty, &sd.name)
};
// Record the dimension so %Elem and indexing can use it.
if let Some(n) = dim {
// Record the dimension(s) so %Elem and indexing can use them.
if let Some((rows, cols)) = dim2 {
self.array_dims.insert(sd.name.clone(), rows * cols);
self.array_cols.insert(sd.name.clone(), cols);
} else if let Some(n) = dim1 {
self.array_dims.insert(sd.name.clone(), n);
}
@@ -555,6 +652,81 @@ impl<'ctx> Codegen<'ctx> {
}
}
/// Read one array element of type `ty` located at `elem_ptr`.
///
/// * `Ind` is loaded as an `i8` and zero-extended to `i64`.
/// * `Int` / `Uns` are loaded at their `byte_size()` width (8 bytes when the
///   size is unknown) and sign-extended to `i64`.
/// * Every other type yields `elem_ptr` itself, without emitting a load.
///
/// This function never returns `Err`: a failed load produces a constant `i64`
/// zero, a failed pointer cast falls back to `elem_ptr`, and a failed
/// extension falls back to the un-extended loaded value.
///
/// NOTE(review): `Uns` shares the sign-extending path, so a stored value with
/// its top bit set is read back as negative. `Uns` is presumably the unsigned
/// type — confirm whether zero-extension is intended here.
fn load_array_elem(
    &self,
    elem_ptr: PointerValue<'ctx>,
    ty: &TypeSpec,
) -> Result<BasicValueEnum<'ctx>, CodegenError> {
    let i64_t = self.context.i64_type();

    // Choose the storage type, the extension kind and the IR value names up
    // front so the cast/load/extend sequence below is written only once.
    let (storage_ty, sign_extend, cast_name, load_name, ext_name) = match ty {
        // Stored as i8 (1 = *On, 0 = *Off).
        TypeSpec::Ind => (
            self.context.i8_type(),
            false,
            "ind_ptr_cast",
            "ind_elem",
            "ind_ext",
        ),
        TypeSpec::Int(_) | TypeSpec::Uns(_) => {
            let bytes = ty.byte_size().unwrap_or(8);
            (
                self.context.custom_width_int_type((bytes * 8) as u32),
                true,
                "int_ptr_cast",
                "int_elem",
                "int_ext",
            )
        }
        // Char / pointer-like — hand back the element pointer unchanged.
        _ => return Ok(elem_ptr.into()),
    };

    let src_ptr = self
        .builder
        .build_pointer_cast(
            elem_ptr,
            self.context.ptr_type(inkwell::AddressSpace::default()),
            cast_name,
        )
        .unwrap_or(elem_ptr);

    let widened = match self.builder.build_load(storage_ty, src_ptr, load_name) {
        Ok(loaded) => {
            let narrow = loaded.into_int_value();
            let extended = if sign_extend {
                self.builder.build_int_s_extend(narrow, i64_t, ext_name)
            } else {
                self.builder.build_int_z_extend(narrow, i64_t, ext_name)
            };
            extended.unwrap_or(narrow)
        }
        // The load could not be built: fall back to a constant zero.
        Err(_) => i64_t.const_zero(),
    };

    Ok(widened.into())
}
/// Compute the address of element `(row, col)` in a 2-D array laid out in
/// row-major order, where both subscripts are 1-based (RPG convention).
///
/// The emitted arithmetic is:
///
/// ```text
/// flat     = (row - 1) * cols + (col - 1)
/// byte_off = flat * elem_size
/// result   = base_ptr + byte_off        (i8-typed GEP)
/// ```
///
/// * `base_ptr`  — start of the array storage.
/// * `row`, `col` — 1-based subscripts as integer values.
/// * `cols`      — number of columns per row (the row stride, in elements).
/// * `elem_size` — size of one element in bytes.
///
/// No bounds check is emitted. Panics if any builder call fails.
fn array_elem_ptr_2d(
    &self,
    base_ptr: PointerValue<'ctx>,
    row: inkwell::values::IntValue<'ctx>,
    col: inkwell::values::IntValue<'ctx>,
    cols: u64,
    elem_size: u64,
) -> PointerValue<'ctx> {
    let builder = &self.builder;
    let word = self.context.i64_type();

    // Constants used by the index arithmetic.
    let unit = word.const_int(1, false);
    let stride = word.const_int(cols, false);
    let width = word.const_int(elem_size, false);

    // Convert the 1-based subscripts to 0-based, then flatten row-major.
    let zero_based_row = builder.build_int_sub(row, unit, "row0").unwrap();
    let zero_based_col = builder.build_int_sub(col, unit, "col0").unwrap();
    let row_start = builder.build_int_mul(zero_based_row, stride, "row_off").unwrap();
    let flat_index = builder.build_int_add(row_start, zero_based_col, "flat").unwrap();

    // Scale the element index to a byte offset and step from the base as i8.
    let byte_offset = builder.build_int_mul(flat_index, width, "byte_off2d").unwrap();
    let byte_ty = self.context.i8_type();

    // SAFETY: the GEP is only valid when `(row, col)` lies inside the storage
    // behind `base_ptr`. That is not verified here — NOTE(review): callers
    // must pass `cols` / `elem_size` matching the array's declaration.
    let gep = unsafe { builder.build_gep(byte_ty, base_ptr, &[byte_offset], "elem_ptr_2d") };
    gep.unwrap()
}
fn zero_init_var(&self, ptr: PointerValue<'ctx>, ty: &TypeSpec) -> Result<(), CodegenError> {
let size = ty.byte_size().unwrap_or(0);
if size == 0 { return Ok(()); }
@@ -952,8 +1124,16 @@ impl<'ctx> Codegen<'ctx> {
// If the LValue has an index (array assignment), compute the element pointer.
let dest_ptr = if let LValue::Index(_, indices) = &a.target {
if let Some(idx_expr) = indices.first() {
let elem_size = ty.byte_size().unwrap_or(8);
let elem_size = ty.byte_size().unwrap_or(8);
if indices.len() >= 2 {
// 2-D subscript: name(row: col) — look up the column stride.
let cols = self.array_cols.get(name).copied().unwrap_or(1);
let row_val = self.gen_expression(&indices[0], state)?;
let col_val = self.gen_expression(&indices[1], state)?;
let row_i = self.coerce_to_i64(row_val);
let col_i = self.coerce_to_i64(col_val);
self.array_elem_ptr_2d(ptr, row_i, col_i, cols, elem_size)
} else if let Some(idx_expr) = indices.first() {
let idx_val = self.gen_expression(idx_expr, state)?;
let idx_i = self.coerce_to_i64(idx_val);
self.array_elem_ptr(ptr, idx_i, elem_size)
@@ -1323,8 +1503,6 @@ impl<'ctx> Codegen<'ctx> {
Expression::Variable(qname) => {
let name = qname.leaf();
if let Some((ptr, ty)) = self.resolve_var(name, state) {
let llvm_ty = self.type_spec_to_llvm(&ty)
.unwrap_or(BasicTypeEnum::IntType(i64_t));
match &ty {
TypeSpec::Int(_) | TypeSpec::Uns(_) => {
// Use byte_size() to get the real storage width — the
@@ -1337,14 +1515,23 @@ impl<'ctx> Codegen<'ctx> {
let ext = self.builder.build_int_s_extend(iv, i64_t, "sext").unwrap_or(iv);
return Ok(ext.into());
}
Err(CodegenError::new(format!("could not load variable '{}'", name)))
}
TypeSpec::Ind => {
// Stored as i8 (1 = *On, 0 = *Off); zero-extend to i64.
let i8_t = self.context.i8_type();
if let Ok(v) = self.builder.build_load(i8_t, ptr, name) {
let iv = v.into_int_value();
let ext = self.builder.build_int_z_extend(iv, i64_t, "ind_zext").unwrap_or(iv);
return Ok(ext.into());
}
Ok(i64_t.const_zero().into())
}
_ => {
// For CHAR / other types, return the pointer itself.
return Ok(ptr.into());
// For CHAR / pointer-like types, return the pointer itself.
Ok(ptr.into())
}
}
let _ = llvm_ty;
Err(CodegenError::new(format!("could not load variable '{}'", name)))
} else {
// Return 0 for unknown variables.
Ok(i64_t.const_zero().into())
@@ -1407,33 +1594,32 @@ impl<'ctx> Codegen<'ctx> {
}
// No function found — check if `name` is an array variable and
// the call is actually a subscript read: name(idx).
// the call is actually a subscript read: name(idx) or name(row:col).
if let Some((ptr, ty)) = self.resolve_var(name, state) {
let elem_size = ty.byte_size().unwrap_or(8);
if let Some(Arg::Expr(idx_expr)) = args.first() {
// Collect up to two Expr arguments.
let exprs: Vec<&Expression> = args.iter()
.filter_map(|a| if let Arg::Expr(e) = a { Some(e) } else { None })
.collect();
let elem_ptr = if exprs.len() >= 2 {
// 2-D subscript: name(row: col)
let cols = self.array_cols.get(name).copied().unwrap_or(1);
let row_val = self.gen_expression(exprs[0], state)?;
let col_val = self.gen_expression(exprs[1], state)?;
let row_i = self.coerce_to_i64(row_val);
let col_i = self.coerce_to_i64(col_val);
self.array_elem_ptr_2d(ptr, row_i, col_i, cols, elem_size)
} else if let Some(idx_expr) = exprs.first() {
let idx_val = self.gen_expression(idx_expr, state)?;
let idx_i = self.coerce_to_i64(idx_val);
let elem_ptr = self.array_elem_ptr(ptr, idx_i, elem_size);
match &ty {
TypeSpec::Int(_) | TypeSpec::Uns(_) => {
let bytes = ty.byte_size().unwrap_or(8);
let int_ty = self.context.custom_width_int_type((bytes * 8) as u32);
let cast_ptr = self.builder.build_pointer_cast(
elem_ptr,
self.context.ptr_type(AddressSpace::default()),
"call_elem_ptr_cast",
).unwrap_or(elem_ptr);
if let Ok(v) = self.builder.build_load(int_ty, cast_ptr, "call_elem") {
let iv = v.into_int_value();
let ext = self.builder
.build_int_s_extend(iv, i64_t, "call_elem_ext")
.unwrap_or(iv);
return Ok(ext.into());
}
}
_ => return Ok(elem_ptr.into()),
}
}
self.array_elem_ptr(ptr, idx_i, elem_size)
} else {
ptr
};
return self.load_array_elem(elem_ptr, &ty);
}
Ok(i64_t.const_zero().into())
@@ -1446,35 +1632,29 @@ impl<'ctx> Codegen<'ctx> {
}
Expression::Index(qname, indices) => {
// Array element read: name(i) — RPG uses 1-based indexing.
// Array element read: name(i) or name(row: col) — RPG uses 1-based indexing.
let name = qname.leaf();
if let Some((ptr, ty)) = self.resolve_var(name, state) {
let elem_size = ty.byte_size().unwrap_or(8);
if let Some(idx_expr) = indices.first() {
let elem_ptr = if indices.len() >= 2 {
// 2-D subscript: name(row: col)
let cols = self.array_cols.get(name).copied().unwrap_or(1);
let row_val = self.gen_expression(&indices[0], state)?;
let col_val = self.gen_expression(&indices[1], state)?;
let row_i = self.coerce_to_i64(row_val);
let col_i = self.coerce_to_i64(col_val);
self.array_elem_ptr_2d(ptr, row_i, col_i, cols, elem_size)
} else if let Some(idx_expr) = indices.first() {
let idx_val = self.gen_expression(idx_expr, state)?;
let idx_i = self.coerce_to_i64(idx_val);
let elem_ptr = self.array_elem_ptr(ptr, idx_i, elem_size);
// Load the element with the element's integer type.
match &ty {
TypeSpec::Int(_) | TypeSpec::Uns(_) => {
let bytes = ty.byte_size().unwrap_or(8);
let int_ty = self.context.custom_width_int_type((bytes * 8) as u32);
let cast_ptr = self.builder.build_pointer_cast(
elem_ptr,
self.context.ptr_type(AddressSpace::default()),
"elem_ptr_cast",
).unwrap_or(elem_ptr);
if let Ok(v) = self.builder.build_load(int_ty, cast_ptr, "elem") {
let iv = v.into_int_value();
let ext = self.builder.build_int_s_extend(iv, i64_t, "sext").unwrap_or(iv);
return Ok(ext.into());
}
}
_ => {
return Ok(elem_ptr.into());
}
}
}
self.array_elem_ptr(ptr, idx_i, elem_size)
} else {
ptr
};
// Load the element with the appropriate type.
return self.load_array_elem(elem_ptr, &ty);
}
Ok(i64_t.const_zero().into())
}

View File

@@ -1352,9 +1352,18 @@ impl Parser {
self.advance(); // KwDim
if self.peek() == &Token::LParen {
self.advance(); // (
if let Ok(expr) = self.parse_expression() {
self.eat(&Token::RParen);
return VarKeyword::Dim(expr);
if let Ok(rows) = self.parse_expression() {
if self.eat(&Token::Colon) {
// DIM(rows: cols) — 2-D array
if let Ok(cols) = self.parse_expression() {
self.eat(&Token::RParen);
return VarKeyword::Dim2(rows, cols);
}
} else {
// DIM(n) — 1-D array
self.eat(&Token::RParen);
return VarKeyword::Dim(rows);
}
}
self.eat(&Token::RParen);
}