diff --git a/fib.rpg b/fib.rpg new file mode 100644 index 0000000..72f23cf --- /dev/null +++ b/fib.rpg @@ -0,0 +1,27 @@ +**FREE +Ctl-Opt Main(Perform_Fibonacci_Sequence); + +Dcl-Proc Perform_Fibonacci_Sequence; + + Dcl-s i Uns(10); + Dcl-s fib Uns(10) Dim(10); + + // Display a title + Dsply ('Fibonacci Sequence:'); + + // Initialize the first two elements of the array + fib(1) = 0; // The sequence usually starts with 0 and 1 + fib(2) = 1; + + // Loop to calculate the rest of the sequence + For i = 3 to %Elem(fib); + // Each number is the sum of the two preceding ones + fib(i) = fib(i-1) + fib(i-2); + Endfor; + + // Loop to display the sequence numbers + For i = 1 to %Elem(fib); + Dsply (' ' + %Char(fib(i))); + Endfor; + +End-Proc Perform_Fibonacci_Sequence; diff --git a/rpgrt/src/lib.rs b/rpgrt/src/lib.rs index bc1c28e..1c2c144 100644 --- a/rpgrt/src/lib.rs +++ b/rpgrt/src/lib.rs @@ -12,6 +12,8 @@ //! | `rpg_dsply_i64` | `(n: i64)` | Display a signed 64-bit integer | //! | `rpg_dsply_f64` | `(f: f64)` | Display a double-precision float | //! | `rpg_halt` | `(code: i32)` | Abnormal program termination | +//! | `rpg_char_i64` | `(n: i64) -> *const c_char` | Format integer as null-term C string | +//! | `rpg_concat` | `(a: *const c_char, b: *const c_char) -> *const c_char` | Concatenate two C strings | //! //! ## Building //! @@ -44,10 +46,23 @@ #![allow(clippy::missing_safety_doc)] -use std::ffi::CStr; +use std::cell::RefCell; +use std::ffi::{CStr, CString}; use std::io::{self, Write}; use std::slice; +// ───────────────────────────────────────────────────────────────────────────── +// Thread-local scratch buffers used by rpg_char_i64 / rpg_concat +// ───────────────────────────────────────────────────────────────────────────── + +thread_local! { + /// Backing store for the most recent `rpg_char_i64` result. + static CHAR_BUF: RefCell = RefCell::new(CString::new("").unwrap()); + + /// Backing store for the most recent `rpg_concat` result. 
+ static CONCAT_BUF: RefCell = RefCell::new(CString::new("").unwrap()); +} + // ───────────────────────────────────────────────────────────────────────────── // rpg_dsply — display a fixed-length character field // ───────────────────────────────────────────────────────────────────────────── @@ -151,6 +166,66 @@ pub extern "C" fn rpg_dsply_f64(f: f64) { /// Maps roughly to the IBM i concept of an *unhandled exception* ending the /// job. #[no_mangle] +// ───────────────────────────────────────────────────────────────────────────── +// rpg_char_i64 — convert a 64-bit integer to a C string (%CHAR built-in) +// ───────────────────────────────────────────────────────────────────────────── + +/// Format `n` as a decimal C string and return a pointer to a thread-local +/// buffer holding the result. +/// +/// The returned pointer is valid until the next call to `rpg_char_i64` on the +/// same thread. Callers must not free it. +/// +/// This implements the RPG IV `%CHAR(numeric-expression)` built-in function. +#[no_mangle] +pub extern "C" fn rpg_char_i64(n: i64) -> *const std::os::raw::c_char { + let s = CString::new(n.to_string()).unwrap_or_else(|_| CString::new("0").unwrap()); + CHAR_BUF.with(|cell| { + *cell.borrow_mut() = s; + cell.borrow().as_ptr() + }) +} + +// ───────────────────────────────────────────────────────────────────────────── +// rpg_concat — concatenate two null-terminated C strings ('+' on char) +// ───────────────────────────────────────────────────────────────────────────── + +/// Concatenate two null-terminated C strings and return a pointer to a +/// thread-local buffer holding the result. +/// +/// The returned pointer is valid until the next call to `rpg_concat` on the +/// same thread. Callers must not free it. +/// +/// This implements the RPG IV `+` operator when both operands are character +/// expressions. 
+/// +/// # Safety +/// +/// Both `a` and `b` must be valid null-terminated C strings (or null pointers, +/// which are treated as empty strings). +#[no_mangle] +pub unsafe extern "C" fn rpg_concat( + a: *const std::os::raw::c_char, + b: *const std::os::raw::c_char, +) -> *const std::os::raw::c_char { + let sa = if a.is_null() { + std::borrow::Cow::Borrowed("") + } else { + unsafe { CStr::from_ptr(a).to_string_lossy() } + }; + let sb = if b.is_null() { + std::borrow::Cow::Borrowed("") + } else { + unsafe { CStr::from_ptr(b).to_string_lossy() } + }; + let joined = format!("{}{}", sa, sb); + let cs = CString::new(joined).unwrap_or_else(|_| CString::new("").unwrap()); + CONCAT_BUF.with(|cell| { + *cell.borrow_mut() = cs; + cell.borrow().as_ptr() + }) +} + pub extern "C" fn rpg_halt(code: i32) { eprintln!("RPG program halted with code {}", code); std::process::exit(code); @@ -374,6 +449,47 @@ fn rtrim_spaces(bytes: &[u8]) -> &[u8] { #[cfg(test)] mod tests { + // ── rpg_dsply ──────────────────────────────────────────────────────────── + + #[test] + fn char_i64_positive() { + let ptr = rpg_char_i64(42); + let s = unsafe { CStr::from_ptr(ptr).to_str().unwrap() }; + assert_eq!(s, "42"); + } + + #[test] + fn char_i64_zero() { + let ptr = rpg_char_i64(0); + let s = unsafe { CStr::from_ptr(ptr).to_str().unwrap() }; + assert_eq!(s, "0"); + } + + #[test] + fn char_i64_negative() { + let ptr = rpg_char_i64(-7); + let s = unsafe { CStr::from_ptr(ptr).to_str().unwrap() }; + assert_eq!(s, "-7"); + } + + // ── rpg_concat ─────────────────────────────────────────────────────────── + + #[test] + fn concat_two_strings() { + let a = CString::new("Hello, ").unwrap(); + let b = CString::new("World!").unwrap(); + let ptr = unsafe { rpg_concat(a.as_ptr(), b.as_ptr()) }; + let s = unsafe { CStr::from_ptr(ptr).to_str().unwrap() }; + assert_eq!(s, "Hello, World!"); + } + + #[test] + fn concat_null_pointers() { + let ptr = unsafe { rpg_concat(std::ptr::null(), std::ptr::null()) }; + let s = 
unsafe { CStr::from_ptr(ptr).to_str().unwrap() }; + assert_eq!(s, ""); + } + use super::*; #[test] diff --git a/src/ast.rs b/src/ast.rs index 5d64103..51e6725 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -220,6 +220,8 @@ pub enum VarKeyword { /// `INZ(*named-constant)` — initialise to named constant. InzNamed(NamedConstant), Static, + /// `DIM(n)` — declares the variable as an array with `n` elements. + Dim(Expression), Other(String), } @@ -608,6 +610,8 @@ pub enum BuiltIn { Rem(Box, Box), /// `%DIV(a:b)`. Div(Box, Box), + /// `%ELEM(array)` — number of elements in an array. + Elem(Box), /// Any built-in we haven't individually modelled. Other(String, Vec), } diff --git a/src/codegen.rs b/src/codegen.rs index 3be277e..266bc2e 100644 --- a/src/codegen.rs +++ b/src/codegen.rs @@ -77,6 +77,7 @@ pub fn compile_to_object( module, builder, globals: HashMap::new(), + array_dims: HashMap::new(), string_cache: HashMap::new(), global_inits: Vec::new(), }; @@ -139,6 +140,7 @@ pub fn emit_ir(program: &Program) -> Result { module, builder, globals: HashMap::new(), + array_dims: HashMap::new(), string_cache: HashMap::new(), global_inits: Vec::new(), }; @@ -180,6 +182,9 @@ struct Codegen<'ctx> { builder: Builder<'ctx>, /// Module-scope global variables name -> (alloca/global ptr, TypeSpec) globals: HashMap, TypeSpec)>, + /// Array dimension table: variable name -> number of elements. + /// Populated when a `DIM(n)` keyword is encountered. + array_dims: HashMap, /// Interned string literal globals (content -> global ptr). string_cache: HashMap>, /// Global declarations that need runtime initialisation (INZ with a value). @@ -228,6 +233,25 @@ impl<'ctx> Codegen<'ctx> { // function is available when we build the @llvm.global_ctors entry. self.gen_global_init_fn()?; + // Determine the entry-point procedure name. + // + // Priority order: + // 1. `CTL-OPT MAIN(name)` — explicit entry point declaration. + // 2. The first EXPORT-ed procedure (legacy / hello.rpg style). 
+ let ctl_main: Option = program.declarations.iter().find_map(|d| { + if let Declaration::ControlSpec(cs) = d { + cs.keywords.iter().find_map(|kw| { + if let CtlKeyword::Main(name) = kw { + Some(name.clone()) + } else { + None + } + }) + } else { + None + } + }); + // Generate each procedure. let mut exported_name: Option = None; for proc in &program.procedures { @@ -237,8 +261,10 @@ impl<'ctx> Codegen<'ctx> { self.gen_procedure(proc)?; } - // Emit a C `main()` wrapper that calls the exported entry point. - if let Some(name) = exported_name { + // Emit a C `main()` wrapper that calls the entry point. + // CTL-OPT MAIN(name) takes priority over EXPORT. + let entry = ctl_main.or(exported_name); + if let Some(name) = entry { self.gen_main_wrapper(&name)?; } @@ -278,6 +304,18 @@ impl<'ctx> Codegen<'ctx> { false, ); self.module.add_function("memset", memset_ty, None); + + // void rpg_dsply_i64(i64 n) — display an integer + let dsply_i64_ty = void_t.fn_type(&[i64_t.into()], false); + self.module.add_function("rpg_dsply_i64", dsply_i64_ty, None); + + // i8* rpg_char_i64(i64 n) — format integer to null-terminated C string + let char_i64_ty = i8_ptr.fn_type(&[i64_t.into()], false); + self.module.add_function("rpg_char_i64", char_i64_ty, None); + + // i8* rpg_concat(i8* a, i8* b) — concatenate two C strings + let concat_ty = i8_ptr.fn_type(&[i8_ptr.into(), i8_ptr.into()], false); + self.module.add_function("rpg_concat", concat_ty, None); } // ── Global declarations ───────────────────────────────────────────────── @@ -384,7 +422,28 @@ impl<'ctx> Codegen<'ctx> { fn gen_local_decl(&mut self, decl: &Declaration, state: &mut FnState<'ctx>) -> Result<(), CodegenError> { match decl { Declaration::Standalone(sd) => { - let ptr = self.alloca_for_type(&sd.ty, &sd.name); + // Check if a DIM(n) keyword is present — if so we allocate a + // contiguous block of `n * elem_size` bytes. 
+ let dim = sd.keywords.iter().find_map(|kw| { + if let VarKeyword::Dim(expr) = kw { + const_int_from_expr(expr) + } else { + None + } + }); + + let ptr = if let Some(n) = dim { + // Array: allocate n elements of the element type. + self.alloca_for_type_dim(&sd.ty, &sd.name, n) + } else { + self.alloca_for_type(&sd.ty, &sd.name) + }; + + // Record the dimension so %Elem and indexing can use it. + if let Some(n) = dim { + self.array_dims.insert(sd.name.clone(), n); + } + // Apply initialiser if any. for kw in &sd.keywords { match kw { @@ -415,6 +474,35 @@ impl<'ctx> Codegen<'ctx> { self.builder.build_alloca(arr_ty, name).unwrap() } + /// Allocate storage for an array of `n` elements of type `ty`. + fn alloca_for_type_dim(&self, ty: &TypeSpec, name: &str, n: u64) -> PointerValue<'ctx> { + let elem_size = ty.byte_size().unwrap_or(8) as u32; + let total = elem_size * (n as u32); + let arr_ty = self.context.i8_type().array_type(total); + self.builder.build_alloca(arr_ty, name).unwrap() + } + + /// Return a pointer to element `index` (1-based, RPG convention) of array `base_ptr`. + /// `elem_size` is the byte size of one element. + fn array_elem_ptr( + &self, + base_ptr: PointerValue<'ctx>, + index: inkwell::values::IntValue<'ctx>, + elem_size: u64, + ) -> PointerValue<'ctx> { + let i64_t = self.context.i64_type(); + // RPG arrays are 1-based — subtract 1 to get a 0-based byte offset. 
+ let one = i64_t.const_int(1, false); + let zero_based = self.builder.build_int_sub(index, one, "idx0").unwrap(); + let elem_bytes = i64_t.const_int(elem_size, false); + let byte_off = self.builder.build_int_mul(zero_based, elem_bytes, "byte_off").unwrap(); + unsafe { + self.builder + .build_gep(self.context.i8_type(), base_ptr, &[byte_off], "elem_ptr") + .unwrap() + } + } + fn zero_init_var(&self, ptr: PointerValue<'ctx>, ty: &TypeSpec) -> Result<(), CodegenError> { let size = ty.byte_size().unwrap_or(0); if size == 0 { return Ok(()); } @@ -604,8 +692,11 @@ impl<'ctx> Codegen<'ctx> { self.builder.position_at_end(bb); // Call the RPG entry procedure. - let rpg_fn_name = format!("rpg_{}", rpg_entry); - if let Some(rpg_fn) = self.module.get_function(&rpg_fn_name) { + // Try the bare name first (CTL-OPT MAIN procedures are not renamed), + // then the `rpg_` prefix used for EXPORT-ed procedures. + let callee = self.module.get_function(rpg_entry) + .or_else(|| self.module.get_function(&format!("rpg_{}", rpg_entry))); + if let Some(rpg_fn) = callee { self.builder.build_call(rpg_fn, &[], "call_rpg").ok(); } @@ -699,13 +790,31 @@ impl<'ctx> Codegen<'ctx> { self.builder.build_call(dsply, &[ptr.into(), len_val.into()], "dsply").ok(); } other => { - // Evaluate as integer-like expression and display it. + // Evaluate the expression; dispatch to the right display helper. if let Ok(val) = self.gen_expression(other, state) { - // For now just call dsply_cstr on an empty string as fallback. - let _ = val; - let empty = self.intern_string(""); - let zero = self.context.i64_type().const_zero(); - self.builder.build_call(dsply, &[empty.into(), zero.into()], "dsply").ok(); + match val { + BasicValueEnum::PointerValue(ptr) => { + // String pointer — use rpg_dsply_cstr. 
+ if let Some(dsply_cstr) = self.module.get_function("rpg_dsply_cstr") { + self.builder.build_call(dsply_cstr, &[ptr.into()], "dsply_cstr").ok(); + } + } + BasicValueEnum::IntValue(iv) => { + // Integer — use rpg_dsply_i64. + if let Some(dsply_i64) = self.module.get_function("rpg_dsply_i64") { + let ext = self.builder + .build_int_s_extend(iv, self.context.i64_type(), "dsply_ext") + .unwrap_or(iv); + self.builder.build_call(dsply_i64, &[ext.into()], "dsply_i64").ok(); + } + } + _ => { + // Fallback: display an empty string. + let empty = self.intern_string(""); + let zero = self.context.i64_type().const_zero(); + self.builder.build_call(dsply, &[empty.into(), zero.into()], "dsply").ok(); + } + } } } } @@ -723,6 +832,20 @@ impl<'ctx> Codegen<'ctx> { // Clone to avoid borrow issues. let ty = ty.clone(); + // If the LValue has an index (array assignment), compute the element pointer. + let dest_ptr = if let LValue::Index(_, indices) = &a.target { + if let Some(idx_expr) = indices.first() { + let elem_size = ty.byte_size().unwrap_or(8); + let idx_val = self.gen_expression(idx_expr, state)?; + let idx_i = self.coerce_to_i64(idx_val); + self.array_elem_ptr(ptr, idx_i, elem_size) + } else { + ptr + } + } else { + ptr + }; + match &ty { TypeSpec::Char(size_expr) => { if let Expression::Literal(Literal::String(s)) = &a.value { @@ -733,16 +856,16 @@ impl<'ctx> Codegen<'ctx> { let src = self.intern_bytes(&padded); let memcpy = self.module.get_function("memcpy").unwrap(); let len = self.context.i64_type().const_int(field_len as u64, false); - self.builder.build_call(memcpy, &[ptr.into(), src.into(), len.into()], "assign").ok(); + self.builder.build_call(memcpy, &[dest_ptr.into(), src.into(), len.into()], "assign").ok(); } } TypeSpec::Int(_) | TypeSpec::Uns(_) => { let val = self.gen_expression(&a.value, state)?; - self.store_value(ptr, val, &ty); + self.store_value(dest_ptr, val, &ty); } _ => { if let Ok(val) = self.gen_expression(&a.value, state) { - self.store_value(ptr, 
val, &ty); + self.store_value(dest_ptr, val, &ty); } } } @@ -893,7 +1016,10 @@ impl<'ctx> Codegen<'ctx> { let start = self.gen_expression(&f.start, state)?; let start_i = self.coerce_to_i64(start); self.builder.build_store(loop_var, start_i).ok(); - state.locals.insert(f.var.clone(), (loop_var, TypeSpec::Int(Box::new(Expression::Literal(Literal::Integer(10)))))); + // Store the loop variable with Int(20) so that byte_size() returns 8, + // matching the i64 alloca above. (Int(10) would give 4 bytes, causing + // a 32-bit load from an 8-byte slot.) + state.locals.insert(f.var.clone(), (loop_var, TypeSpec::Int(Box::new(Expression::Literal(Literal::Integer(20)))))); let cond_bb = self.context.append_basic_block(func, "for_cond"); let body_bb = self.context.append_basic_block(func, "for_body"); @@ -1062,9 +1188,12 @@ impl<'ctx> Codegen<'ctx> { let llvm_ty = self.type_spec_to_llvm(&ty) .unwrap_or(BasicTypeEnum::IntType(i64_t)); match &ty { - TypeSpec::Int(w) | TypeSpec::Uns(w) => { - let width = const_int_from_expr(w).unwrap_or(8); - let int_ty = self.context.custom_width_int_type((width * 8) as u32); + TypeSpec::Int(_) | TypeSpec::Uns(_) => { + // Use byte_size() to get the real storage width — the + // type parameter is RPG's digit-precision (e.g. 10 for + // Uns(10) = 4 bytes), NOT the byte count. + let bytes = ty.byte_size().unwrap_or(8); + let int_ty = self.context.custom_width_int_type((bytes * 8) as u32); if let Ok(v) = self.builder.build_load(int_ty, ptr, name) { let iv = v.into_int_value(); let ext = self.builder.build_int_s_extend(iv, i64_t, "sext").unwrap_or(iv); @@ -1111,7 +1240,15 @@ impl<'ctx> Codegen<'ctx> { Expression::Paren(e) => self.gen_expression(e, state), Expression::Call(name, args) => { - // Treat call-as-expression similarly to CALLP. + // RPG IV uses identical syntax for procedure calls and array + // subscripts: `name(arg)`. 
At parse time we always emit + // Expression::Call for `ident(...)`, so here we need to + // distinguish the two cases at code-generation time: + // + // 1. A real procedure/function exists in the module → call it. + // 2. The name refers to a local/global variable with a known + // DIM → treat the single argument as an array index. + // 3. Otherwise → return 0 (unknown call). let callee = self.module.get_function(name) .or_else(|| self.module.get_function(&format!("rpg_{}", name))); if let Some(callee) = callee { @@ -1128,13 +1265,79 @@ impl<'ctx> Codegen<'ctx> { inkwell::values::ValueKind::Basic(v) => return Ok(v), inkwell::values::ValueKind::Instruction(_) => {} } + return Ok(i64_t.const_zero().into()); } + + // No function found — check if `name` is an array variable and + // the call is actually a subscript read: name(idx). + if let Some((ptr, ty)) = self.resolve_var(name, state) { + let elem_size = ty.byte_size().unwrap_or(8); + if let Some(Arg::Expr(idx_expr)) = args.first() { + let idx_val = self.gen_expression(idx_expr, state)?; + let idx_i = self.coerce_to_i64(idx_val); + let elem_ptr = self.array_elem_ptr(ptr, idx_i, elem_size); + match &ty { + TypeSpec::Int(_) | TypeSpec::Uns(_) => { + let bytes = ty.byte_size().unwrap_or(8); + let int_ty = self.context.custom_width_int_type((bytes * 8) as u32); + let cast_ptr = self.builder.build_pointer_cast( + elem_ptr, + self.context.ptr_type(AddressSpace::default()), + "call_elem_ptr_cast", + ).unwrap_or(elem_ptr); + if let Ok(v) = self.builder.build_load(int_ty, cast_ptr, "call_elem") { + let iv = v.into_int_value(); + let ext = self.builder + .build_int_s_extend(iv, i64_t, "call_elem_ext") + .unwrap_or(iv); + return Ok(ext.into()); + } + } + _ => return Ok(elem_ptr.into()), + } + } + } + Ok(i64_t.const_zero().into()) } Expression::BuiltIn(bif) => self.gen_builtin(bif, state), - Expression::Special(_) | Expression::Index(_, _) => { + Expression::Special(_) => { + Ok(i64_t.const_zero().into()) + } + + 
Expression::Index(qname, indices) => { + // Array element read: name(i) — RPG uses 1-based indexing. + let name = qname.leaf(); + if let Some((ptr, ty)) = self.resolve_var(name, state) { + let elem_size = ty.byte_size().unwrap_or(8); + if let Some(idx_expr) = indices.first() { + let idx_val = self.gen_expression(idx_expr, state)?; + let idx_i = self.coerce_to_i64(idx_val); + let elem_ptr = self.array_elem_ptr(ptr, idx_i, elem_size); + // Load the element with the element's integer type. + match &ty { + TypeSpec::Int(_) | TypeSpec::Uns(_) => { + let bytes = ty.byte_size().unwrap_or(8); + let int_ty = self.context.custom_width_int_type((bytes * 8) as u32); + let cast_ptr = self.builder.build_pointer_cast( + elem_ptr, + self.context.ptr_type(AddressSpace::default()), + "elem_ptr_cast", + ).unwrap_or(elem_ptr); + if let Ok(v) = self.builder.build_load(int_ty, cast_ptr, "elem") { + let iv = v.into_int_value(); + let ext = self.builder.build_int_s_extend(iv, i64_t, "sext").unwrap_or(iv); + return Ok(ext.into()); + } + } + _ => { + return Ok(elem_ptr.into()); + } + } + } + } Ok(i64_t.const_zero().into()) } } @@ -1179,6 +1382,24 @@ impl<'ctx> Codegen<'ctx> { match op { BinOp::Add => { + // If either operand is a pointer (string), use rpg_concat. + let lv_is_ptr = matches!(lv, BasicValueEnum::PointerValue(_)); + let rv_is_ptr = matches!(rv, BasicValueEnum::PointerValue(_)); + if lv_is_ptr || rv_is_ptr { + // Ensure both sides are pointers (call rpg_char_i64 on integers). 
+ let lp = self.coerce_to_cstr_ptr(lv, state); + let rp = self.coerce_to_cstr_ptr(rv, state); + let concat_fn = self.module.get_function("rpg_concat").unwrap(); + let call = self.builder + .build_call(concat_fn, &[lp.into(), rp.into()], "concat") + .unwrap(); + return match call.try_as_basic_value() { + inkwell::values::ValueKind::Basic(v) => Ok(v), + inkwell::values::ValueKind::Instruction(_) => { + Ok(self.context.ptr_type(AddressSpace::default()).const_null().into()) + } + }; + } let l = self.coerce_to_i64(lv); let r = self.coerce_to_i64(rv); Ok(self.builder.build_int_add(l, r, "add").unwrap().into()) @@ -1243,6 +1464,18 @@ impl<'ctx> Codegen<'ctx> { fn gen_builtin(&mut self, bif: &BuiltIn, state: &mut FnState<'ctx>) -> Result, CodegenError> { let i64_t = self.context.i64_type(); match bif { + BuiltIn::Elem(e) => { + // %ELEM(array) — number of elements declared with DIM(n). + if let Expression::Variable(qname) = e.as_ref() { + let name = qname.leaf(); + if let Some(&n) = self.array_dims.get(name) { + return Ok(i64_t.const_int(n, false).into()); + } + // Fall back to 1 if not an array variable. + return Ok(i64_t.const_int(1, false).into()); + } + Ok(i64_t.const_zero().into()) + } BuiltIn::Len(e) => { // %LEN(field) — return compile-time field length. if let Expression::Variable(qname) = e.as_ref() { @@ -1267,8 +1500,30 @@ impl<'ctx> Codegen<'ctx> { let abs = self.builder.build_select(cmp, iv, neg, "abs").unwrap(); Ok(abs.into()) } - BuiltIn::Int(e) | BuiltIn::Char(e) => { - self.gen_expression(e, state) + BuiltIn::Int(e) => { + let v = self.gen_expression(e, state)?; + Ok(self.coerce_to_i64(v).into()) + } + BuiltIn::Char(e) => { + // %CHAR(expr) — convert to a null-terminated C string pointer. + let v = self.gen_expression(e, state)?; + match v { + BasicValueEnum::PointerValue(_) => Ok(v), // already a string + _ => { + // Convert integer to string via rpg_char_i64. 
+ let iv = self.coerce_to_i64(v); + let char_fn = self.module.get_function("rpg_char_i64").unwrap(); + let call = self.builder + .build_call(char_fn, &[iv.into()], "char_i64") + .unwrap(); + match call.try_as_basic_value() { + inkwell::values::ValueKind::Basic(v) => Ok(v), + inkwell::values::ValueKind::Instruction(_) => { + Ok(self.context.ptr_type(AddressSpace::default()).const_null().into()) + } + } + } + } } BuiltIn::Sqrt(e) => { let v = self.gen_expression(e, state)?; @@ -1387,6 +1642,32 @@ impl<'ctx> Codegen<'ctx> { Ok(self.builder.build_int_compare(pred, l, r, "cmp").unwrap_or_else(|_| i64_t.const_zero())) } + /// Coerce a value to a C string pointer (`i8*`). + /// + /// * If `val` is already a pointer, return it as-is. + /// * If `val` is an integer, call `rpg_char_i64` to format it and return + /// the resulting pointer. + fn coerce_to_cstr_ptr( + &mut self, + val: BasicValueEnum<'ctx>, + _state: &mut FnState<'ctx>, + ) -> PointerValue<'ctx> { + match val { + BasicValueEnum::PointerValue(p) => p, + _ => { + let iv = self.coerce_to_i64(val); + let char_fn = self.module.get_function("rpg_char_i64").unwrap(); + let call = self.builder + .build_call(char_fn, &[iv.into()], "char_i64") + .unwrap(); + match call.try_as_basic_value() { + inkwell::values::ValueKind::Basic(BasicValueEnum::PointerValue(p)) => p, + _ => self.context.ptr_type(AddressSpace::default()).const_null(), + } + } + } + } + fn coerce_to_i64(&self, val: BasicValueEnum<'ctx>) -> inkwell::values::IntValue<'ctx> { let i64_t = self.context.i64_type(); match val { diff --git a/src/lower.rs b/src/lower.rs index cc0841b..b9dd36d 100644 --- a/src/lower.rs +++ b/src/lower.rs @@ -45,6 +45,24 @@ pub fn lower(source: &str) -> Result { Ok(program) } +/// Strip RPG IV compiler directives that start with `**` (e.g. `**FREE`, +/// `**CTDATA`) by blanking out those lines before tokenization. 
+fn strip_star_star_directives(source: &str) -> String { + source + .lines() + .map(|line| { + let trimmed = line.trim_start(); + if trimmed.starts_with("**") { + // Replace with an empty line so line numbers stay consistent. + "" + } else { + line + } + }) + .collect::>() + .join("\n") +} + // ───────────────────────────────────────────────────────────────────────────── // Error type // ───────────────────────────────────────────────────────────────────────────── @@ -52,11 +70,17 @@ pub fn lower(source: &str) -> Result { #[derive(Debug)] pub struct LowerError { pub message: String, + /// 1-based source line where the error was detected, if known. + pub line: Option, } impl std::fmt::Display for LowerError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "lower error: {}", self.message) + if let Some(ln) = self.line { + write!(f, "lower error (line {}): {}", ln, self.message) + } else { + write!(f, "lower error: {}", self.message) + } } } @@ -64,7 +88,11 @@ impl std::error::Error for LowerError {} impl LowerError { fn new(msg: impl Into) -> Self { - LowerError { message: msg.into() } + LowerError { message: msg.into(), line: None } + } + + fn at(line: usize, msg: impl Into) -> Self { + LowerError { message: msg.into(), line: Some(line) } } } @@ -385,12 +413,22 @@ enum Token { // ───────────────────────────────────────────────────────────────────────────── fn tokenize(source: &str) -> Result, LowerError> { - let chars: Vec = source.chars().collect(); + // Strip **FREE / **CTDATA / any **word compiler directives first. + let cleaned = strip_star_star_directives(source); + let chars: Vec = cleaned.chars().collect(); let mut pos = 0; let mut tokens = Vec::new(); + let mut line: usize = 1; while pos < chars.len() { - // Skip whitespace + // Track line numbers. 
+ if chars[pos] == '\n' { + line += 1; + pos += 1; + continue; + } + + // Skip other whitespace if chars[pos].is_whitespace() { pos += 1; continue; @@ -490,6 +528,14 @@ fn tokenize(source: &str) -> Result, LowerError> { '=' => { tokens.push(Token::OpEq); pos += 1; continue; } '*' => { if pos + 1 < chars.len() && chars[pos + 1] == '*' { + // `**word` — a compiler directive that escaped pre-processing; + // treat the rest of the line as a comment and skip it. + if pos + 2 < chars.len() && chars[pos + 2].is_alphabetic() { + while pos < chars.len() && chars[pos] != '\n' { + pos += 1; + } + continue; + } tokens.push(Token::OpStar2); pos += 2; } else { @@ -704,6 +750,7 @@ fn tokenize(source: &str) -> Result, LowerError> { } tokens.push(Token::Eof); + let _ = line; // line tracking available for future per-token storage Ok(tokens) } @@ -873,11 +920,12 @@ fn keyword_or_ident(upper: &str, original: &str) -> Token { struct Parser { tokens: Vec, pos: usize, + _line: usize, } impl Parser { fn new(tokens: Vec) -> Self { - Parser { tokens, pos: 0 } + Parser { tokens, pos: 0, _line: 1 } } fn peek(&self) -> &Token { @@ -901,7 +949,10 @@ impl Parser { if &tok == expected { Ok(()) } else { - Err(LowerError::new(format!("expected {:?}, got {:?}", expected, tok))) + Err(LowerError::new(format!( + "expected {:?}, got {:?} (token index {})", + expected, tok, self.pos + ))) } } @@ -927,12 +978,21 @@ impl Parser { fn parse_program(&mut self) -> Result { let mut declarations = Vec::new(); let mut procedures = Vec::new(); + let mut skipped_tokens: Vec = Vec::new(); while !self.is_eof() { match self.peek() { Token::KwDclProc => { - if let Ok(p) = self.parse_procedure() { - procedures.push(p); + if !skipped_tokens.is_empty() { + skipped_tokens.clear(); + } + match self.parse_procedure() { + Ok(p) => procedures.push(p), + Err(e) => { + eprintln!("warning: skipping procedure due to parse error: {}", e); + // Recover by advancing past the current token. 
+ self.advance(); + } } } Token::KwCtlOpt | @@ -941,17 +1001,34 @@ impl Parser { Token::KwDclDs | Token::KwDclF | Token::KwBegSr => { - if let Ok(d) = self.parse_declaration() { - declarations.push(d); + if !skipped_tokens.is_empty() { + skipped_tokens.clear(); + } + match self.parse_declaration() { + Ok(d) => declarations.push(d), + Err(e) => { + eprintln!("warning: skipping declaration due to parse error: {}", e); + self.advance(); + } } } - _ => { - // Skip unrecognised top-level tokens + tok => { + // Accumulate unrecognised top-level tokens so we can report + // them as a meaningful diagnostic. + skipped_tokens.push(format!("{:?}", tok)); self.advance(); } } } + if !skipped_tokens.is_empty() { + eprintln!( + "warning: {} unrecognised top-level token(s) were skipped: {}", + skipped_tokens.len(), + skipped_tokens.join(", ") + ); + } + Ok(Program { declarations, procedures }) } @@ -965,7 +1042,11 @@ impl Parser { Token::KwDclDs => self.parse_dcl_ds(), Token::KwDclF => self.parse_dcl_f(), Token::KwBegSr => self.parse_subroutine(), - tok => Err(LowerError::new(format!("unexpected token in declaration: {:?}", tok))), + tok => Err(LowerError::new(format!( + "unexpected token in declaration: {:?} — \ + expected one of CTL-OPT, DCL-S, DCL-C, DCL-DS, DCL-F, BEG-SR", + tok + ))), } } @@ -1256,6 +1337,18 @@ impl Parser { fn parse_var_keyword(&mut self) -> VarKeyword { match self.peek().clone() { + Token::KwDim => { + self.advance(); // KwDim + if self.peek() == &Token::LParen { + self.advance(); // ( + if let Ok(expr) = self.parse_expression() { + self.eat(&Token::RParen); + return VarKeyword::Dim(expr); + } + self.eat(&Token::RParen); + } + VarKeyword::Other("DIM".to_string()) + } Token::KwInz => { self.advance(); if self.peek() == &Token::LParen { @@ -1342,6 +1435,10 @@ impl Parser { // Body statements until END-PROC let body = self.parse_statement_list(&[Token::KwEndProc]); self.eat(&Token::KwEndProc); + // RPG IV allows an optional procedure name after END-PROC: + // 
End-Proc Perform_Fibonacci_Sequence; + // Consume it (any name-like token) so it doesn't leak to parse_program. + let _ = self.try_parse_name(); self.eat_semicolon(); Ok(Procedure { name, exported, pi, locals, body }) @@ -1893,6 +1990,8 @@ impl Parser { if self.peek() == &Token::LParen { // Peek ahead to decide: call or subscript-assignment? // If after the matching ')' we see '=' it's an assignment, else call. + // NOTE: `name` is already consumed, so we save pos at '(' and scan + // forward without rewinding past the name. let saved = self.pos; self.advance(); // ( let mut depth = 1; @@ -1904,11 +2003,22 @@ impl Parser { } } let is_assign = self.peek() == &Token::OpEq; - self.pos = saved; // rewind + self.pos = saved; // rewind to '(' if is_assign { // subscript assignment: `name(idx) = expr;` - let lv = self.parse_lvalue()?; + // Build LValue directly using the already-consumed `name` + // instead of calling parse_lvalue() (which would try to + // re-consume the name from the current position which is '('). 
+ let qname = QualifiedName::simple(name.clone()); + let mut indices = Vec::new(); + self.advance(); // consume '(' + indices.push(self.parse_expression()?); + while self.eat(&Token::Colon) { + indices.push(self.parse_expression()?); + } + self.eat(&Token::RParen); + let lv = LValue::Index(qname, indices); self.expect(&Token::OpEq)?; let value = self.parse_expression()?; self.eat_semicolon(); @@ -2221,7 +2331,9 @@ impl Parser { fn parse_builtin_expr(&mut self) -> Result { let bif_tok = self.advance(); - self.expect(&Token::LParen)?; + self.expect(&Token::LParen).map_err(|e| LowerError::new(format!( + "built-in function {:?}: {}", bif_tok, e.message + )))?; let bif = match bif_tok { Token::BifLen => { let e = self.parse_expression()?; @@ -2277,6 +2389,11 @@ impl Parser { self.eat(&Token::RParen); BuiltIn::Error } + Token::BifElem => { + let e = self.parse_expression()?; + self.eat(&Token::RParen); + BuiltIn::Elem(Box::new(e)) + } Token::BifSize => { let e = self.parse_expression()?; self.eat(&Token::RParen); diff --git a/src/main.rs b/src/main.rs index eb94f6c..56ecbdb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -36,7 +36,6 @@ use std::{ fs, - path::PathBuf, process, }; @@ -44,6 +43,97 @@ use std::{ use clap::Parser as ClapParser; use rust_langrpg::{codegen, load_grammar, lower::lower, parse_as}; +// ───────────────────────────────────────────────────────────────────────────── +// BNF pre-processing helper +// ───────────────────────────────────────────────────────────────────────────── + +/// Uppercase all keyword-like tokens in `source` while preserving the content +/// of string literals, line comments, and block comments unchanged. +/// +/// This lets the BNF grammar (which uses uppercase terminal literals) validate +/// RPG IV source that uses mixed-case keywords such as `Ctl-Opt` or `Dcl-S`. 
///
/// # Scanning rules
///
/// The input is scanned left to right; the first rule that matches wins:
///
/// * `// …` line comments are copied verbatim up to (not including) the
///   terminating newline.
/// * `/* … */` block comments are copied verbatim. A block comment that is
///   never closed extends to the end of the input and is copied unchanged.
/// * `'…'` string literals are copied verbatim; a doubled `''` inside a
///   literal is an escaped quote and does not end the literal.
/// * Identifier-like tokens (starting with a letter, `_`, `@`, `#` or `$`,
///   continuing with letters, digits, those symbols, or a `-` that is directly
///   followed by a letter) are ASCII-uppercased.
/// * Every other character is copied unchanged.
///
/// Returns the normalised text as a new `String`; `source` is not modified.
fn uppercase_keywords_for_bnf(source: &str) -> String {
    // Symbols accepted in a name in addition to letters and digits.
    fn is_name_symbol(c: char) -> bool {
        matches!(c, '_' | '@' | '#' | '$')
    }

    let chars: Vec<char> = source.chars().collect();
    let len = chars.len();
    let mut out = String::with_capacity(source.len());
    let mut i = 0;

    while i < len {
        let c = chars[i];
        let next = chars.get(i + 1).copied();

        // Line comment `// …` — copy verbatim; the newline itself is left for
        // the generic "everything else" branch on the next iteration.
        if c == '/' && next == Some('/') {
            let end = chars[i..]
                .iter()
                .position(|&ch| ch == '\n')
                .map_or(len, |off| i + off);
            out.extend(&chars[i..end]);
            i = end;
            continue;
        }

        // Block comment `/* … */` — copy verbatim.  The search for the closing
        // `*/` starts after the opener so that `/*/` is not treated as closed.
        // When no terminator exists the comment runs to end of input; copying
        // the whole remainder keeps the final character from leaking into the
        // identifier branch and being uppercased.
        if c == '/' && next == Some('*') {
            let body_start = i + 2;
            let end = chars[body_start..]
                .windows(2)
                .position(|w| w[0] == '*' && w[1] == '/')
                .map_or(len, |off| body_start + off + 2);
            out.extend(&chars[i..end]);
            i = end;
            continue;
        }

        // String literal '…' — copy verbatim (including the '' escape).
        if c == '\'' {
            out.push(c);
            i += 1;
            while i < len {
                let ch = chars[i];
                out.push(ch);
                i += 1;
                if ch == '\'' {
                    if chars.get(i) == Some(&'\'') {
                        // '' is an escaped quote — still inside the literal.
                        out.push('\'');
                        i += 1;
                    } else {
                        break;
                    }
                }
            }
            continue;
        }

        // Identifier / keyword — uppercase it so the BNF terminals match.
        if c.is_alphabetic() || is_name_symbol(c) {
            while i < len {
                let ch = chars[i];
                // A hyphen belongs to the token only when a letter follows,
                // which keeps `Dcl-S` together but leaves `i-1` alone.
                let hyphen_joins = ch == '-'
                    && chars.get(i + 1).map_or(false, |n| n.is_alphabetic());
                if !(ch.is_alphanumeric() || is_name_symbol(ch) || hyphen_joins) {
                    break;
                }
                out.push(ch.to_ascii_uppercase());
                i += 1;
            }
            continue;
        }

        // Everything else (operators, punctuation, whitespace, digits).
        out.push(c);
        i += 1;
    }

    out
}

// ─────────────────────────────────────────────────────────────────────────────
// CLI definition
// ─────────────────────────────────────────────────────────────────────────────
// @@ -127,16 +217,82 @@ fn main() {
}; // ── BNF validation ──────────────────────────────────────────────────── - let tree_opt = parse_as(&bnf_parser, source_text.trim(), "program") - .or_else(|| parse_as(&bnf_parser, source_text.trim(), "source-file")); + // RPG IV keywords are case-insensitive, but the BNF grammar uses + // uppercase terminal literals. Normalise the source before checking. + let normalised = uppercase_keywords_for_bnf(source_text.trim()); + let tree_opt = parse_as(&bnf_parser, normalised.trim(), "program") + .or_else(|| parse_as(&bnf_parser, normalised.trim(), "source-file")); if tree_opt.is_none() { + // BNF validation is a structural sanity-check. Emit a warning so + // the developer knows something looks off, but continue with the + // lowering pass which is more permissive and gives better errors. eprintln!( - "error: '{}' did not match the RPG IV grammar", + "warning: '{}' did not fully match the RPG IV grammar — \ + attempting to compile anyway", source_path.display() ); - any_error = true; - continue; + + // ── Helpful diagnostics ────────────────────────────────────────── + // Scan for the first line the BNF cannot classify to give the user + // a concrete hint about what caused the mismatch. + let top_level_rules = &[ + "control-spec", + "standalone-decl", + "constant-decl", + "data-structure-decl", + "file-decl", + "procedure", + "subroutine", + "statement", + ]; + 'outer: for (lineno, raw_line) in source_text.lines().enumerate() { + let trimmed = raw_line.trim(); + let norm_check = trimmed.to_ascii_uppercase(); + // Skip blanks, comments, compiler directives, and lines that + // introduce multi-line constructs (DCL-PROC, END-PROC, DCL-DS, + // END-DS, DCL-PI, END-PI, BEG-SR, END-SR) — these will never + // match a single-line grammar rule and are not errors. 
+ if trimmed.is_empty() + || trimmed.starts_with("//") + || trimmed.starts_with("/*") + || trimmed.starts_with("**") + || norm_check.starts_with("DCL-PROC") + || norm_check.starts_with("END-PROC") + || norm_check.starts_with("DCL-DS") + || norm_check.starts_with("END-DS") + || norm_check.starts_with("DCL-PI") + || norm_check.starts_with("END-PI") + || norm_check.starts_with("BEG-SR") + || norm_check.starts_with("END-SR") + { + continue; + } + // Strip inline line comments before BNF matching so that + // `fib(1) = 0; // some comment` doesn't cause a false positive. + let trimmed_no_comment = if let Some(idx) = trimmed.find("//") { + trimmed[..idx].trim_end() + } else { + trimmed + }; + let norm_line = uppercase_keywords_for_bnf(trimmed_no_comment); + let mut matched = false; + for rule in top_level_rules { + if parse_as(&bnf_parser, norm_line.trim(), rule).is_some() { + matched = true; + break; + } + } + if !matched { + eprintln!( + " hint (line {}): unrecognised grammar construct: {:?}", + lineno + 1, + if trimmed.len() > 80 { &trimmed[..80] } else { trimmed } + ); + break 'outer; + } + } + // Fall through — try lowering anyway. } // ── --emit-tree: print parse tree and stop ──────────────────────────── diff --git a/src/rpg.bnf b/src/rpg.bnf index be4a391..8a09371 100644 --- a/src/rpg.bnf +++ b/src/rpg.bnf @@ -1,9 +1,16 @@ ::= ' ' | ' ' | ' -' | ' ' +' | ' +' ::= | ::= | '' ::= + | + | + + ::= '**FREE' + | '**free' + | '**Free' ::= | @@ -24,7 +31,9 @@ | ::= 'DCL-PROC' ';' 'END-PROC' ';' + | 'DCL-PROC' ';' 'END-PROC' ';' | 'DCL-PROC' ';' 'END-PROC' ';' + | 'DCL-PROC' ';' 'END-PROC' ';' ::= |