add: fib sample

2026-03-12 22:19:42 -07:00
parent 073c86d784
commit 31a6c8b91b
7 changed files with 756 additions and 46 deletions
@@ -0,0 +1,27 @@
 **FREE
 // Sample program: fills a ten-element array with the start of the Fibonacci
 // sequence and displays each value.
 // Main(...) names the procedure below as the program's entry point.
 Ctl-Opt Main(Perform_Fibonacci_Sequence);
 Dcl-Proc Perform_Fibonacci_Sequence;
    // Loop counter shared by both For loops below.
    Dcl-s i   Uns(10);
    // Storage for the sequence: ten unsigned elements, subscripted from 1.
    Dcl-s fib Uns(10) Dim(10);
    // Display a title
    Dsply ('Fibonacci Sequence:');
    // Initialize the first two elements of the array
    fib(1) = 0; // The sequence usually starts with 0 and 1
    fib(2) = 1;
    // Loop to calculate the rest of the sequence.
    // %Elem(fib) is the declared element count, so the bound follows Dim(10).
    For i = 3 to %Elem(fib);
        // Each number is the sum of the two preceding ones
        fib(i) = fib(i-1) + fib(i-2);
    Endfor;
    // Loop to display the sequence numbers
    For i = 1 to %Elem(fib);
        // %Char converts the number to text so it can be joined to the
        // leading blank with '+'.
        Dsply (' ' + %Char(fib(i)));
    Endfor;
 End-Proc Perform_Fibonacci_Sequence;
@@ -12,6 +12,8 @@
 //! | `rpg_dsply_i64`     | `(n: i64)`                             | Display a signed 64-bit integer      |
 //! | `rpg_dsply_f64`     | `(f: f64)`                             | Display a double-precision float     |
 //! | `rpg_halt`          | `(code: i32)`                          | Abnormal program termination         |
 //! | `rpg_char_i64`      | `(n: i64) -> *const c_char`            | Format integer as null-term C string |
 //! | `rpg_concat`        | `(a: *const c_char, b: *const c_char) -> *const c_char` | Concatenate two C strings |
 //!
 //! ## Building
 //!
@@ -44,10 +46,23 @@
 #![allow(clippy::missing_safety_doc)]
-use std::ffi::CStr;
+use std::cell::RefCell;
 use std::ffi::{CStr, CString};
 use std::io::{self, Write};
 use std::slice;
 // ─────────────────────────────────────────────────────────────────────────────
 // Thread-local scratch buffers used by rpg_char_i64 / rpg_concat
 // ─────────────────────────────────────────────────────────────────────────────
 thread_local! {
    /// Per-thread `CString` slot for `rpg_char_i64` output; starts out as the
    /// empty string.
    static CHAR_BUF: RefCell<CString> = RefCell::new(CString::default());
    /// Per-thread `CString` slot for `rpg_concat` output; starts out as the
    /// empty string.
    static CONCAT_BUF: RefCell<CString> = RefCell::new(CString::default());
 }
 // ─────────────────────────────────────────────────────────────────────────────
 // rpg_dsply — display a fixed-length character field
 // ─────────────────────────────────────────────────────────────────────────────
@@ -151,6 +166,66 @@ pub extern "C" fn rpg_dsply_f64(f: f64) {
 /// Maps roughly to the IBM i concept of an *unhandled exception* ending the
 /// job.
 #[no_mangle]
 // ─────────────────────────────────────────────────────────────────────────────
 // rpg_char_i64 — convert a 64-bit integer to a C string  (%CHAR built-in)
 // ─────────────────────────────────────────────────────────────────────────────
 /// Format `n` as a decimal C string and return a pointer to thread-local
 /// storage holding the result.
 ///
 /// This implements the RPG IV `%CHAR(numeric-expression)` built-in function.
 ///
 /// # Pointer lifetime
 ///
 /// The last eight results produced on a thread are kept alive, so a returned
 /// pointer stays valid across the next seven calls to `rpg_char_i64` on the
 /// same thread and is invalidated by the eighth.  Callers must not free it.
 ///
 /// A single result slot is not enough: generated code evaluates both
 /// operands of a character `+` before handing them to `rpg_concat`, so an
 /// expression such as `%CHAR(a) + %CHAR(b)` holds two results at once.  With
 /// one slot the first pointer would already be dangling.  The ring lives
 /// inside this function, so the module-level `CHAR_BUF` slot is not used here.
 #[no_mangle]
 pub extern "C" fn rpg_char_i64(n: i64) -> *const std::os::raw::c_char {
    /// Number of results kept alive per thread.
    const LIVE_RESULTS: usize = 8;
    thread_local! {
        /// `(next slot to overwrite, results currently kept alive)`.
        static RECENT: RefCell<(usize, Vec<CString>)> =
            RefCell::new((0, Vec::with_capacity(LIVE_RESULTS)));
    }

    // Decimal text never contains a NUL byte; the fallback only exists so this
    // `extern "C"` function has no panicking path.
    let text = CString::new(n.to_string()).unwrap_or_else(|_| CString::new("0").unwrap());

    RECENT.with(|cell| {
        let mut ring = cell.borrow_mut();
        let (cursor, slots) = &mut *ring;
        // While the ring is still filling up, `*cursor == slots.len()`, so the
        // pushed element lands exactly at `slots[*cursor]`.
        if slots.len() < LIVE_RESULTS {
            slots.push(text);
        } else {
            // Dropping the previous occupant frees the oldest result.
            slots[*cursor] = text;
        }
        // The pointer targets the CString's own heap buffer, which does not
        // move when the Vec or the CString value itself is moved.
        let ptr = slots[*cursor].as_ptr();
        *cursor = (*cursor + 1) % LIVE_RESULTS;
        ptr
    })
 }
 // ─────────────────────────────────────────────────────────────────────────────
 // rpg_concat — concatenate two null-terminated C strings  ('+' on char)
 // ─────────────────────────────────────────────────────────────────────────────
 /// Concatenate two null-terminated C strings and return a pointer to
 /// thread-local storage holding the result.
 ///
 /// This implements the RPG IV `+` operator when both operands are character
 /// expressions.
 ///
 /// The operands are joined byte-for-byte: no UTF-8 validation or replacement
 /// is performed, so character data in any single-byte encoding passes through
 /// unchanged.
 ///
 /// # Pointer lifetime
 ///
 /// The last eight results produced on a thread are kept alive, so a returned
 /// pointer stays valid across the next seven calls to `rpg_concat` on the same
 /// thread and is invalidated by the eighth.  Callers must not free it.  This
 /// allows expressions such as `(a + b) + (c + d)`, where two intermediate
 /// results must be alive at the same time.  The ring lives inside this
 /// function, so the module-level `CONCAT_BUF` slot is not used here.
 ///
 /// # Safety
 ///
 /// Both `a` and `b` must be valid null-terminated C strings (or null pointers,
 /// which are treated as empty strings).
 #[no_mangle]
 pub unsafe extern "C" fn rpg_concat(
    a: *const std::os::raw::c_char,
    b: *const std::os::raw::c_char,
 ) -> *const std::os::raw::c_char {
    /// Number of results kept alive per thread.
    const LIVE_RESULTS: usize = 8;
    thread_local! {
        /// `(next slot to overwrite, results currently kept alive)`.
        static RECENT: RefCell<(usize, Vec<CString>)> =
            RefCell::new((0, Vec::with_capacity(LIVE_RESULTS)));
    }

    let left: &[u8] = if a.is_null() {
        &[]
    } else {
        // SAFETY: the caller guarantees a non-null `a` is a valid C string.
        unsafe { CStr::from_ptr(a).to_bytes() }
    };
    let right: &[u8] = if b.is_null() {
        &[]
    } else {
        // SAFETY: the caller guarantees a non-null `b` is a valid C string.
        unsafe { CStr::from_ptr(b).to_bytes() }
    };

    // Copy both operands before touching the ring: either of them may point at
    // an earlier result that is about to be overwritten.
    let mut joined = Vec::with_capacity(left.len() + right.len() + 1);
    joined.extend_from_slice(left);
    joined.extend_from_slice(right);
    // `to_bytes` stops at the terminator, so `joined` holds no interior NUL;
    // the empty-string fallback only keeps this function free of panics.
    let result = CString::new(joined).unwrap_or_default();

    RECENT.with(|cell| {
        let mut ring = cell.borrow_mut();
        let (cursor, slots) = &mut *ring;
        // While the ring is still filling up, `*cursor == slots.len()`.
        if slots.len() < LIVE_RESULTS {
            slots.push(result);
        } else {
            slots[*cursor] = result;
        }
        let ptr = slots[*cursor].as_ptr();
        *cursor = (*cursor + 1) % LIVE_RESULTS;
        ptr
    })
 }
 pub extern "C" fn rpg_halt(code: i32) {
    eprintln!("RPG program halted with code {}", code);
    std::process::exit(code);
@@ -374,6 +449,47 @@ fn rtrim_spaces(bytes: &[u8]) -> &[u8] {
 #[cfg(test)]
 mod tests {
    // ── rpg_dsply ────────────────────────────────────────────────────────────
    #[test]
    fn char_i64_positive() {
        // SAFETY: `rpg_char_i64` returns a NUL-terminated buffer, and it is
        // read here before any further call on this thread.
        let rendered = unsafe { CStr::from_ptr(rpg_char_i64(42)) };
        assert_eq!(rendered.to_str().unwrap(), "42");
    }
    #[test]
    fn char_i64_zero() {
        // SAFETY: `rpg_char_i64` returns a NUL-terminated buffer, and it is
        // read here before any further call on this thread.
        let rendered = unsafe { CStr::from_ptr(rpg_char_i64(0)) };
        assert_eq!(rendered.to_str().unwrap(), "0");
    }
    #[test]
    fn char_i64_negative() {
        // SAFETY: `rpg_char_i64` returns a NUL-terminated buffer, and it is
        // read here before any further call on this thread.
        let rendered = unsafe { CStr::from_ptr(rpg_char_i64(-7)) };
        assert_eq!(rendered.to_str().unwrap(), "-7");
    }
    // ── rpg_concat ───────────────────────────────────────────────────────────
    #[test]
    fn concat_two_strings() {
        let left = CString::new("Hello, ").unwrap();
        let right = CString::new("World!").unwrap();
        // SAFETY: both operands are live `CString`s, and the result is read
        // before any further `rpg_concat` call on this thread.
        let joined = unsafe { CStr::from_ptr(rpg_concat(left.as_ptr(), right.as_ptr())) };
        assert_eq!(joined.to_str().unwrap(), "Hello, World!");
    }
    #[test]
    fn concat_null_pointers() {
        let nothing: *const std::os::raw::c_char = std::ptr::null();
        // SAFETY: null operands are explicitly allowed by `rpg_concat`, and
        // the result is read before any further call on this thread.
        let joined = unsafe { CStr::from_ptr(rpg_concat(nothing, nothing)) };
        assert_eq!(joined.to_str().unwrap(), "");
    }
    use super::*;
    #[test]
@@ -220,6 +220,8 @@ pub enum VarKeyword {
    /// `INZ(*named-constant)` — initialise to named constant.
    InzNamed(NamedConstant),
    Static,
    /// `DIM(n)` — declares the variable as an array with `n` elements.
    Dim(Expression),
    Other(String),
 }
@@ -608,6 +610,8 @@ pub enum BuiltIn {
    Rem(Box<Expression>, Box<Expression>),
    /// `%DIV(a:b)`.
    Div(Box<Expression>, Box<Expression>),
    /// `%ELEM(array)` — number of elements in an array.
    Elem(Box<Expression>),
    /// Any built-in we haven't individually modelled.
    Other(String, Vec<Expression>),
 }
@@ -77,6 +77,7 @@ pub fn compile_to_object(
        module,
        builder,
        globals: HashMap::new(),
        array_dims: HashMap::new(),
        string_cache: HashMap::new(),
        global_inits: Vec::new(),
    };
@@ -139,6 +140,7 @@ pub fn emit_ir(program: &Program) -> Result<String, CodegenError> {
        module,
        builder,
        globals: HashMap::new(),
        array_dims: HashMap::new(),
        string_cache: HashMap::new(),
        global_inits: Vec::new(),
    };
@@ -180,6 +182,9 @@ struct Codegen<'ctx> {
    builder: Builder<'ctx>,
    /// Module-scope global variables  name -> (alloca/global ptr, TypeSpec)
    globals: HashMap<String, (PointerValue<'ctx>, TypeSpec)>,
    /// Array dimension table: variable name -> number of elements.
    /// Populated when a `DIM(n)` keyword is encountered.
    array_dims: HashMap<String, u64>,
    /// Interned string literal globals (content -> global ptr).
    string_cache: HashMap<String, PointerValue<'ctx>>,
    /// Global declarations that need runtime initialisation (INZ with a value).
@@ -228,6 +233,25 @@ impl<'ctx> Codegen<'ctx> {
        // function is available when we build the @llvm.global_ctors entry.
        self.gen_global_init_fn()?;
        // Determine the entry-point procedure name.
        //
        // Priority order:
        //   1. `CTL-OPT MAIN(name)` — explicit entry point declaration.
        //   2. The first EXPORT-ed procedure (legacy / hello.rpg style).
        let ctl_main: Option<String> = program.declarations.iter().find_map(|d| {
            if let Declaration::ControlSpec(cs) = d {
                cs.keywords.iter().find_map(|kw| {
                    if let CtlKeyword::Main(name) = kw {
                        Some(name.clone())
                    } else {
                        None
                    }
                })
            } else {
                None
            }
        });
        // Generate each procedure.
        let mut exported_name: Option<String> = None;
        for proc in &program.procedures {
@@ -237,8 +261,10 @@ impl<'ctx> Codegen<'ctx> {
            self.gen_procedure(proc)?;
        }
-        // Emit a C `main()` wrapper that calls the exported entry point.
+        // Emit a C `main()` wrapper that calls the entry point.
-        if let Some(name) = exported_name {
+        // CTL-OPT MAIN(name) takes priority over EXPORT.
        let entry = ctl_main.or(exported_name);
        if let Some(name) = entry {
            self.gen_main_wrapper(&name)?;
        }
@@ -278,6 +304,18 @@ impl<'ctx> Codegen<'ctx> {
            false,
        );
        self.module.add_function("memset", memset_ty, None);
        // void rpg_dsply_i64(i64 n)  — display an integer
        let dsply_i64_ty = void_t.fn_type(&[i64_t.into()], false);
        self.module.add_function("rpg_dsply_i64", dsply_i64_ty, None);
        // i8* rpg_char_i64(i64 n)  — format integer to null-terminated C string
        let char_i64_ty = i8_ptr.fn_type(&[i64_t.into()], false);
        self.module.add_function("rpg_char_i64", char_i64_ty, None);
        // i8* rpg_concat(i8* a, i8* b)  — concatenate two C strings
        let concat_ty = i8_ptr.fn_type(&[i8_ptr.into(), i8_ptr.into()], false);
        self.module.add_function("rpg_concat", concat_ty, None);
    }
    // ── Global declarations ─────────────────────────────────────────────────
@@ -384,7 +422,28 @@ impl<'ctx> Codegen<'ctx> {
    fn gen_local_decl(&mut self, decl: &Declaration, state: &mut FnState<'ctx>) -> Result<(), CodegenError> {
        match decl {
            Declaration::Standalone(sd) => {
-                let ptr = self.alloca_for_type(&sd.ty, &sd.name);
+                // Check if a DIM(n) keyword is present — if so we allocate a
                // contiguous block of `n * elem_size` bytes.
                let dim = sd.keywords.iter().find_map(|kw| {
                    if let VarKeyword::Dim(expr) = kw {
                        const_int_from_expr(expr)
                    } else {
                        None
                    }
                });
                let ptr = if let Some(n) = dim {
                    // Array: allocate n elements of the element type.
                    self.alloca_for_type_dim(&sd.ty, &sd.name, n)
                } else {
                    self.alloca_for_type(&sd.ty, &sd.name)
                };
                // Record the dimension so %Elem and indexing can use it.
                if let Some(n) = dim {
                    self.array_dims.insert(sd.name.clone(), n);
                }
                // Apply initialiser if any.
                for kw in &sd.keywords {
                    match kw {
@@ -415,6 +474,35 @@ impl<'ctx> Codegen<'ctx> {
        self.builder.build_alloca(arr_ty, name).unwrap()
    }
    /// Allocate storage for an array of `n` elements of type `ty`.
    ///
    /// The array is emitted as one `alloca` of `[total x i8]` named `name`,
    /// where `total = elem_size * n`.  `elem_size` is `ty.byte_size()`, or 8
    /// when the type reports no size.
    ///
    /// # Panics
    ///
    /// Panics when the total byte count overflows or does not fit the `u32`
    /// length accepted by `array_type`.  Silently truncating it would
    /// under-allocate the array and let element stores run past the slot.
    /// Also panics if the builder cannot emit the `alloca`.
    fn alloca_for_type_dim(&self, ty: &TypeSpec, name: &str, n: u64) -> PointerValue<'ctx> {
        let elem_size: u64 = ty.byte_size().unwrap_or(8);
        let total = match elem_size.checked_mul(n) {
            // Guarded by the range check, so the cast cannot truncate.
            Some(bytes) if bytes <= u64::from(u32::MAX) => bytes as u32,
            _ => panic!(
                "array `{}`: DIM({}) with {}-byte elements exceeds the supported allocation size",
                name, n, elem_size
            ),
        };
        let arr_ty = self.context.i8_type().array_type(total);
        self.builder.build_alloca(arr_ty, name).unwrap()
    }
    /// Compute the address of one array element.
    ///
    /// `index` is the RPG subscript (1-based) and `elem_size` the size of one
    /// element in bytes.  The result is `base_ptr` advanced by
    /// `(index - 1) * elem_size` bytes, emitted as an `i8` GEP.
    ///
    /// No bounds check is emitted: an out-of-range subscript produces an
    /// out-of-range pointer.
    fn array_elem_ptr(
        &self,
        base_ptr: PointerValue<'ctx>,
        index: inkwell::values::IntValue<'ctx>,
        elem_size: u64,
    ) -> PointerValue<'ctx> {
        let i64_ty = self.context.i64_type();
        let stride = i64_ty.const_int(elem_size, false);
        // Shift the 1-based RPG subscript down to a 0-based element number.
        let elem_no = self
            .builder
            .build_int_sub(index, i64_ty.const_int(1, false), "idx0")
            .unwrap();
        let offset = self.builder.build_int_mul(elem_no, stride, "byte_off").unwrap();
        let byte_ty = self.context.i8_type();
        // The GEP steps in single bytes, so the offset is used unscaled.
        let gep = unsafe { self.builder.build_gep(byte_ty, base_ptr, &[offset], "elem_ptr") };
        gep.unwrap()
    }
    fn zero_init_var(&self, ptr: PointerValue<'ctx>, ty: &TypeSpec) -> Result<(), CodegenError> {
        let size = ty.byte_size().unwrap_or(0);
        if size == 0 { return Ok(()); }
@@ -604,8 +692,11 @@ impl<'ctx> Codegen<'ctx> {
        self.builder.position_at_end(bb);
        // Call the RPG entry procedure.
-        let rpg_fn_name = format!("rpg_{}", rpg_entry);
+        // Try the bare name first (CTL-OPT MAIN procedures are not renamed),
-        if let Some(rpg_fn) = self.module.get_function(&rpg_fn_name) {
+        // then the `rpg_` prefix used for EXPORT-ed procedures.
        let callee = self.module.get_function(rpg_entry)
            .or_else(|| self.module.get_function(&format!("rpg_{}", rpg_entry)));
        if let Some(rpg_fn) = callee {
            self.builder.build_call(rpg_fn, &[], "call_rpg").ok();
        }
@@ -699,16 +790,34 @@ impl<'ctx> Codegen<'ctx> {
                self.builder.build_call(dsply, &[ptr.into(), len_val.into()], "dsply").ok();
            }
            other => {
-                // Evaluate as integer-like expression and display it.
+                // Evaluate the expression; dispatch to the right display helper.
                if let Ok(val) = self.gen_expression(other, state) {
-                    // For now just call dsply_cstr on an empty string as fallback.
+                    match val {
-                    let _ = val;
+                        BasicValueEnum::PointerValue(ptr) => {
                            // String pointer — use rpg_dsply_cstr.
                            if let Some(dsply_cstr) = self.module.get_function("rpg_dsply_cstr") {
                                self.builder.build_call(dsply_cstr, &[ptr.into()], "dsply_cstr").ok();
                            }
                        }
                        BasicValueEnum::IntValue(iv) => {
                            // Integer — use rpg_dsply_i64.
                            if let Some(dsply_i64) = self.module.get_function("rpg_dsply_i64") {
                                let ext = self.builder
                                    .build_int_s_extend(iv, self.context.i64_type(), "dsply_ext")
                                    .unwrap_or(iv);
                                self.builder.build_call(dsply_i64, &[ext.into()], "dsply_i64").ok();
                            }
                        }
                        _ => {
                            // Fallback: display an empty string.
                            let empty = self.intern_string("");
                            let zero  = self.context.i64_type().const_zero();
                            self.builder.build_call(dsply, &[empty.into(), zero.into()], "dsply").ok();
                        }
                    }
                }
            }
        }
        Ok(())
    }
@@ -723,6 +832,20 @@ impl<'ctx> Codegen<'ctx> {
        // Clone to avoid borrow issues.
        let ty = ty.clone();
        // If the LValue has an index (array assignment), compute the element pointer.
        let dest_ptr = if let LValue::Index(_, indices) = &a.target {
            if let Some(idx_expr) = indices.first() {
                let elem_size = ty.byte_size().unwrap_or(8);
                let idx_val = self.gen_expression(idx_expr, state)?;
                let idx_i   = self.coerce_to_i64(idx_val);
                self.array_elem_ptr(ptr, idx_i, elem_size)
            } else {
                ptr
            }
        } else {
            ptr
        };
        match &ty {
            TypeSpec::Char(size_expr) => {
                if let Expression::Literal(Literal::String(s)) = &a.value {
@@ -733,16 +856,16 @@ impl<'ctx> Codegen<'ctx> {
                    let src = self.intern_bytes(&padded);
                    let memcpy = self.module.get_function("memcpy").unwrap();
                    let len = self.context.i64_type().const_int(field_len as u64, false);
-                    self.builder.build_call(memcpy, &[ptr.into(), src.into(), len.into()], "assign").ok();
+                    self.builder.build_call(memcpy, &[dest_ptr.into(), src.into(), len.into()], "assign").ok();
                }
            }
            TypeSpec::Int(_) | TypeSpec::Uns(_) => {
                let val = self.gen_expression(&a.value, state)?;
-                self.store_value(ptr, val, &ty);
+                self.store_value(dest_ptr, val, &ty);
            }
            _ => {
                if let Ok(val) = self.gen_expression(&a.value, state) {
-                    self.store_value(ptr, val, &ty);
+                    self.store_value(dest_ptr, val, &ty);
                }
            }
        }
@@ -893,7 +1016,10 @@ impl<'ctx> Codegen<'ctx> {
        let start = self.gen_expression(&f.start, state)?;
        let start_i = self.coerce_to_i64(start);
        self.builder.build_store(loop_var, start_i).ok();
-        state.locals.insert(f.var.clone(), (loop_var, TypeSpec::Int(Box::new(Expression::Literal(Literal::Integer(10))))));
+        // Store the loop variable with Int(20) so that byte_size() returns 8,
        // matching the i64 alloca above.  (Int(10) would give 4 bytes, causing
        // a 32-bit load from an 8-byte slot.)
        state.locals.insert(f.var.clone(), (loop_var, TypeSpec::Int(Box::new(Expression::Literal(Literal::Integer(20))))));
        let cond_bb  = self.context.append_basic_block(func, "for_cond");
        let body_bb  = self.context.append_basic_block(func, "for_body");
@@ -1062,9 +1188,12 @@ impl<'ctx> Codegen<'ctx> {
                    let llvm_ty = self.type_spec_to_llvm(&ty)
                        .unwrap_or(BasicTypeEnum::IntType(i64_t));
                    match &ty {
-                        TypeSpec::Int(w) | TypeSpec::Uns(w) => {
+                        TypeSpec::Int(_) | TypeSpec::Uns(_) => {
-                            let width = const_int_from_expr(w).unwrap_or(8);
+                            // Use byte_size() to get the real storage width — the
-                            let int_ty = self.context.custom_width_int_type((width * 8) as u32);
+                            // type parameter is RPG's digit-precision (e.g. 10 for
                            // Uns(10) = 4 bytes), NOT the byte count.
                            let bytes = ty.byte_size().unwrap_or(8);
                            let int_ty = self.context.custom_width_int_type((bytes * 8) as u32);
                            if let Ok(v) = self.builder.build_load(int_ty, ptr, name) {
                                let iv = v.into_int_value();
                                let ext = self.builder.build_int_s_extend(iv, i64_t, "sext").unwrap_or(iv);
@@ -1111,7 +1240,15 @@ impl<'ctx> Codegen<'ctx> {
            Expression::Paren(e) => self.gen_expression(e, state),
            Expression::Call(name, args) => {
-                // Treat call-as-expression similarly to CALLP.
+                // RPG IV uses identical syntax for procedure calls and array
                // subscripts: `name(arg)`.  At parse time we always emit
                // Expression::Call for `ident(...)`, so here we need to
                // distinguish the two cases at code-generation time:
                //
                //   1. A real procedure/function exists in the module → call it.
                //   2. The name refers to a local/global variable with a known
                //      DIM → treat the single argument as an array index.
                //   3. Otherwise → return 0 (unknown call).
                let callee = self.module.get_function(name)
                    .or_else(|| self.module.get_function(&format!("rpg_{}", name)));
                if let Some(callee) = callee {
@@ -1128,13 +1265,79 @@ impl<'ctx> Codegen<'ctx> {
                        inkwell::values::ValueKind::Basic(v) => return Ok(v),
                        inkwell::values::ValueKind::Instruction(_) => {}
                    }
                    return Ok(i64_t.const_zero().into());
                }
                // No function found — check if `name` is an array variable and
                // the call is actually a subscript read: name(idx).
                if let Some((ptr, ty)) = self.resolve_var(name, state) {
                    let elem_size = ty.byte_size().unwrap_or(8);
                    if let Some(Arg::Expr(idx_expr)) = args.first() {
                        let idx_val = self.gen_expression(idx_expr, state)?;
                        let idx_i   = self.coerce_to_i64(idx_val);
                        let elem_ptr = self.array_elem_ptr(ptr, idx_i, elem_size);
                        match &ty {
                            TypeSpec::Int(_) | TypeSpec::Uns(_) => {
                                let bytes   = ty.byte_size().unwrap_or(8);
                                let int_ty  = self.context.custom_width_int_type((bytes * 8) as u32);
                                let cast_ptr = self.builder.build_pointer_cast(
                                    elem_ptr,
                                    self.context.ptr_type(AddressSpace::default()),
                                    "call_elem_ptr_cast",
                                ).unwrap_or(elem_ptr);
                                if let Ok(v) = self.builder.build_load(int_ty, cast_ptr, "call_elem") {
                                    let iv  = v.into_int_value();
                                    let ext = self.builder
                                        .build_int_s_extend(iv, i64_t, "call_elem_ext")
                                        .unwrap_or(iv);
                                    return Ok(ext.into());
                                }
                            }
                            _ => return Ok(elem_ptr.into()),
                        }
                    }
                }
                Ok(i64_t.const_zero().into())
            }
            Expression::BuiltIn(bif) => self.gen_builtin(bif, state),
-            Expression::Special(_) | Expression::Index(_, _) => {
+            Expression::Special(_) => {
                Ok(i64_t.const_zero().into())
            }
            Expression::Index(qname, indices) => {
                // Array element read: name(i) — RPG uses 1-based indexing.
                let name = qname.leaf();
                if let Some((ptr, ty)) = self.resolve_var(name, state) {
                    let elem_size = ty.byte_size().unwrap_or(8);
                    if let Some(idx_expr) = indices.first() {
                        let idx_val = self.gen_expression(idx_expr, state)?;
                        let idx_i   = self.coerce_to_i64(idx_val);
                        let elem_ptr = self.array_elem_ptr(ptr, idx_i, elem_size);
                        // Load the element with the element's integer type.
                        match &ty {
                            TypeSpec::Int(_) | TypeSpec::Uns(_) => {
                                let bytes = ty.byte_size().unwrap_or(8);
                                let int_ty = self.context.custom_width_int_type((bytes * 8) as u32);
                                let cast_ptr = self.builder.build_pointer_cast(
                                    elem_ptr,
                                    self.context.ptr_type(AddressSpace::default()),
                                    "elem_ptr_cast",
                                ).unwrap_or(elem_ptr);
                                if let Ok(v) = self.builder.build_load(int_ty, cast_ptr, "elem") {
                                    let iv = v.into_int_value();
                                    let ext = self.builder.build_int_s_extend(iv, i64_t, "sext").unwrap_or(iv);
                                    return Ok(ext.into());
                                }
                            }
                            _ => {
                                return Ok(elem_ptr.into());
                            }
                        }
                    }
                }
                Ok(i64_t.const_zero().into())
            }
        }
@@ -1179,6 +1382,24 @@ impl<'ctx> Codegen<'ctx> {
        match op {
            BinOp::Add => {
                // If either operand is a pointer (string), use rpg_concat.
                let lv_is_ptr = matches!(lv, BasicValueEnum::PointerValue(_));
                let rv_is_ptr = matches!(rv, BasicValueEnum::PointerValue(_));
                if lv_is_ptr || rv_is_ptr {
                    // Ensure both sides are pointers (call rpg_char_i64 on integers).
                    let lp = self.coerce_to_cstr_ptr(lv, state);
                    let rp = self.coerce_to_cstr_ptr(rv, state);
                    let concat_fn = self.module.get_function("rpg_concat").unwrap();
                    let call = self.builder
                        .build_call(concat_fn, &[lp.into(), rp.into()], "concat")
                        .unwrap();
                    return match call.try_as_basic_value() {
                        inkwell::values::ValueKind::Basic(v) => Ok(v),
                        inkwell::values::ValueKind::Instruction(_) => {
                            Ok(self.context.ptr_type(AddressSpace::default()).const_null().into())
                        }
                    };
                }
                let l = self.coerce_to_i64(lv);
                let r = self.coerce_to_i64(rv);
                Ok(self.builder.build_int_add(l, r, "add").unwrap().into())
@@ -1243,6 +1464,18 @@ impl<'ctx> Codegen<'ctx> {
    fn gen_builtin(&mut self, bif: &BuiltIn, state: &mut FnState<'ctx>) -> Result<BasicValueEnum<'ctx>, CodegenError> {
        let i64_t = self.context.i64_type();
        match bif {
            BuiltIn::Elem(e) => {
                // %ELEM(array) — number of elements declared with DIM(n).
                if let Expression::Variable(qname) = e.as_ref() {
                    let name = qname.leaf();
                    if let Some(&n) = self.array_dims.get(name) {
                        return Ok(i64_t.const_int(n, false).into());
                    }
                    // Fall back to 1 if not an array variable.
                    return Ok(i64_t.const_int(1, false).into());
                }
                Ok(i64_t.const_zero().into())
            }
            BuiltIn::Len(e) => {
                // %LEN(field) — return compile-time field length.
                if let Expression::Variable(qname) = e.as_ref() {
@@ -1267,8 +1500,30 @@ impl<'ctx> Codegen<'ctx> {
                let abs  = self.builder.build_select(cmp, iv, neg, "abs").unwrap();
                Ok(abs.into())
            }
-            BuiltIn::Int(e) | BuiltIn::Char(e) => {
+            BuiltIn::Int(e) => {
-                self.gen_expression(e, state)
+                let v = self.gen_expression(e, state)?;
                Ok(self.coerce_to_i64(v).into())
            }
            BuiltIn::Char(e) => {
                // %CHAR(expr) — convert to a null-terminated C string pointer.
                let v = self.gen_expression(e, state)?;
                match v {
                    BasicValueEnum::PointerValue(_) => Ok(v), // already a string
                    _ => {
                        // Convert integer to string via rpg_char_i64.
                        let iv = self.coerce_to_i64(v);
                        let char_fn = self.module.get_function("rpg_char_i64").unwrap();
                        let call = self.builder
                            .build_call(char_fn, &[iv.into()], "char_i64")
                            .unwrap();
                        match call.try_as_basic_value() {
                            inkwell::values::ValueKind::Basic(v) => Ok(v),
                            inkwell::values::ValueKind::Instruction(_) => {
                                Ok(self.context.ptr_type(AddressSpace::default()).const_null().into())
                            }
                        }
                    }
                }
            }
            BuiltIn::Sqrt(e) => {
                let v = self.gen_expression(e, state)?;
@@ -1387,6 +1642,32 @@ impl<'ctx> Codegen<'ctx> {
        Ok(self.builder.build_int_compare(pred, l, r, "cmp").unwrap_or_else(|_| i64_t.const_zero()))
    }
    /// Turn `val` into a C string pointer (`i8*`).
    ///
    /// * A pointer value is passed through untouched.
    /// * Any other value is converted with `coerce_to_i64` and formatted by a
    ///   call to the runtime's `rpg_char_i64`; the call's pointer result is
    ///   returned, or a null pointer if the call yields no pointer value.
    ///
    /// Panics if `rpg_char_i64` has not been declared in the module or the
    /// call cannot be built.
    fn coerce_to_cstr_ptr(
        &mut self,
        val: BasicValueEnum<'ctx>,
        _state: &mut FnState<'ctx>,
    ) -> PointerValue<'ctx> {
        if let BasicValueEnum::PointerValue(existing) = val {
            return existing;
        }

        let as_int = self.coerce_to_i64(val);
        let formatter = self.module.get_function("rpg_char_i64").unwrap();
        let call_site = self
            .builder
            .build_call(formatter, &[as_int.into()], "char_i64")
            .unwrap();

        if let inkwell::values::ValueKind::Basic(BasicValueEnum::PointerValue(formatted)) =
            call_site.try_as_basic_value()
        {
            formatted
        } else {
            self.context.ptr_type(AddressSpace::default()).const_null()
        }
    }
    fn coerce_to_i64(&self, val: BasicValueEnum<'ctx>) -> inkwell::values::IntValue<'ctx> {
        let i64_t = self.context.i64_type();
        match val {
@@ -45,6 +45,24 @@ pub fn lower(source: &str) -> Result<Program, LowerError> {
    Ok(program)
 }
 /// Blank out every source line whose first non-whitespace characters are
 /// `**` (compiler directives such as `**FREE` or `**CTDATA`).
 ///
 /// Each such line is replaced by an empty line rather than removed, so the
 /// line numbers of all remaining lines are unchanged.  Lines are re-joined
 /// with `\n`: a trailing newline is not reproduced, and `\r\n` endings
 /// become `\n`.
 fn strip_star_star_directives(source: &str) -> String {
    let mut cleaned = String::with_capacity(source.len());
    for (idx, raw) in source.lines().enumerate() {
        if idx > 0 {
            cleaned.push('\n');
        }
        // A directive line contributes nothing but its line break.
        if !raw.trim_start().starts_with("**") {
            cleaned.push_str(raw);
        }
    }
    cleaned
 }
 // ─────────────────────────────────────────────────────────────────────────────
 // Error type
 // ─────────────────────────────────────────────────────────────────────────────
@@ -52,19 +70,29 @@ pub fn lower(source: &str) -> Result<Program, LowerError> {
 /// Error type returned by `lower` and `tokenize`.
 ///
 /// Carries a message and, when known, the source line it refers to.
 #[derive(Debug)]
 pub struct LowerError {
    /// Description of the problem; printed after the `lower error` prefix by
    /// the `Display` implementation.
    pub message: String,
    /// 1-based source line where the error was detected, if known.
    pub line: Option<usize>,
 }
 impl std::fmt::Display for LowerError {
    /// Writes `lower error (line N): <message>` when a line number is
    /// recorded, and `lower error: <message>` otherwise.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self.line {
            Some(ln) => write!(f, "lower error (line {}): {}", ln, self.message),
            None => write!(f, "lower error: {}", self.message),
        }
    }
 }
 impl std::error::Error for LowerError {}
 impl LowerError {
    fn new(msg: impl Into<String>) -> Self {
-        LowerError { message: msg.into() }
+        LowerError { message: msg.into(), line: None }
    }
    fn at(line: usize, msg: impl Into<String>) -> Self {
        LowerError { message: msg.into(), line: Some(line) }
    }
 }
@@ -385,12 +413,22 @@ enum Token {
 // ─────────────────────────────────────────────────────────────────────────────
 fn tokenize(source: &str) -> Result<Vec<Token>, LowerError> {
-    let chars: Vec<char> = source.chars().collect();
+    // Strip **FREE / **CTDATA / any **word compiler directives first.
    let cleaned = strip_star_star_directives(source);
    let chars: Vec<char> = cleaned.chars().collect();
    let mut pos = 0;
    let mut tokens = Vec::new();
    let mut line: usize = 1;
    while pos < chars.len() {
-        // Skip whitespace
+        // Track line numbers.
        if chars[pos] == '\n' {
            line += 1;
            pos += 1;
            continue;
        }
        // Skip other whitespace
        if chars[pos].is_whitespace() {
            pos += 1;
            continue;
@@ -490,6 +528,14 @@ fn tokenize(source: &str) -> Result<Vec<Token>, LowerError> {
            '=' => { tokens.push(Token::OpEq); pos += 1; continue; }
            '*' => {
                if pos + 1 < chars.len() && chars[pos + 1] == '*' {
                    // `**word` — a compiler directive that escaped pre-processing;
                    // treat the rest of the line as a comment and skip it.
                    if pos + 2 < chars.len() && chars[pos + 2].is_alphabetic() {
                        while pos < chars.len() && chars[pos] != '\n' {
                            pos += 1;
                        }
                        continue;
                    }
                    tokens.push(Token::OpStar2);
                    pos += 2;
                } else {
@@ -704,6 +750,7 @@ fn tokenize(source: &str) -> Result<Vec<Token>, LowerError> {
    }
    tokens.push(Token::Eof);
    let _ = line; // line tracking available for future per-token storage
    Ok(tokens)
 }
@@ -873,11 +920,12 @@ fn keyword_or_ident(upper: &str, original: &str) -> Token {
 /// Parser state: a token list plus a cursor into it.
 struct Parser {
    /// Tokens to parse (presumably the output of `tokenize` — confirm at the call site).
    tokens: Vec<Token>,
    /// Index into `tokens` of the current token.
    pos: usize,
    /// Line counter, set to 1 by `new`.
    /// NOTE(review): the leading underscore suggests it is not read yet — confirm.
    _line: usize,
 }
 impl Parser {
    /// Creates a parser over `tokens` with the cursor at index 0 and the line
    /// counter set to 1.
    fn new(tokens: Vec<Token>) -> Self {
-        Parser { tokens, pos: 0 }
+        Parser { tokens, pos: 0, _line: 1 }
    }
    fn peek(&self) -> &Token {
@@ -901,7 +949,10 @@ impl Parser {
        if &tok == expected {
            Ok(())
        } else {
-            Err(LowerError::new(format!("expected {:?}, got {:?}", expected, tok)))
+            Err(LowerError::new(format!(
                "expected {:?}, got {:?} (token index {})",
                expected, tok, self.pos
            )))
        }
    }
@@ -927,12 +978,21 @@ impl Parser {
    fn parse_program(&mut self) -> Result<Program, LowerError> {
        let mut declarations = Vec::new();
        let mut procedures   = Vec::new();
        let mut skipped_tokens: Vec<String> = Vec::new();
        while !self.is_eof() {
            match self.peek() {
                Token::KwDclProc => {
-                    if let Ok(p) = self.parse_procedure() {
+                    if !skipped_tokens.is_empty() {
-                        procedures.push(p);
+                        skipped_tokens.clear();
                    }
                    match self.parse_procedure() {
                        Ok(p)  => procedures.push(p),
                        Err(e) => {
                            eprintln!("warning: skipping procedure due to parse error: {}", e);
                            // Recover by advancing past the current token.
                            self.advance();
                        }
                    }
                }
                Token::KwCtlOpt  |
@@ -941,16 +1001,33 @@ impl Parser {
                Token::KwDclDs   |
                Token::KwDclF    |
                Token::KwBegSr   => {
-                    if let Ok(d) = self.parse_declaration() {
+                    if !skipped_tokens.is_empty() {
-                        declarations.push(d);
+                        skipped_tokens.clear();
                    }
-                }
+                    match self.parse_declaration() {
-                _ => {
+                        Ok(d)  => declarations.push(d),
-                    // Skip unrecognised top-level tokens
+                        Err(e) => {
                            eprintln!("warning: skipping declaration due to parse error: {}", e);
                            self.advance();
                        }
                    }
                }
                tok => {
                    // Accumulate unrecognised top-level tokens so we can report
                    // them as a meaningful diagnostic.
                    skipped_tokens.push(format!("{:?}", tok));
                    self.advance();
                }
            }
        }
        if !skipped_tokens.is_empty() {
            eprintln!(
                "warning: {} unrecognised top-level token(s) were skipped: {}",
                skipped_tokens.len(),
                skipped_tokens.join(", ")
            );
        }
        Ok(Program { declarations, procedures })
    }
@@ -965,7 +1042,11 @@ impl Parser {
            Token::KwDclDs  => self.parse_dcl_ds(),
            Token::KwDclF   => self.parse_dcl_f(),
            Token::KwBegSr  => self.parse_subroutine(),
-            tok             => Err(LowerError::new(format!("unexpected token in declaration: {:?}", tok))),
+            tok             => Err(LowerError::new(format!(
                "unexpected token in declaration: {:?} — \
                 expected one of CTL-OPT, DCL-S, DCL-C, DCL-DS, DCL-F, BEG-SR",
                tok
            ))),
        }
    }
@@ -1256,6 +1337,18 @@ impl Parser {
    fn parse_var_keyword(&mut self) -> VarKeyword {
        match self.peek().clone() {
            Token::KwDim => {
                self.advance(); // KwDim
                if self.peek() == &Token::LParen {
                    self.advance(); // (
                    if let Ok(expr) = self.parse_expression() {
                        self.eat(&Token::RParen);
                        return VarKeyword::Dim(expr);
                    }
                    self.eat(&Token::RParen);
                }
                VarKeyword::Other("DIM".to_string())
            }
            Token::KwInz => {
                self.advance();
                if self.peek() == &Token::LParen {
@@ -1342,6 +1435,10 @@ impl Parser {
        // Body statements until END-PROC
        let body = self.parse_statement_list(&[Token::KwEndProc]);
        self.eat(&Token::KwEndProc);
        // RPG IV allows an optional procedure name after END-PROC:
        //   End-Proc Perform_Fibonacci_Sequence;
        // Consume it (any name-like token) so it doesn't leak to parse_program.
        let _ = self.try_parse_name();
        self.eat_semicolon();
        Ok(Procedure { name, exported, pi, locals, body })
@@ -1893,6 +1990,8 @@ impl Parser {
        if self.peek() == &Token::LParen {
            // Peek ahead to decide: call or subscript-assignment?
            // If after the matching ')' we see '=' it's an assignment, else call.
            // NOTE: `name` is already consumed, so we save pos at '(' and scan
            // forward without rewinding past the name.
            let saved = self.pos;
            self.advance(); // (
            let mut depth = 1;
@@ -1904,11 +2003,22 @@ impl Parser {
                }
            }
            let is_assign = self.peek() == &Token::OpEq;
-            self.pos = saved; // rewind
+            self.pos = saved; // rewind to '('
            if is_assign {
                // subscript assignment: `name(idx) = expr;`
-                let lv = self.parse_lvalue()?;
+                // Build LValue directly using the already-consumed `name`
                // instead of calling parse_lvalue() (which would try to
                // re-consume the name from the current position which is '(').
                let qname = QualifiedName::simple(name.clone());
                let mut indices = Vec::new();
                self.advance(); // consume '('
                indices.push(self.parse_expression()?);
                while self.eat(&Token::Colon) {
                    indices.push(self.parse_expression()?);
                }
                self.eat(&Token::RParen);
                let lv = LValue::Index(qname, indices);
                self.expect(&Token::OpEq)?;
                let value = self.parse_expression()?;
                self.eat_semicolon();
@@ -2221,7 +2331,9 @@ impl Parser {
    fn parse_builtin_expr(&mut self) -> Result<Expression, LowerError> {
        let bif_tok = self.advance();
-        self.expect(&Token::LParen)?;
+        self.expect(&Token::LParen).map_err(|e| LowerError::new(format!(
            "built-in function {:?}: {}", bif_tok, e.message
        )))?;
        let bif = match bif_tok {
            Token::BifLen => {
                let e = self.parse_expression()?;
@@ -2277,6 +2389,11 @@ impl Parser {
                self.eat(&Token::RParen);
                BuiltIn::Error
            }
            Token::BifElem => {
                let e = self.parse_expression()?;
                self.eat(&Token::RParen);
                BuiltIn::Elem(Box::new(e))
            }
            Token::BifSize => {
                let e = self.parse_expression()?;
                self.eat(&Token::RParen);
@@ -36,7 +36,6 @@
 use std::{
    fs,
    path::PathBuf,
    process,
 };
@@ -44,6 +43,97 @@ use std::{
 use clap::Parser as ClapParser;
 use rust_langrpg::{codegen, load_grammar, lower::lower, parse_as};
 // ─────────────────────────────────────────────────────────────────────────────
 // BNF pre-processing helper
 // ─────────────────────────────────────────────────────────────────────────────
 /// Uppercase every identifier-like token in `source` while copying string
 /// literals, line comments, and block comments through unchanged.
 ///
 /// This lets the BNF grammar (which uses uppercase terminal literals) validate
 /// RPG IV source that uses mixed-case keywords such as `Ctl-Opt` or `Dcl-S`.
 ///
 /// Details:
 /// - An identifier starts with a letter, `_`, `@`, `#` or `$` and continues
 ///   with letters, digits, those symbols, or a `-` that is directly followed
 ///   by a letter (so `Dcl-S` is one token while `i-1` is not).
 /// - Only ASCII letters are changed; other characters pass through as-is.
 /// - `''` inside a string literal is treated as an escaped quote.
 /// - An unterminated block comment or string literal is copied verbatim to the
 ///   end of the input.
 ///
 /// Returns the normalised text; the input is not modified.
 fn uppercase_keywords_for_bnf(source: &str) -> String {
    let chars: Vec<char> = source.chars().collect();
    let mut out = String::with_capacity(source.len());
    let is_name_symbol = |c: char| matches!(c, '_' | '@' | '#' | '$');
    let mut i = 0;
    while i < chars.len() {
        let c = chars[i];
        let next = chars.get(i + 1).copied();
        // Line comment  // … \n  — copy verbatim.  The newline itself is left
        // for the fall-through branch at the bottom of the loop.
        if c == '/' && next == Some('/') {
            while i < chars.len() && chars[i] != '\n' {
                out.push(chars[i]);
                i += 1;
            }
            continue;
        }
        // Block comment  /* … */  — copy verbatim.  Scan all the way to the end
        // of the input so the last character of an unterminated comment is
        // still treated as comment text instead of leaking into the
        // identifier branch.
        if c == '/' && next == Some('*') {
            out.push_str("/*");
            i += 2;
            while i < chars.len() {
                if chars[i] == '*' && chars.get(i + 1) == Some(&'/') {
                    out.push_str("*/");
                    i += 2;
                    break;
                }
                out.push(chars[i]);
                i += 1;
            }
            continue;
        }
        // String literal  '…'  — copy verbatim (including '' escape)
        if c == '\'' {
            out.push(c);
            i += 1;
            while i < chars.len() {
                let ch = chars[i];
                out.push(ch);
                i += 1;
                if ch == '\'' {
                    // '' is an escaped quote — keep going; a lone ' closes.
                    if chars.get(i) == Some(&'\'') {
                        out.push('\'');
                        i += 1;
                    } else {
                        break;
                    }
                }
            }
            continue;
        }
        // Identifier / keyword — uppercase it so the BNF terminals match
        if c.is_alphabetic() || is_name_symbol(c) {
            while i < chars.len() {
                let ch = chars[i];
                // A hyphen belongs to the token only when a letter follows,
                // which keeps `Dcl-S` together but leaves `i-1` as subtraction.
                let joins_hyphenated_keyword =
                    ch == '-' && chars.get(i + 1).map_or(false, |n| n.is_alphabetic());
                if !(ch.is_alphanumeric() || is_name_symbol(ch) || joins_hyphenated_keyword) {
                    break;
                }
                out.push(ch.to_ascii_uppercase());
                i += 1;
            }
            continue;
        }
        // Everything else (operators, punctuation, whitespace, digits)
        out.push(c);
        i += 1;
    }
    out
 }
 // ─────────────────────────────────────────────────────────────────────────────
 // CLI definition
 // ─────────────────────────────────────────────────────────────────────────────
@@ -127,17 +217,83 @@ fn main() {
        };
        // ── BNF validation ────────────────────────────────────────────────────
-        let tree_opt = parse_as(&bnf_parser, source_text.trim(), "program")
+        // RPG IV keywords are case-insensitive, but the BNF grammar uses
-            .or_else(|| parse_as(&bnf_parser, source_text.trim(), "source-file"));
+        // uppercase terminal literals.  Normalise the source before checking.
        let normalised = uppercase_keywords_for_bnf(source_text.trim());
        let tree_opt = parse_as(&bnf_parser, normalised.trim(), "program")
            .or_else(|| parse_as(&bnf_parser, normalised.trim(), "source-file"));
        if tree_opt.is_none() {
            // BNF validation is a structural sanity-check.  Emit a warning so
            // the developer knows something looks off, but continue with the
            // lowering pass which is more permissive and gives better errors.
            eprintln!(
-                "error: '{}' did not match the RPG IV grammar",
+                "warning: '{}' did not fully match the RPG IV grammar — \
                 attempting to compile anyway",
                source_path.display()
            );
-            any_error = true;
+
            // ── Helpful diagnostics ──────────────────────────────────────────
            // Scan for the first line the BNF cannot classify to give the user
            // a concrete hint about what caused the mismatch.
            let top_level_rules = &[
                "control-spec",
                "standalone-decl",
                "constant-decl",
                "data-structure-decl",
                "file-decl",
                "procedure",
                "subroutine",
                "statement",
            ];
            'outer: for (lineno, raw_line) in source_text.lines().enumerate() {
                let trimmed = raw_line.trim();
                let norm_check = trimmed.to_ascii_uppercase();
                // Skip blanks, comments, compiler directives, and lines that
                // introduce multi-line constructs (DCL-PROC, END-PROC, DCL-DS,
                // END-DS, DCL-PI, END-PI, BEG-SR, END-SR) — these will never
                // match a single-line grammar rule and are not errors.
                if trimmed.is_empty()
                    || trimmed.starts_with("//")
                    || trimmed.starts_with("/*")
                    || trimmed.starts_with("**")
                    || norm_check.starts_with("DCL-PROC")
                    || norm_check.starts_with("END-PROC")
                    || norm_check.starts_with("DCL-DS")
                    || norm_check.starts_with("END-DS")
                    || norm_check.starts_with("DCL-PI")
                    || norm_check.starts_with("END-PI")
                    || norm_check.starts_with("BEG-SR")
                    || norm_check.starts_with("END-SR")
                {
                    continue;
                }
                // Strip inline line comments before BNF matching so that
                // `fib(1) = 0; // some comment` doesn't cause a false positive.
                let trimmed_no_comment = if let Some(idx) = trimmed.find("//") {
                    trimmed[..idx].trim_end()
                } else {
                    trimmed
                };
                let norm_line = uppercase_keywords_for_bnf(trimmed_no_comment);
                let mut matched = false;
                for rule in top_level_rules {
                    if parse_as(&bnf_parser, norm_line.trim(), rule).is_some() {
                        matched = true;
                        break;
                    }
                }
                if !matched {
                    eprintln!(
                        "  hint (line {}): unrecognised grammar construct: {:?}",
                        lineno + 1,
                        if trimmed.len() > 80 { &trimmed[..80] } else { trimmed }
                    );
                    break 'outer;
                }
            }
            // Fall through — try lowering anyway.
        }
        // ── --emit-tree: print parse tree and stop ────────────────────────────
        if cli.emit_tree {
@@ -1,9 +1,16 @@
 <wsc> ::= ' ' | '	' | '
-' | '
+' | '
 '
 <ws> ::= <wsc> | <wsc> <ws>
 <opt-ws> ::= <ws> | ''
 <program> ::= <opt-ws> <program-body> <opt-ws>
            | <opt-ws> <free-directive> <opt-ws> <program-body> <opt-ws>
            | <opt-ws> <free-directive> <opt-ws>
 <free-directive> ::= '**FREE'
                   | '**free'
                   | '**Free'
 <program-body> ::= <declaration-section> <opt-ws> <procedure-list>
                 | <declaration-section>
@@ -24,7 +31,9 @@
                   | <procedure>
 <procedure> ::= 'DCL-PROC' <ws> <identifier> <opt-ws> ';' <opt-ws> <procedure-body> <opt-ws> 'END-PROC' <opt-ws> ';'
              | 'DCL-PROC' <ws> <identifier> <opt-ws> ';' <opt-ws> <procedure-body> <opt-ws> 'END-PROC' <ws> <identifier> <opt-ws> ';'
              | 'DCL-PROC' <ws> <identifier> <ws> <proc-keyword-list> <opt-ws> ';' <opt-ws> <procedure-body> <opt-ws> 'END-PROC' <opt-ws> ';'
              | 'DCL-PROC' <ws> <identifier> <ws> <proc-keyword-list> <opt-ws> ';' <opt-ws> <procedure-body> <opt-ws> 'END-PROC' <ws> <identifier> <opt-ws> ';'
 <proc-keyword-list> ::= <proc-keyword> <ws> <proc-keyword-list>
                      | <proc-keyword>