add: fib sample

This commit is contained in:
2026-03-12 22:19:42 -07:00
parent 073c86d784
commit 31a6c8b91b
7 changed files with 756 additions and 46 deletions

27
fib.rpg Normal file
View File

@@ -0,0 +1,27 @@
**FREE
// Sample program: fills a ten-element array with the start of the Fibonacci
// sequence and shows each value with DSPLY.
// The procedure named in MAIN(...) is the program's entry point.
Ctl-Opt Main(Perform_Fibonacci_Sequence);
Dcl-Proc Perform_Fibonacci_Sequence;
// Loop counter, also used as the array subscript (subscripts start at 1).
Dcl-s i Uns(10);
// Storage for the sequence; DIM(10) fixes the element count that %Elem(fib)
// reports in the loops below.
Dcl-s fib Uns(10) Dim(10);
// Display a title
Dsply ('Fibonacci Sequence:');
// Initialize the first two elements of the array
fib(1) = 0; // The sequence usually starts with 0 and 1
fib(2) = 1;
// Loop to calculate the rest of the sequence
For i = 3 to %Elem(fib);
// Each number is the sum of the two preceding ones
fib(i) = fib(i-1) + fib(i-2);
Endfor;
// Loop to display the sequence numbers
For i = 1 to %Elem(fib);
// %Char turns the number into text so it can be joined to the leading blank.
Dsply (' ' + %Char(fib(i)));
Endfor;
End-Proc Perform_Fibonacci_Sequence;

View File

@@ -12,6 +12,8 @@
//! | `rpg_dsply_i64` | `(n: i64)` | Display a signed 64-bit integer | //! | `rpg_dsply_i64` | `(n: i64)` | Display a signed 64-bit integer |
//! | `rpg_dsply_f64` | `(f: f64)` | Display a double-precision float | //! | `rpg_dsply_f64` | `(f: f64)` | Display a double-precision float |
//! | `rpg_halt` | `(code: i32)` | Abnormal program termination | //! | `rpg_halt` | `(code: i32)` | Abnormal program termination |
//! | `rpg_char_i64` | `(n: i64) -> *const c_char` | Format integer as null-term C string |
//! | `rpg_concat` | `(a: *const c_char, b: *const c_char) -> *const c_char` | Concatenate two C strings |
//! //!
//! ## Building //! ## Building
//! //!
@@ -44,10 +46,23 @@
#![allow(clippy::missing_safety_doc)] #![allow(clippy::missing_safety_doc)]
use std::ffi::CStr; use std::cell::RefCell;
use std::ffi::{CStr, CString};
use std::io::{self, Write}; use std::io::{self, Write};
use std::slice; use std::slice;
// ─────────────────────────────────────────────────────────────────────────────
// Thread-local scratch buffers used by rpg_char_i64 / rpg_concat
// ─────────────────────────────────────────────────────────────────────────────
thread_local! {
/// Backing store for the most recent `rpg_char_i64` result.
///
/// `rpg_char_i64` overwrites this slot on every call, which drops the
/// previously stored `CString`; a pointer returned by an earlier call on the
/// same thread therefore dangles once the next call has run.
static CHAR_BUF: RefCell<CString> = RefCell::new(CString::new("").unwrap());
/// Backing store for the most recent `rpg_concat` result.
///
/// Same replacement rule as `CHAR_BUF`: each `rpg_concat` call drops the
/// string stored by the previous one.
static CONCAT_BUF: RefCell<CString> = RefCell::new(CString::new("").unwrap());
}
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
// rpg_dsply — display a fixed-length character field // rpg_dsply — display a fixed-length character field
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
@@ -151,6 +166,66 @@ pub extern "C" fn rpg_dsply_f64(f: f64) {
/// Maps roughly to the IBM i concept of an *unhandled exception* ending the /// Maps roughly to the IBM i concept of an *unhandled exception* ending the
/// job. /// job.
#[no_mangle] #[no_mangle]
// ─────────────────────────────────────────────────────────────────────────────
// rpg_char_i64 — convert a 64-bit integer to a C string (%CHAR built-in)
// ─────────────────────────────────────────────────────────────────────────────
/// Render `n` in decimal and return a pointer to the NUL-terminated text.
///
/// The text is stored in this thread's `CHAR_BUF`, so the pointer stays
/// valid only until `rpg_char_i64` runs again on the same thread. The caller
/// never owns the memory and must not free it.
///
/// Runtime counterpart of the RPG IV `%CHAR(numeric-expression)` built-in.
///
/// NOTE(review): an expression that formats two integers before consuming
/// either result (for example both operands of one `+`) would read the first
/// pointer after this function has replaced the buffer — confirm how the code
/// generator sequences these calls.
#[no_mangle]
pub extern "C" fn rpg_char_i64(n: i64) -> *const std::os::raw::c_char {
    let rendered = match CString::new(n.to_string()) {
        Ok(text) => text,
        // A decimal rendering contains no NUL byte, so this arm is only a
        // defensive fallback.
        Err(_) => CString::new("0").unwrap(),
    };
    CHAR_BUF.with(|slot| {
        let mut stored = slot.borrow_mut();
        // Assigning drops the previous string and keeps the new one alive
        // for as long as the thread-local slot holds it.
        *stored = rendered;
        stored.as_ptr()
    })
}
// ─────────────────────────────────────────────────────────────────────────────
// rpg_concat — concatenate two null-terminated C strings ('+' on char)
// ─────────────────────────────────────────────────────────────────────────────
/// Concatenate two null-terminated C strings and return a pointer to a
/// thread-local buffer holding the result.
///
/// The operands are joined byte-for-byte: no character-set conversion is
/// applied, so bytes that are not valid UTF-8 pass through unchanged.
///
/// The returned pointer is valid until the next call to `rpg_concat` on the
/// same thread. Callers must not free it. Passing a pointer returned by a
/// previous `rpg_concat` call as an operand is supported, because both
/// operands are copied before the buffer is replaced.
///
/// This implements the RPG IV `+` operator when both operands are character
/// expressions.
///
/// # Safety
///
/// Both `a` and `b` must be valid null-terminated C strings (or null pointers,
/// which are treated as empty strings).
#[no_mangle]
pub unsafe extern "C" fn rpg_concat(
    a: *const std::os::raw::c_char,
    b: *const std::os::raw::c_char,
) -> *const std::os::raw::c_char {
    // Copy both operands first: either one may point into CONCAT_BUF, which
    // is overwritten below.
    let mut joined: Vec<u8> = Vec::new();
    for &operand in &[a, b] {
        if operand.is_null() {
            continue;
        }
        // SAFETY: the caller guarantees every non-null operand is a valid
        // null-terminated C string.
        let text = unsafe { CStr::from_ptr(operand) };
        joined.extend_from_slice(text.to_bytes());
    }
    // `to_bytes` excludes the terminator, so `joined` holds no NUL and the
    // conversion is not expected to fail; fall back to "" rather than panic
    // across the FFI boundary.
    let cs = CString::new(joined).unwrap_or_default();
    CONCAT_BUF.with(|cell| {
        let mut stored = cell.borrow_mut();
        *stored = cs;
        stored.as_ptr()
    })
}
pub extern "C" fn rpg_halt(code: i32) { pub extern "C" fn rpg_halt(code: i32) {
eprintln!("RPG program halted with code {}", code); eprintln!("RPG program halted with code {}", code);
std::process::exit(code); std::process::exit(code);
@@ -374,6 +449,47 @@ fn rtrim_spaces(bytes: &[u8]) -> &[u8] {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
// ── rpg_char_i64 ─────────────────────────────────────────────────────────
/// A positive value is rendered as its plain decimal digits.
#[test]
fn char_i64_positive() {
    // The pointer targets the thread-local buffer, which outlives this borrow.
    let text = unsafe { CStr::from_ptr(rpg_char_i64(42)) };
    assert_eq!(text.to_str().unwrap(), "42");
}
/// Zero is rendered as a single `0`.
#[test]
fn char_i64_zero() {
    let text = unsafe { CStr::from_ptr(rpg_char_i64(0)) };
    assert_eq!(text.to_str().unwrap(), "0");
}
/// A negative value carries a leading minus sign.
#[test]
fn char_i64_negative() {
    let text = unsafe { CStr::from_ptr(rpg_char_i64(-7)) };
    assert_eq!(text.to_str().unwrap(), "-7");
}
// ── rpg_concat ───────────────────────────────────────────────────────────
/// Two ordinary operands are joined in order.
#[test]
fn concat_two_strings() {
    let left = CString::new("Hello, ").unwrap();
    let right = CString::new("World!").unwrap();
    // `left` and `right` stay alive for the whole call, so both pointers are valid.
    let joined = unsafe { CStr::from_ptr(rpg_concat(left.as_ptr(), right.as_ptr())) };
    assert_eq!(joined.to_str().unwrap(), "Hello, World!");
}
/// Null operands count as empty strings, giving an empty result.
#[test]
fn concat_null_pointers() {
    let joined = unsafe { CStr::from_ptr(rpg_concat(std::ptr::null(), std::ptr::null())) };
    assert_eq!(joined.to_str().unwrap(), "");
}
use super::*; use super::*;
#[test] #[test]

View File

@@ -220,6 +220,8 @@ pub enum VarKeyword {
/// `INZ(*named-constant)` — initialise to named constant. /// `INZ(*named-constant)` — initialise to named constant.
InzNamed(NamedConstant), InzNamed(NamedConstant),
Static, Static,
/// `DIM(n)` — declares the variable as an array with `n` elements.
Dim(Expression),
Other(String), Other(String),
} }
@@ -608,6 +610,8 @@ pub enum BuiltIn {
Rem(Box<Expression>, Box<Expression>), Rem(Box<Expression>, Box<Expression>),
/// `%DIV(a:b)`. /// `%DIV(a:b)`.
Div(Box<Expression>, Box<Expression>), Div(Box<Expression>, Box<Expression>),
/// `%ELEM(array)` — number of elements in an array.
Elem(Box<Expression>),
/// Any built-in we haven't individually modelled. /// Any built-in we haven't individually modelled.
Other(String, Vec<Expression>), Other(String, Vec<Expression>),
} }

View File

@@ -77,6 +77,7 @@ pub fn compile_to_object(
module, module,
builder, builder,
globals: HashMap::new(), globals: HashMap::new(),
array_dims: HashMap::new(),
string_cache: HashMap::new(), string_cache: HashMap::new(),
global_inits: Vec::new(), global_inits: Vec::new(),
}; };
@@ -139,6 +140,7 @@ pub fn emit_ir(program: &Program) -> Result<String, CodegenError> {
module, module,
builder, builder,
globals: HashMap::new(), globals: HashMap::new(),
array_dims: HashMap::new(),
string_cache: HashMap::new(), string_cache: HashMap::new(),
global_inits: Vec::new(), global_inits: Vec::new(),
}; };
@@ -180,6 +182,9 @@ struct Codegen<'ctx> {
builder: Builder<'ctx>, builder: Builder<'ctx>,
/// Module-scope global variables name -> (alloca/global ptr, TypeSpec) /// Module-scope global variables name -> (alloca/global ptr, TypeSpec)
globals: HashMap<String, (PointerValue<'ctx>, TypeSpec)>, globals: HashMap<String, (PointerValue<'ctx>, TypeSpec)>,
/// Array dimension table: variable name -> number of elements.
/// Populated when a `DIM(n)` keyword is encountered.
array_dims: HashMap<String, u64>,
/// Interned string literal globals (content -> global ptr). /// Interned string literal globals (content -> global ptr).
string_cache: HashMap<String, PointerValue<'ctx>>, string_cache: HashMap<String, PointerValue<'ctx>>,
/// Global declarations that need runtime initialisation (INZ with a value). /// Global declarations that need runtime initialisation (INZ with a value).
@@ -228,6 +233,25 @@ impl<'ctx> Codegen<'ctx> {
// function is available when we build the @llvm.global_ctors entry. // function is available when we build the @llvm.global_ctors entry.
self.gen_global_init_fn()?; self.gen_global_init_fn()?;
// Determine the entry-point procedure name.
//
// Priority order:
// 1. `CTL-OPT MAIN(name)` — explicit entry point declaration.
// 2. The first EXPORT-ed procedure (legacy / hello.rpg style).
let ctl_main: Option<String> = program.declarations.iter().find_map(|d| {
if let Declaration::ControlSpec(cs) = d {
cs.keywords.iter().find_map(|kw| {
if let CtlKeyword::Main(name) = kw {
Some(name.clone())
} else {
None
}
})
} else {
None
}
});
// Generate each procedure. // Generate each procedure.
let mut exported_name: Option<String> = None; let mut exported_name: Option<String> = None;
for proc in &program.procedures { for proc in &program.procedures {
@@ -237,8 +261,10 @@ impl<'ctx> Codegen<'ctx> {
self.gen_procedure(proc)?; self.gen_procedure(proc)?;
} }
// Emit a C `main()` wrapper that calls the exported entry point. // Emit a C `main()` wrapper that calls the entry point.
if let Some(name) = exported_name { // CTL-OPT MAIN(name) takes priority over EXPORT.
let entry = ctl_main.or(exported_name);
if let Some(name) = entry {
self.gen_main_wrapper(&name)?; self.gen_main_wrapper(&name)?;
} }
@@ -278,6 +304,18 @@ impl<'ctx> Codegen<'ctx> {
false, false,
); );
self.module.add_function("memset", memset_ty, None); self.module.add_function("memset", memset_ty, None);
// void rpg_dsply_i64(i64 n) — display an integer
let dsply_i64_ty = void_t.fn_type(&[i64_t.into()], false);
self.module.add_function("rpg_dsply_i64", dsply_i64_ty, None);
// i8* rpg_char_i64(i64 n) — format integer to null-terminated C string
let char_i64_ty = i8_ptr.fn_type(&[i64_t.into()], false);
self.module.add_function("rpg_char_i64", char_i64_ty, None);
// i8* rpg_concat(i8* a, i8* b) — concatenate two C strings
let concat_ty = i8_ptr.fn_type(&[i8_ptr.into(), i8_ptr.into()], false);
self.module.add_function("rpg_concat", concat_ty, None);
} }
// ── Global declarations ───────────────────────────────────────────────── // ── Global declarations ─────────────────────────────────────────────────
@@ -384,7 +422,28 @@ impl<'ctx> Codegen<'ctx> {
fn gen_local_decl(&mut self, decl: &Declaration, state: &mut FnState<'ctx>) -> Result<(), CodegenError> { fn gen_local_decl(&mut self, decl: &Declaration, state: &mut FnState<'ctx>) -> Result<(), CodegenError> {
match decl { match decl {
Declaration::Standalone(sd) => { Declaration::Standalone(sd) => {
let ptr = self.alloca_for_type(&sd.ty, &sd.name); // Check if a DIM(n) keyword is present — if so we allocate a
// contiguous block of `n * elem_size` bytes.
let dim = sd.keywords.iter().find_map(|kw| {
if let VarKeyword::Dim(expr) = kw {
const_int_from_expr(expr)
} else {
None
}
});
let ptr = if let Some(n) = dim {
// Array: allocate n elements of the element type.
self.alloca_for_type_dim(&sd.ty, &sd.name, n)
} else {
self.alloca_for_type(&sd.ty, &sd.name)
};
// Record the dimension so %Elem and indexing can use it.
if let Some(n) = dim {
self.array_dims.insert(sd.name.clone(), n);
}
// Apply initialiser if any. // Apply initialiser if any.
for kw in &sd.keywords { for kw in &sd.keywords {
match kw { match kw {
@@ -415,6 +474,35 @@ impl<'ctx> Codegen<'ctx> {
self.builder.build_alloca(arr_ty, name).unwrap() self.builder.build_alloca(arr_ty, name).unwrap()
} }
/// Allocate storage for an array of `n` elements of type `ty`.
///
/// The storage is a single stack slot typed as an `i8` array of
/// `n * element_size` bytes, where the element size comes from
/// `ty.byte_size()` and defaults to 8 when the type reports none.
///
/// # Panics
///
/// Panics when the total byte count does not fit in the `u32` length of an
/// LLVM array type. The previous code truncated or wrapped that length, which
/// produced a slot smaller than the declared array.
///
/// NOTE(review): the slot is typed as bytes while element loads elsewhere use
/// wider integer types — confirm the resulting alignment is adequate.
fn alloca_for_type_dim(&self, ty: &TypeSpec, name: &str, n: u64) -> PointerValue<'ctx> {
    let elem_size: u64 = ty.byte_size().unwrap_or(8);
    // Do the arithmetic in u64 with overflow checking, then narrow only when
    // the value is known to fit.
    let total = elem_size
        .checked_mul(n)
        .filter(|&bytes| bytes <= u64::from(u32::MAX))
        .map(|bytes| bytes as u32)
        .unwrap_or_else(|| {
            panic!(
                "array `{}` is too large: {} element(s) of {} byte(s) exceed the maximum LLVM array length",
                name, n, elem_size
            )
        });
    let arr_ty = self.context.i8_type().array_type(total);
    self.builder.build_alloca(arr_ty, name).unwrap()
}
/// Compute the address of element `index` of the array starting at `base_ptr`.
///
/// `index` follows the RPG convention of counting from 1, and `elem_size` is
/// the size of one element in bytes. The emitted code computes
/// `base_ptr + (index - 1) * elem_size` as a byte offset; it performs no range
/// check on `index`.
fn array_elem_ptr(
    &self,
    base_ptr: PointerValue<'ctx>,
    index: inkwell::values::IntValue<'ctx>,
    elem_size: u64,
) -> PointerValue<'ctx> {
    let i64_ty = self.context.i64_type();
    // Shift the 1-based subscript down to a 0-based slot number.
    let slot = self
        .builder
        .build_int_sub(index, i64_ty.const_int(1, false), "idx0")
        .unwrap();
    // Scale the slot number to a byte offset.
    let offset = self
        .builder
        .build_int_mul(slot, i64_ty.const_int(elem_size, false), "byte_off")
        .unwrap();
    // SAFETY: `build_gep` cannot verify that the offset stays inside the
    // allocation behind `base_ptr`; callers are responsible for passing a
    // subscript within the array's declared dimension.
    unsafe {
        self.builder
            .build_gep(self.context.i8_type(), base_ptr, &[offset], "elem_ptr")
            .unwrap()
    }
}
fn zero_init_var(&self, ptr: PointerValue<'ctx>, ty: &TypeSpec) -> Result<(), CodegenError> { fn zero_init_var(&self, ptr: PointerValue<'ctx>, ty: &TypeSpec) -> Result<(), CodegenError> {
let size = ty.byte_size().unwrap_or(0); let size = ty.byte_size().unwrap_or(0);
if size == 0 { return Ok(()); } if size == 0 { return Ok(()); }
@@ -604,8 +692,11 @@ impl<'ctx> Codegen<'ctx> {
self.builder.position_at_end(bb); self.builder.position_at_end(bb);
// Call the RPG entry procedure. // Call the RPG entry procedure.
let rpg_fn_name = format!("rpg_{}", rpg_entry); // Try the bare name first (CTL-OPT MAIN procedures are not renamed),
if let Some(rpg_fn) = self.module.get_function(&rpg_fn_name) { // then the `rpg_` prefix used for EXPORT-ed procedures.
let callee = self.module.get_function(rpg_entry)
.or_else(|| self.module.get_function(&format!("rpg_{}", rpg_entry)));
if let Some(rpg_fn) = callee {
self.builder.build_call(rpg_fn, &[], "call_rpg").ok(); self.builder.build_call(rpg_fn, &[], "call_rpg").ok();
} }
@@ -699,16 +790,34 @@ impl<'ctx> Codegen<'ctx> {
self.builder.build_call(dsply, &[ptr.into(), len_val.into()], "dsply").ok(); self.builder.build_call(dsply, &[ptr.into(), len_val.into()], "dsply").ok();
} }
other => { other => {
// Evaluate as integer-like expression and display it. // Evaluate the expression; dispatch to the right display helper.
if let Ok(val) = self.gen_expression(other, state) { if let Ok(val) = self.gen_expression(other, state) {
// For now just call dsply_cstr on an empty string as fallback. match val {
let _ = val; BasicValueEnum::PointerValue(ptr) => {
// String pointer — use rpg_dsply_cstr.
if let Some(dsply_cstr) = self.module.get_function("rpg_dsply_cstr") {
self.builder.build_call(dsply_cstr, &[ptr.into()], "dsply_cstr").ok();
}
}
BasicValueEnum::IntValue(iv) => {
// Integer — use rpg_dsply_i64.
if let Some(dsply_i64) = self.module.get_function("rpg_dsply_i64") {
let ext = self.builder
.build_int_s_extend(iv, self.context.i64_type(), "dsply_ext")
.unwrap_or(iv);
self.builder.build_call(dsply_i64, &[ext.into()], "dsply_i64").ok();
}
}
_ => {
// Fallback: display an empty string.
let empty = self.intern_string(""); let empty = self.intern_string("");
let zero = self.context.i64_type().const_zero(); let zero = self.context.i64_type().const_zero();
self.builder.build_call(dsply, &[empty.into(), zero.into()], "dsply").ok(); self.builder.build_call(dsply, &[empty.into(), zero.into()], "dsply").ok();
} }
} }
} }
}
}
Ok(()) Ok(())
} }
@@ -723,6 +832,20 @@ impl<'ctx> Codegen<'ctx> {
// Clone to avoid borrow issues. // Clone to avoid borrow issues.
let ty = ty.clone(); let ty = ty.clone();
// If the LValue has an index (array assignment), compute the element pointer.
let dest_ptr = if let LValue::Index(_, indices) = &a.target {
if let Some(idx_expr) = indices.first() {
let elem_size = ty.byte_size().unwrap_or(8);
let idx_val = self.gen_expression(idx_expr, state)?;
let idx_i = self.coerce_to_i64(idx_val);
self.array_elem_ptr(ptr, idx_i, elem_size)
} else {
ptr
}
} else {
ptr
};
match &ty { match &ty {
TypeSpec::Char(size_expr) => { TypeSpec::Char(size_expr) => {
if let Expression::Literal(Literal::String(s)) = &a.value { if let Expression::Literal(Literal::String(s)) = &a.value {
@@ -733,16 +856,16 @@ impl<'ctx> Codegen<'ctx> {
let src = self.intern_bytes(&padded); let src = self.intern_bytes(&padded);
let memcpy = self.module.get_function("memcpy").unwrap(); let memcpy = self.module.get_function("memcpy").unwrap();
let len = self.context.i64_type().const_int(field_len as u64, false); let len = self.context.i64_type().const_int(field_len as u64, false);
self.builder.build_call(memcpy, &[ptr.into(), src.into(), len.into()], "assign").ok(); self.builder.build_call(memcpy, &[dest_ptr.into(), src.into(), len.into()], "assign").ok();
} }
} }
TypeSpec::Int(_) | TypeSpec::Uns(_) => { TypeSpec::Int(_) | TypeSpec::Uns(_) => {
let val = self.gen_expression(&a.value, state)?; let val = self.gen_expression(&a.value, state)?;
self.store_value(ptr, val, &ty); self.store_value(dest_ptr, val, &ty);
} }
_ => { _ => {
if let Ok(val) = self.gen_expression(&a.value, state) { if let Ok(val) = self.gen_expression(&a.value, state) {
self.store_value(ptr, val, &ty); self.store_value(dest_ptr, val, &ty);
} }
} }
} }
@@ -893,7 +1016,10 @@ impl<'ctx> Codegen<'ctx> {
let start = self.gen_expression(&f.start, state)?; let start = self.gen_expression(&f.start, state)?;
let start_i = self.coerce_to_i64(start); let start_i = self.coerce_to_i64(start);
self.builder.build_store(loop_var, start_i).ok(); self.builder.build_store(loop_var, start_i).ok();
state.locals.insert(f.var.clone(), (loop_var, TypeSpec::Int(Box::new(Expression::Literal(Literal::Integer(10)))))); // Store the loop variable with Int(20) so that byte_size() returns 8,
// matching the i64 alloca above. (Int(10) would give 4 bytes, causing
// a 32-bit load from an 8-byte slot.)
state.locals.insert(f.var.clone(), (loop_var, TypeSpec::Int(Box::new(Expression::Literal(Literal::Integer(20))))));
let cond_bb = self.context.append_basic_block(func, "for_cond"); let cond_bb = self.context.append_basic_block(func, "for_cond");
let body_bb = self.context.append_basic_block(func, "for_body"); let body_bb = self.context.append_basic_block(func, "for_body");
@@ -1062,9 +1188,12 @@ impl<'ctx> Codegen<'ctx> {
let llvm_ty = self.type_spec_to_llvm(&ty) let llvm_ty = self.type_spec_to_llvm(&ty)
.unwrap_or(BasicTypeEnum::IntType(i64_t)); .unwrap_or(BasicTypeEnum::IntType(i64_t));
match &ty { match &ty {
TypeSpec::Int(w) | TypeSpec::Uns(w) => { TypeSpec::Int(_) | TypeSpec::Uns(_) => {
let width = const_int_from_expr(w).unwrap_or(8); // Use byte_size() to get the real storage width — the
let int_ty = self.context.custom_width_int_type((width * 8) as u32); // type parameter is RPG's digit-precision (e.g. 10 for
// Uns(10) = 4 bytes), NOT the byte count.
let bytes = ty.byte_size().unwrap_or(8);
let int_ty = self.context.custom_width_int_type((bytes * 8) as u32);
if let Ok(v) = self.builder.build_load(int_ty, ptr, name) { if let Ok(v) = self.builder.build_load(int_ty, ptr, name) {
let iv = v.into_int_value(); let iv = v.into_int_value();
let ext = self.builder.build_int_s_extend(iv, i64_t, "sext").unwrap_or(iv); let ext = self.builder.build_int_s_extend(iv, i64_t, "sext").unwrap_or(iv);
@@ -1111,7 +1240,15 @@ impl<'ctx> Codegen<'ctx> {
Expression::Paren(e) => self.gen_expression(e, state), Expression::Paren(e) => self.gen_expression(e, state),
Expression::Call(name, args) => { Expression::Call(name, args) => {
// Treat call-as-expression similarly to CALLP. // RPG IV uses identical syntax for procedure calls and array
// subscripts: `name(arg)`. At parse time we always emit
// Expression::Call for `ident(...)`, so here we need to
// distinguish the two cases at code-generation time:
//
// 1. A real procedure/function exists in the module → call it.
// 2. The name refers to a local/global variable with a known
// DIM → treat the single argument as an array index.
// 3. Otherwise → return 0 (unknown call).
let callee = self.module.get_function(name) let callee = self.module.get_function(name)
.or_else(|| self.module.get_function(&format!("rpg_{}", name))); .or_else(|| self.module.get_function(&format!("rpg_{}", name)));
if let Some(callee) = callee { if let Some(callee) = callee {
@@ -1128,13 +1265,79 @@ impl<'ctx> Codegen<'ctx> {
inkwell::values::ValueKind::Basic(v) => return Ok(v), inkwell::values::ValueKind::Basic(v) => return Ok(v),
inkwell::values::ValueKind::Instruction(_) => {} inkwell::values::ValueKind::Instruction(_) => {}
} }
return Ok(i64_t.const_zero().into());
} }
// No function found — check if `name` is a variable and the call is
// actually a subscript read: name(idx).
if let Some((ptr, ty)) = self.resolve_var(name, state) {
    let elem_size = ty.byte_size().unwrap_or(8);
    if let Some(Arg::Expr(idx_expr)) = args.first() {
        let idx_val = self.gen_expression(idx_expr, state)?;
        let idx_i = self.coerce_to_i64(idx_val);
        let elem_ptr = self.array_elem_ptr(ptr, idx_i, elem_size);
        match &ty {
            TypeSpec::Int(_) | TypeSpec::Uns(_) => {
                // Load with the element's storage width, then widen to the
                // i64 used for all integer expression values.
                let int_ty = self.context.custom_width_int_type((elem_size * 8) as u32);
                let cast_ptr = self.builder.build_pointer_cast(
                    elem_ptr,
                    self.context.ptr_type(AddressSpace::default()),
                    "call_elem_ptr_cast",
                ).unwrap_or(elem_ptr);
                if let Ok(v) = self.builder.build_load(int_ty, cast_ptr, "call_elem") {
                    let iv = v.into_int_value();
                    // Unsigned elements must be zero-extended; sign-extending
                    // them turns values with the top bit set into negative
                    // numbers.
                    let ext = if matches!(&ty, TypeSpec::Uns(_)) {
                        self.builder.build_int_z_extend(iv, i64_t, "call_elem_ext")
                    } else {
                        self.builder.build_int_s_extend(iv, i64_t, "call_elem_ext")
                    }
                    .unwrap_or(iv);
                    return Ok(ext.into());
                }
            }
            // Non-integer elements are passed on by address.
            _ => return Ok(elem_ptr.into()),
        }
    }
}
Ok(i64_t.const_zero().into()) Ok(i64_t.const_zero().into())
} }
Expression::BuiltIn(bif) => self.gen_builtin(bif, state), Expression::BuiltIn(bif) => self.gen_builtin(bif, state),
Expression::Special(_) | Expression::Index(_, _) => { Expression::Special(_) => {
Ok(i64_t.const_zero().into())
}
Expression::Index(qname, indices) => {
// Array element read: name(i) — RPG uses 1-based indexing.
let name = qname.leaf();
if let Some((ptr, ty)) = self.resolve_var(name, state) {
    let elem_size = ty.byte_size().unwrap_or(8);
    if let Some(idx_expr) = indices.first() {
        let idx_val = self.gen_expression(idx_expr, state)?;
        let idx_i = self.coerce_to_i64(idx_val);
        let elem_ptr = self.array_elem_ptr(ptr, idx_i, elem_size);
        match &ty {
            TypeSpec::Int(_) | TypeSpec::Uns(_) => {
                // Load with the element's storage width, then widen to the
                // i64 used for all integer expression values.
                let int_ty = self.context.custom_width_int_type((elem_size * 8) as u32);
                let cast_ptr = self.builder.build_pointer_cast(
                    elem_ptr,
                    self.context.ptr_type(AddressSpace::default()),
                    "elem_ptr_cast",
                ).unwrap_or(elem_ptr);
                if let Ok(v) = self.builder.build_load(int_ty, cast_ptr, "elem") {
                    let iv = v.into_int_value();
                    // Unsigned elements must be zero-extended; sign-extending
                    // them turns values with the top bit set into negative
                    // numbers.
                    let ext = if matches!(&ty, TypeSpec::Uns(_)) {
                        self.builder.build_int_z_extend(iv, i64_t, "zext")
                    } else {
                        self.builder.build_int_s_extend(iv, i64_t, "sext")
                    }
                    .unwrap_or(iv);
                    return Ok(ext.into());
                }
            }
            // Non-integer elements are passed on by address.
            _ => {
                return Ok(elem_ptr.into());
            }
        }
    }
}
Ok(i64_t.const_zero().into()) Ok(i64_t.const_zero().into())
} }
} }
@@ -1179,6 +1382,24 @@ impl<'ctx> Codegen<'ctx> {
match op { match op {
BinOp::Add => { BinOp::Add => {
// If either operand is a pointer (string), use rpg_concat.
let lv_is_ptr = matches!(lv, BasicValueEnum::PointerValue(_));
let rv_is_ptr = matches!(rv, BasicValueEnum::PointerValue(_));
if lv_is_ptr || rv_is_ptr {
// Ensure both sides are pointers (call rpg_char_i64 on integers).
let lp = self.coerce_to_cstr_ptr(lv, state);
let rp = self.coerce_to_cstr_ptr(rv, state);
let concat_fn = self.module.get_function("rpg_concat").unwrap();
let call = self.builder
.build_call(concat_fn, &[lp.into(), rp.into()], "concat")
.unwrap();
return match call.try_as_basic_value() {
inkwell::values::ValueKind::Basic(v) => Ok(v),
inkwell::values::ValueKind::Instruction(_) => {
Ok(self.context.ptr_type(AddressSpace::default()).const_null().into())
}
};
}
let l = self.coerce_to_i64(lv); let l = self.coerce_to_i64(lv);
let r = self.coerce_to_i64(rv); let r = self.coerce_to_i64(rv);
Ok(self.builder.build_int_add(l, r, "add").unwrap().into()) Ok(self.builder.build_int_add(l, r, "add").unwrap().into())
@@ -1243,6 +1464,18 @@ impl<'ctx> Codegen<'ctx> {
fn gen_builtin(&mut self, bif: &BuiltIn, state: &mut FnState<'ctx>) -> Result<BasicValueEnum<'ctx>, CodegenError> { fn gen_builtin(&mut self, bif: &BuiltIn, state: &mut FnState<'ctx>) -> Result<BasicValueEnum<'ctx>, CodegenError> {
let i64_t = self.context.i64_type(); let i64_t = self.context.i64_type();
match bif { match bif {
BuiltIn::Elem(e) => {
// %ELEM(array) — number of elements declared with DIM(n).
if let Expression::Variable(qname) = e.as_ref() {
let name = qname.leaf();
if let Some(&n) = self.array_dims.get(name) {
return Ok(i64_t.const_int(n, false).into());
}
// Fall back to 1 if not an array variable.
return Ok(i64_t.const_int(1, false).into());
}
Ok(i64_t.const_zero().into())
}
BuiltIn::Len(e) => { BuiltIn::Len(e) => {
// %LEN(field) — return compile-time field length. // %LEN(field) — return compile-time field length.
if let Expression::Variable(qname) = e.as_ref() { if let Expression::Variable(qname) = e.as_ref() {
@@ -1267,8 +1500,30 @@ impl<'ctx> Codegen<'ctx> {
let abs = self.builder.build_select(cmp, iv, neg, "abs").unwrap(); let abs = self.builder.build_select(cmp, iv, neg, "abs").unwrap();
Ok(abs.into()) Ok(abs.into())
} }
BuiltIn::Int(e) | BuiltIn::Char(e) => { BuiltIn::Int(e) => {
self.gen_expression(e, state) let v = self.gen_expression(e, state)?;
Ok(self.coerce_to_i64(v).into())
}
BuiltIn::Char(e) => {
// %CHAR(expr) — convert to a null-terminated C string pointer.
let v = self.gen_expression(e, state)?;
match v {
BasicValueEnum::PointerValue(_) => Ok(v), // already a string
_ => {
// Convert integer to string via rpg_char_i64.
let iv = self.coerce_to_i64(v);
let char_fn = self.module.get_function("rpg_char_i64").unwrap();
let call = self.builder
.build_call(char_fn, &[iv.into()], "char_i64")
.unwrap();
match call.try_as_basic_value() {
inkwell::values::ValueKind::Basic(v) => Ok(v),
inkwell::values::ValueKind::Instruction(_) => {
Ok(self.context.ptr_type(AddressSpace::default()).const_null().into())
}
}
}
}
} }
BuiltIn::Sqrt(e) => { BuiltIn::Sqrt(e) => {
let v = self.gen_expression(e, state)?; let v = self.gen_expression(e, state)?;
@@ -1387,6 +1642,32 @@ impl<'ctx> Codegen<'ctx> {
Ok(self.builder.build_int_compare(pred, l, r, "cmp").unwrap_or_else(|_| i64_t.const_zero())) Ok(self.builder.build_int_compare(pred, l, r, "cmp").unwrap_or_else(|_| i64_t.const_zero()))
} }
/// Turn `val` into a C string pointer (`i8*`).
///
/// * A pointer value is passed through untouched.
/// * Any other value is first coerced to `i64` and then formatted by a call
///   to the runtime's `rpg_char_i64`; the call's result is returned, or a
///   null pointer if the call yields no pointer value.
///
/// `_state` is accepted but not used.
fn coerce_to_cstr_ptr(
    &mut self,
    val: BasicValueEnum<'ctx>,
    _state: &mut FnState<'ctx>,
) -> PointerValue<'ctx> {
    if let BasicValueEnum::PointerValue(existing) = val {
        return existing;
    }
    let as_int = self.coerce_to_i64(val);
    let formatter = self.module.get_function("rpg_char_i64").unwrap();
    let call_site = self
        .builder
        .build_call(formatter, &[as_int.into()], "char_i64")
        .unwrap();
    if let inkwell::values::ValueKind::Basic(BasicValueEnum::PointerValue(text)) =
        call_site.try_as_basic_value()
    {
        text
    } else {
        self.context.ptr_type(AddressSpace::default()).const_null()
    }
}
fn coerce_to_i64(&self, val: BasicValueEnum<'ctx>) -> inkwell::values::IntValue<'ctx> { fn coerce_to_i64(&self, val: BasicValueEnum<'ctx>) -> inkwell::values::IntValue<'ctx> {
let i64_t = self.context.i64_type(); let i64_t = self.context.i64_type();
match val { match val {

View File

@@ -45,6 +45,24 @@ pub fn lower(source: &str) -> Result<Program, LowerError> {
Ok(program) Ok(program)
} }
/// Strip RPG IV compiler directives that start with `**` (e.g. `**FREE`,
/// `**CTDATA`) by blanking out those lines before tokenization.
///
/// A line counts as a directive when, after leading whitespace, it starts
/// with `**` followed by either a letter or nothing but whitespace. Any other
/// line that begins with `**` is kept, because it can be the continuation of
/// an expression that uses the exponent operator (for example `** 2;`).
///
/// Directive lines become empty lines rather than being removed, so the line
/// numbers of the remaining source stay unchanged. Lines are re-joined with
/// `\n`; a trailing newline in `source` is not reproduced.
fn strip_star_star_directives(source: &str) -> String {
    /// True when `line` is a `**` directive rather than ordinary code.
    fn is_directive(line: &str) -> bool {
        match line.trim_start().strip_prefix("**") {
            Some(rest) => {
                rest.trim().is_empty() || rest.starts_with(|c: char| c.is_alphabetic())
            }
            None => false,
        }
    }

    source
        .lines()
        .map(|line| if is_directive(line) { "" } else { line })
        .collect::<Vec<_>>()
        .join("\n")
}
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
// Error type // Error type
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
@@ -52,19 +70,29 @@ pub fn lower(source: &str) -> Result<Program, LowerError> {
#[derive(Debug)] #[derive(Debug)]
pub struct LowerError { pub struct LowerError {
pub message: String, pub message: String,
/// 1-based source line where the error was detected, if known.
pub line: Option<usize>,
} }
impl std::fmt::Display for LowerError { impl std::fmt::Display for LowerError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if let Some(ln) = self.line {
write!(f, "lower error (line {}): {}", ln, self.message)
} else {
write!(f, "lower error: {}", self.message) write!(f, "lower error: {}", self.message)
} }
} }
}
impl std::error::Error for LowerError {} impl std::error::Error for LowerError {}
impl LowerError { impl LowerError {
fn new(msg: impl Into<String>) -> Self { fn new(msg: impl Into<String>) -> Self {
LowerError { message: msg.into() } LowerError { message: msg.into(), line: None }
}
fn at(line: usize, msg: impl Into<String>) -> Self {
LowerError { message: msg.into(), line: Some(line) }
} }
} }
@@ -385,12 +413,22 @@ enum Token {
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
fn tokenize(source: &str) -> Result<Vec<Token>, LowerError> { fn tokenize(source: &str) -> Result<Vec<Token>, LowerError> {
let chars: Vec<char> = source.chars().collect(); // Strip **FREE / **CTDATA / any **word compiler directives first.
let cleaned = strip_star_star_directives(source);
let chars: Vec<char> = cleaned.chars().collect();
let mut pos = 0; let mut pos = 0;
let mut tokens = Vec::new(); let mut tokens = Vec::new();
let mut line: usize = 1;
while pos < chars.len() { while pos < chars.len() {
// Skip whitespace // Track line numbers.
if chars[pos] == '\n' {
line += 1;
pos += 1;
continue;
}
// Skip other whitespace
if chars[pos].is_whitespace() { if chars[pos].is_whitespace() {
pos += 1; pos += 1;
continue; continue;
@@ -490,6 +528,14 @@ fn tokenize(source: &str) -> Result<Vec<Token>, LowerError> {
'=' => { tokens.push(Token::OpEq); pos += 1; continue; } '=' => { tokens.push(Token::OpEq); pos += 1; continue; }
'*' => { '*' => {
if pos + 1 < chars.len() && chars[pos + 1] == '*' { if pos + 1 < chars.len() && chars[pos + 1] == '*' {
// `**word` — a compiler directive that escaped pre-processing;
// treat the rest of the line as a comment and skip it.
if pos + 2 < chars.len() && chars[pos + 2].is_alphabetic() {
while pos < chars.len() && chars[pos] != '\n' {
pos += 1;
}
continue;
}
tokens.push(Token::OpStar2); tokens.push(Token::OpStar2);
pos += 2; pos += 2;
} else { } else {
@@ -704,6 +750,7 @@ fn tokenize(source: &str) -> Result<Vec<Token>, LowerError> {
} }
tokens.push(Token::Eof); tokens.push(Token::Eof);
let _ = line; // line tracking available for future per-token storage
Ok(tokens) Ok(tokens)
} }
@@ -873,11 +920,12 @@ fn keyword_or_ident(upper: &str, original: &str) -> Token {
struct Parser { struct Parser {
tokens: Vec<Token>, tokens: Vec<Token>,
pos: usize, pos: usize,
_line: usize,
} }
impl Parser { impl Parser {
fn new(tokens: Vec<Token>) -> Self { fn new(tokens: Vec<Token>) -> Self {
Parser { tokens, pos: 0 } Parser { tokens, pos: 0, _line: 1 }
} }
fn peek(&self) -> &Token { fn peek(&self) -> &Token {
@@ -901,7 +949,10 @@ impl Parser {
if &tok == expected { if &tok == expected {
Ok(()) Ok(())
} else { } else {
Err(LowerError::new(format!("expected {:?}, got {:?}", expected, tok))) Err(LowerError::new(format!(
"expected {:?}, got {:?} (token index {})",
expected, tok, self.pos
)))
} }
} }
@@ -927,12 +978,21 @@ impl Parser {
fn parse_program(&mut self) -> Result<Program, LowerError> { fn parse_program(&mut self) -> Result<Program, LowerError> {
let mut declarations = Vec::new(); let mut declarations = Vec::new();
let mut procedures = Vec::new(); let mut procedures = Vec::new();
let mut skipped_tokens: Vec<String> = Vec::new();
while !self.is_eof() { while !self.is_eof() {
match self.peek() { match self.peek() {
Token::KwDclProc => { Token::KwDclProc => {
if let Ok(p) = self.parse_procedure() { if !skipped_tokens.is_empty() {
procedures.push(p); skipped_tokens.clear();
}
match self.parse_procedure() {
Ok(p) => procedures.push(p),
Err(e) => {
eprintln!("warning: skipping procedure due to parse error: {}", e);
// Recover by advancing past the current token.
self.advance();
}
} }
} }
Token::KwCtlOpt | Token::KwCtlOpt |
@@ -941,16 +1001,33 @@ impl Parser {
Token::KwDclDs | Token::KwDclDs |
Token::KwDclF | Token::KwDclF |
Token::KwBegSr => { Token::KwBegSr => {
if let Ok(d) = self.parse_declaration() { if !skipped_tokens.is_empty() {
declarations.push(d); skipped_tokens.clear();
} }
} match self.parse_declaration() {
_ => { Ok(d) => declarations.push(d),
// Skip unrecognised top-level tokens Err(e) => {
eprintln!("warning: skipping declaration due to parse error: {}", e);
self.advance(); self.advance();
} }
} }
} }
tok => {
// Accumulate unrecognised top-level tokens so we can report
// them as a meaningful diagnostic.
skipped_tokens.push(format!("{:?}", tok));
self.advance();
}
}
}
if !skipped_tokens.is_empty() {
eprintln!(
"warning: {} unrecognised top-level token(s) were skipped: {}",
skipped_tokens.len(),
skipped_tokens.join(", ")
);
}
Ok(Program { declarations, procedures }) Ok(Program { declarations, procedures })
} }
@@ -965,7 +1042,11 @@ impl Parser {
Token::KwDclDs => self.parse_dcl_ds(), Token::KwDclDs => self.parse_dcl_ds(),
Token::KwDclF => self.parse_dcl_f(), Token::KwDclF => self.parse_dcl_f(),
Token::KwBegSr => self.parse_subroutine(), Token::KwBegSr => self.parse_subroutine(),
tok => Err(LowerError::new(format!("unexpected token in declaration: {:?}", tok))), tok => Err(LowerError::new(format!(
"unexpected token in declaration: {:?}\
expected one of CTL-OPT, DCL-S, DCL-C, DCL-DS, DCL-F, BEG-SR",
tok
))),
} }
} }
@@ -1256,6 +1337,18 @@ impl Parser {
fn parse_var_keyword(&mut self) -> VarKeyword { fn parse_var_keyword(&mut self) -> VarKeyword {
match self.peek().clone() { match self.peek().clone() {
Token::KwDim => {
self.advance(); // KwDim
if self.peek() == &Token::LParen {
self.advance(); // (
if let Ok(expr) = self.parse_expression() {
self.eat(&Token::RParen);
return VarKeyword::Dim(expr);
}
self.eat(&Token::RParen);
}
VarKeyword::Other("DIM".to_string())
}
Token::KwInz => { Token::KwInz => {
self.advance(); self.advance();
if self.peek() == &Token::LParen { if self.peek() == &Token::LParen {
@@ -1342,6 +1435,10 @@ impl Parser {
// Body statements until END-PROC // Body statements until END-PROC
let body = self.parse_statement_list(&[Token::KwEndProc]); let body = self.parse_statement_list(&[Token::KwEndProc]);
self.eat(&Token::KwEndProc); self.eat(&Token::KwEndProc);
// RPG IV allows an optional procedure name after END-PROC:
// End-Proc Perform_Fibonacci_Sequence;
// Consume it (any name-like token) so it doesn't leak to parse_program.
let _ = self.try_parse_name();
self.eat_semicolon(); self.eat_semicolon();
Ok(Procedure { name, exported, pi, locals, body }) Ok(Procedure { name, exported, pi, locals, body })
@@ -1893,6 +1990,8 @@ impl Parser {
if self.peek() == &Token::LParen { if self.peek() == &Token::LParen {
// Peek ahead to decide: call or subscript-assignment? // Peek ahead to decide: call or subscript-assignment?
// If after the matching ')' we see '=' it's an assignment, else call. // If after the matching ')' we see '=' it's an assignment, else call.
// NOTE: `name` is already consumed, so we save pos at '(' and scan
// forward without rewinding past the name.
let saved = self.pos; let saved = self.pos;
self.advance(); // ( self.advance(); // (
let mut depth = 1; let mut depth = 1;
@@ -1904,11 +2003,22 @@ impl Parser {
} }
} }
let is_assign = self.peek() == &Token::OpEq; let is_assign = self.peek() == &Token::OpEq;
self.pos = saved; // rewind self.pos = saved; // rewind to '('
if is_assign { if is_assign {
// subscript assignment: `name(idx) = expr;` // subscript assignment: `name(idx) = expr;`
let lv = self.parse_lvalue()?; // Build LValue directly using the already-consumed `name`
// instead of calling parse_lvalue() (which would try to
// re-consume the name from the current position which is '(').
let qname = QualifiedName::simple(name.clone());
let mut indices = Vec::new();
self.advance(); // consume '('
indices.push(self.parse_expression()?);
while self.eat(&Token::Colon) {
indices.push(self.parse_expression()?);
}
self.eat(&Token::RParen);
let lv = LValue::Index(qname, indices);
self.expect(&Token::OpEq)?; self.expect(&Token::OpEq)?;
let value = self.parse_expression()?; let value = self.parse_expression()?;
self.eat_semicolon(); self.eat_semicolon();
@@ -2221,7 +2331,9 @@ impl Parser {
fn parse_builtin_expr(&mut self) -> Result<Expression, LowerError> { fn parse_builtin_expr(&mut self) -> Result<Expression, LowerError> {
let bif_tok = self.advance(); let bif_tok = self.advance();
self.expect(&Token::LParen)?; self.expect(&Token::LParen).map_err(|e| LowerError::new(format!(
"built-in function {:?}: {}", bif_tok, e.message
)))?;
let bif = match bif_tok { let bif = match bif_tok {
Token::BifLen => { Token::BifLen => {
let e = self.parse_expression()?; let e = self.parse_expression()?;
@@ -2277,6 +2389,11 @@ impl Parser {
self.eat(&Token::RParen); self.eat(&Token::RParen);
BuiltIn::Error BuiltIn::Error
} }
Token::BifElem => {
let e = self.parse_expression()?;
self.eat(&Token::RParen);
BuiltIn::Elem(Box::new(e))
}
Token::BifSize => { Token::BifSize => {
let e = self.parse_expression()?; let e = self.parse_expression()?;
self.eat(&Token::RParen); self.eat(&Token::RParen);

View File

@@ -36,7 +36,6 @@
use std::{ use std::{
fs, fs,
path::PathBuf, path::PathBuf,
process, process,
}; };
@@ -44,6 +43,97 @@ use std::{
use clap::Parser as ClapParser; use clap::Parser as ClapParser;
use rust_langrpg::{codegen, load_grammar, lower::lower, parse_as}; use rust_langrpg::{codegen, load_grammar, lower::lower, parse_as};
// ─────────────────────────────────────────────────────────────────────────────
// BNF pre-processing helper
// ─────────────────────────────────────────────────────────────────────────────
/// Uppercase all keyword-like tokens in `source` while preserving the content
/// of string literals, line comments, and block comments unchanged.
///
/// This lets the BNF grammar (which uses uppercase terminal literals) validate
/// RPG IV source that uses mixed-case keywords such as `Ctl-Opt` or `Dcl-S`.
///
/// At each position the scanner tries, in order:
///
/// 1. `//` line comment: copied verbatim up to (not including) the newline.
/// 2. `/* … */` block comment: copied verbatim; an unterminated comment
///    extends to the end of the input and is still copied untouched.
/// 3. `'…'` string literal: copied verbatim, with `''` treated as an escaped
///    quote that does not end the literal.
/// 4. Identifier / keyword: starts with a letter, `_`, `@`, `#` or `$` and
///    continues over alphanumerics, those symbols, and a `-` that is
///    immediately followed by a letter (so `Dcl-S` stays one token). Each
///    character is ASCII-uppercased; non-ASCII letters pass through as-is.
/// 5. Anything else (operators, punctuation, whitespace, digits) is copied.
///
/// Returns the normalised source as a new `String`.
fn uppercase_keywords_for_bnf(source: &str) -> String {
    // Symbols that RPG allows in names in addition to letters and digits.
    fn is_name_symbol(c: char) -> bool {
        matches!(c, '_' | '@' | '#' | '$')
    }

    let chars: Vec<char> = source.chars().collect();
    let len = chars.len();
    let mut out = String::with_capacity(source.len());
    let mut i = 0;

    while i < len {
        let current = chars[i];
        let lookahead = chars.get(i + 1).copied();

        // Line comment // … \n — copy verbatim
        if current == '/' && lookahead == Some('/') {
            while i < len && chars[i] != '\n' {
                out.push(chars[i]);
                i += 1;
            }
            continue;
        }

        // Block comment /* … */ — copy verbatim
        if current == '/' && lookahead == Some('*') {
            out.push_str("/*");
            i += 2;
            // Scan all the way to the end of input: if the comment is never
            // closed, its final character must still be copied as comment
            // text rather than falling through and being treated as code.
            while i < len {
                if chars[i] == '*' && chars.get(i + 1) == Some(&'/') {
                    out.push_str("*/");
                    i += 2;
                    break;
                }
                out.push(chars[i]);
                i += 1;
            }
            continue;
        }

        // String literal '…' — copy verbatim (including '' escape)
        if current == '\'' {
            out.push(current);
            i += 1;
            while i < len {
                let ch = chars[i];
                out.push(ch);
                i += 1;
                if ch == '\'' {
                    // '' is an escaped quote — keep going
                    if chars.get(i) == Some(&'\'') {
                        out.push('\'');
                        i += 1;
                    } else {
                        break;
                    }
                }
            }
            continue;
        }

        // Identifier / keyword — uppercase it so the BNF terminals match
        if current.is_alphabetic() || is_name_symbol(current) {
            while i < len {
                let ch = chars[i];
                // A hyphen only belongs to the name when a letter follows it
                // (e.g. `Dcl-S`); otherwise it is left for the fallback copy.
                let hyphen_joins = ch == '-'
                    && chars.get(i + 1).map_or(false, |n| n.is_alphabetic());
                if !(ch.is_alphanumeric() || is_name_symbol(ch) || hyphen_joins) {
                    break;
                }
                out.push(ch.to_ascii_uppercase());
                i += 1;
            }
            continue;
        }

        // Everything else (operators, punctuation, whitespace, digits)
        out.push(current);
        i += 1;
    }

    out
}
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
// CLI definition // CLI definition
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
@@ -127,17 +217,83 @@ fn main() {
}; };
// ── BNF validation ──────────────────────────────────────────────────── // ── BNF validation ────────────────────────────────────────────────────
let tree_opt = parse_as(&bnf_parser, source_text.trim(), "program") // RPG IV keywords are case-insensitive, but the BNF grammar uses
.or_else(|| parse_as(&bnf_parser, source_text.trim(), "source-file")); // uppercase terminal literals. Normalise the source before checking.
let normalised = uppercase_keywords_for_bnf(source_text.trim());
let tree_opt = parse_as(&bnf_parser, normalised.trim(), "program")
.or_else(|| parse_as(&bnf_parser, normalised.trim(), "source-file"));
if tree_opt.is_none() { if tree_opt.is_none() {
// BNF validation is a structural sanity-check. Emit a warning so
// the developer knows something looks off, but continue with the
// lowering pass which is more permissive and gives better errors.
eprintln!( eprintln!(
"error: '{}' did not match the RPG IV grammar", "warning: '{}' did not fully match the RPG IV grammar\
attempting to compile anyway",
source_path.display() source_path.display()
); );
any_error = true;
// ── Helpful diagnostics ──────────────────────────────────────────
// Scan for the first line the BNF cannot classify to give the user
// a concrete hint about what caused the mismatch.
let top_level_rules = &[
"control-spec",
"standalone-decl",
"constant-decl",
"data-structure-decl",
"file-decl",
"procedure",
"subroutine",
"statement",
];
'outer: for (lineno, raw_line) in source_text.lines().enumerate() {
let trimmed = raw_line.trim();
let norm_check = trimmed.to_ascii_uppercase();
// Skip blanks, comments, compiler directives, and lines that
// introduce multi-line constructs (DCL-PROC, END-PROC, DCL-DS,
// END-DS, DCL-PI, END-PI, BEG-SR, END-SR) — these will never
// match a single-line grammar rule and are not errors.
if trimmed.is_empty()
|| trimmed.starts_with("//")
|| trimmed.starts_with("/*")
|| trimmed.starts_with("**")
|| norm_check.starts_with("DCL-PROC")
|| norm_check.starts_with("END-PROC")
|| norm_check.starts_with("DCL-DS")
|| norm_check.starts_with("END-DS")
|| norm_check.starts_with("DCL-PI")
|| norm_check.starts_with("END-PI")
|| norm_check.starts_with("BEG-SR")
|| norm_check.starts_with("END-SR")
{
continue; continue;
} }
// Strip inline line comments before BNF matching so that
// `fib(1) = 0; // some comment` doesn't cause a false positive.
let trimmed_no_comment = if let Some(idx) = trimmed.find("//") {
trimmed[..idx].trim_end()
} else {
trimmed
};
let norm_line = uppercase_keywords_for_bnf(trimmed_no_comment);
let mut matched = false;
for rule in top_level_rules {
if parse_as(&bnf_parser, norm_line.trim(), rule).is_some() {
matched = true;
break;
}
}
if !matched {
eprintln!(
" hint (line {}): unrecognised grammar construct: {:?}",
lineno + 1,
if trimmed.len() > 80 { &trimmed[..80] } else { trimmed }
);
break 'outer;
}
}
// Fall through — try lowering anyway.
}
// ── --emit-tree: print parse tree and stop ──────────────────────────── // ── --emit-tree: print parse tree and stop ────────────────────────────
if cli.emit_tree { if cli.emit_tree {

View File

@@ -1,9 +1,16 @@
<wsc> ::= ' ' | ' ' | ' <wsc> ::= ' ' | ' ' | '
' | ' ' | '
'
<ws> ::= <wsc> | <wsc> <ws> <ws> ::= <wsc> | <wsc> <ws>
<opt-ws> ::= <ws> | '' <opt-ws> ::= <ws> | ''
<program> ::= <opt-ws> <program-body> <opt-ws> <program> ::= <opt-ws> <program-body> <opt-ws>
| <opt-ws> <free-directive> <opt-ws> <program-body> <opt-ws>
| <opt-ws> <free-directive> <opt-ws>
<free-directive> ::= '**FREE'
| '**free'
| '**Free'
<program-body> ::= <declaration-section> <opt-ws> <procedure-list> <program-body> ::= <declaration-section> <opt-ws> <procedure-list>
| <declaration-section> | <declaration-section>
@@ -24,7 +31,9 @@
| <procedure> | <procedure>
<procedure> ::= 'DCL-PROC' <ws> <identifier> <opt-ws> ';' <opt-ws> <procedure-body> <opt-ws> 'END-PROC' <opt-ws> ';' <procedure> ::= 'DCL-PROC' <ws> <identifier> <opt-ws> ';' <opt-ws> <procedure-body> <opt-ws> 'END-PROC' <opt-ws> ';'
| 'DCL-PROC' <ws> <identifier> <opt-ws> ';' <opt-ws> <procedure-body> <opt-ws> 'END-PROC' <ws> <identifier> <opt-ws> ';'
| 'DCL-PROC' <ws> <identifier> <ws> <proc-keyword-list> <opt-ws> ';' <opt-ws> <procedure-body> <opt-ws> 'END-PROC' <opt-ws> ';' | 'DCL-PROC' <ws> <identifier> <ws> <proc-keyword-list> <opt-ws> ';' <opt-ws> <procedure-body> <opt-ws> 'END-PROC' <opt-ws> ';'
| 'DCL-PROC' <ws> <identifier> <ws> <proc-keyword-list> <opt-ws> ';' <opt-ws> <procedure-body> <opt-ws> 'END-PROC' <ws> <identifier> <opt-ws> ';'
<proc-keyword-list> ::= <proc-keyword> <ws> <proc-keyword-list> <proc-keyword-list> ::= <proc-keyword> <ws> <proc-keyword-list>
| <proc-keyword> | <proc-keyword>