add: compiler

This commit is contained in:
2026-03-12 21:41:30 -07:00
parent 90de2206db
commit 3498b018e5
10 changed files with 6190 additions and 78 deletions

View File

@@ -1,7 +1,11 @@
//! rust-langrpg — RPG IV compiler CLI
//!
//! Parses one or more RPG IV source files using the embedded BNF grammar
//! and optionally writes the resulting parse tree to an output file.
//! Full compilation pipeline:
//! source (.rpg)
//! → BNF validation (bnf crate)
//! → AST lowering (lower.rs)
//! → LLVM IR / object (codegen.rs via inkwell)
//! → native executable (cc linker + librpgrt.so runtime)
//!
//! ## Usage
//!
@@ -9,46 +13,80 @@
//! rust-langrpg [OPTIONS] <SOURCES>...
//!
//! Arguments:
//! <SOURCES>... RPG IV source file(s) to parse
//! <SOURCES>... RPG IV source file(s) to compile
//!
//! Options:
//! -o <OUTPUT> Write the parse tree to this file
//! -h, --help Print help
//! -V, --version Print version
//! -o <OUTPUT> Output executable path [default: a.out]
//! --emit-ir Print LLVM IR to stdout instead of producing a binary
//! --emit-tree Print BNF parse tree to stdout instead of compiling
//! -O <LEVEL> Optimisation level 0-3 [default: 0]
//! --no-link Produce a .o object file, skip linking
//! --runtime <PATH> Path to librpgrt.so [default: auto-detect]
//! -h, --help Print help
//! -V, --version Print version
//! ```
//!
//! ## Example
//!
//! ```text
//! cargo run --release -- -o out.txt hello.rpg
//! cargo run --release -- -o main hello.rpg
//! ./main
//! DSPLY Hello, World!
//! ```
use std::{
fs,
io::{self, Write},
path::PathBuf,
process,
};
use clap::Parser;
use rust_langrpg::{load_grammar, parse_as};
use clap::Parser as ClapParser;
use rust_langrpg::{codegen, load_grammar, lower::lower, parse_as};
// ─────────────────────────────────────────────────────────────────────────────
// CLI definition
// ─────────────────────────────────────────────────────────────────────────────
/// RPG IV free-format compiler — produces native Linux executables from RPG IV
/// source files using LLVM as the back-end.
//
// NOTE: the `///` comments on the fields below double as the `--help` text
// generated by clap, so edit them with the user-facing output in mind.
#[derive(ClapParser, Debug)]
#[command(
    name = "rust-langrpg",
    version,
    about = "RPG IV compiler (LLVM back-end)",
    long_about = None,
)]
struct Cli {
    /// RPG IV source file(s) to compile.
    #[arg(required = true, value_name = "SOURCES")]
    sources: Vec<PathBuf>,

    /// Write the output executable (or object with --no-link) to this path.
    /// If omitted the binary is written to `a.out`.
    #[arg(short = 'o', value_name = "OUTPUT")]
    output: Option<PathBuf>,

    /// Emit LLVM IR text to stdout instead of compiling to a binary.
    #[arg(long = "emit-ir")]
    emit_ir: bool,

    /// Emit the BNF parse tree to stdout instead of compiling.
    #[arg(long = "emit-tree")]
    emit_tree: bool,

    /// Optimisation level: 0 = none, 1 = less, 2 = default, 3 = aggressive.
    #[arg(short = 'O', default_value = "0", value_name = "LEVEL")]
    opt_level: u8,

    /// Produce a `.o` object file but do not invoke the linker.
    #[arg(long = "no-link")]
    no_link: bool,

    /// Path to the `librpgrt.so` runtime shared library.
    /// If not specified the compiler searches in common locations.
    #[arg(long = "runtime", value_name = "PATH")]
    runtime: Option<PathBuf>,
}
// ─────────────────────────────────────────────────────────────────────────────
@@ -58,7 +96,7 @@ struct Cli {
fn main() {
let cli = Cli::parse();
// ── Load grammar ─────────────────────────────────────────────────────────
// ── Load and build the BNF grammar ───────────────────────────────────────
let grammar = match load_grammar() {
Ok(g) => g,
Err(e) => {
@@ -67,61 +105,139 @@ fn main() {
}
};
// ── Build parser ─────────────────────────────────────────────────────────
let parser = match grammar.build_parser() {
let bnf_parser = match grammar.build_parser() {
Ok(p) => p,
Err(e) => {
eprintln!("error: failed to build parser: {e}");
eprintln!("error: failed to build BNF parser: {e}");
process::exit(1);
}
};
// ── Open output sink ──────────────────────────────────────────────────────
// `output` is Box<dyn Write> so we can use either a file or a sink that
// discards everything when -o was not supplied.
let mut output: Box<dyn Write> = match &cli.output {
Some(path) => {
let file = fs::File::create(path).unwrap_or_else(|e| {
eprintln!("error: cannot open output file '{}': {e}", path.display());
process::exit(1);
});
Box::new(io::BufWriter::new(file))
}
None => Box::new(io::sink()),
};
// ── Process each source file ──────────────────────────────────────────────
// ── Process each source file ─────────────────────────────────────────────
let mut any_error = false;
for path in &cli.sources {
let source = match fs::read_to_string(path) {
for source_path in &cli.sources {
let source_text = match fs::read_to_string(source_path) {
Ok(s) => s,
Err(e) => {
eprintln!("error: cannot read '{}': {e}", path.display());
eprintln!("error: cannot read '{}': {e}", source_path.display());
any_error = true;
continue;
}
};
// Try the top-level "program" rule first; fall back to "source-file"
// so the binary is useful even if only one of those rule names exists
// in the grammar.
let tree = parse_as(&parser, source.trim(), "program")
.or_else(|| parse_as(&parser, source.trim(), "source-file"));
// ── BNF validation ────────────────────────────────────────────────────
let tree_opt = parse_as(&bnf_parser, source_text.trim(), "program")
.or_else(|| parse_as(&bnf_parser, source_text.trim(), "source-file"));
match tree {
Some(t) => {
eprintln!("ok: {}", path.display());
writeln!(output, "=== {} ===", path.display())
.and_then(|_| writeln!(output, "{t}"))
.unwrap_or_else(|e| {
eprintln!("error: write failed: {e}");
any_error = true;
});
}
None => {
eprintln!("error: '{}' did not match the RPG IV grammar", path.display());
if tree_opt.is_none() {
eprintln!(
"error: '{}' did not match the RPG IV grammar",
source_path.display()
);
any_error = true;
continue;
}
// ── --emit-tree: print parse tree and stop ────────────────────────────
if cli.emit_tree {
println!("=== {} ===", source_path.display());
println!("{}", tree_opt.unwrap());
eprintln!("ok: {} (parse tree emitted)", source_path.display());
continue;
}
eprintln!("ok: {} (BNF valid)", source_path.display());
// ── Lower to typed AST ────────────────────────────────────────────────
let program = match lower(source_text.trim()) {
Ok(p) => p,
Err(e) => {
eprintln!("error: lowering '{}' failed: {e}", source_path.display());
any_error = true;
continue;
}
};
eprintln!(
"ok: {} ({} declaration(s), {} procedure(s))",
source_path.display(),
program.declarations.len(),
program.procedures.len(),
);
// ── --emit-ir: print LLVM IR and stop ────────────────────────────────
if cli.emit_ir {
match codegen::emit_ir(&program) {
Ok(ir) => {
print!("{}", ir);
}
Err(e) => {
eprintln!("error: IR emission failed for '{}': {e}", source_path.display());
any_error = true;
}
}
continue;
}
// ── Determine output path ─────────────────────────────────────────────
let out_path = if cli.no_link {
// Object file: replace source extension with .o
let mut p = cli.output.clone().unwrap_or_else(|| {
let mut base = source_path.clone();
base.set_extension("o");
base
});
if p.extension().and_then(|e| e.to_str()) != Some("o") {
p.set_extension("o");
}
p
} else {
// Executable: use -o, or default to a.out
cli.output.clone().unwrap_or_else(|| PathBuf::from("a.out"))
};
// ── Compile to object file ────────────────────────────────────────────
let obj_path: PathBuf = if cli.no_link {
out_path.clone()
} else {
// Temporary object file alongside the final binary.
let stem = source_path
.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("rpg_prog");
let mut tmp = std::env::temp_dir();
tmp.push(format!("{}.rpg.o", stem));
tmp
};
match codegen::compile_to_object(&program, &obj_path, cli.opt_level) {
Ok(()) => {
eprintln!("ok: object → {}", obj_path.display());
}
Err(e) => {
eprintln!(
"error: codegen failed for '{}': {e}",
source_path.display()
);
any_error = true;
continue;
}
}
// ── Link if requested ─────────────────────────────────────────────────
if !cli.no_link {
let runtime = find_runtime(cli.runtime.as_deref());
match link_executable(&obj_path, &out_path, runtime.as_deref()) {
Ok(()) => {
eprintln!("ok: executable → {}", out_path.display());
// Clean up the temporary object.
let _ = fs::remove_file(&obj_path);
}
Err(msg) => {
eprintln!("error: linking failed: {msg}");
any_error = true;
}
}
}
}
@@ -130,3 +246,206 @@ fn main() {
process::exit(1);
}
}
// ─────────────────────────────────────────────────────────────────────────────
// Linker invocation
// ─────────────────────────────────────────────────────────────────────────────
/// Invoke the system C compiler driver to link `obj_path` into `exe_path`.
///
/// We use `cc` (which wraps the system linker) rather than calling `ld`
/// directly so that the C runtime startup files (`crt0.o`, `crti.o`, etc.) are
/// included automatically.
///
/// `runtime` selects how the RPG runtime library is linked:
///
/// * `Some(path)` where `path` is an existing file — the file is handed to
///   the linker as-is and its parent directory is recorded as an rpath.
/// * `Some(path)` otherwise — `path` is treated as a library search
///   directory (`-L<path> -lrpgrt`) and that same directory is recorded as
///   the rpath.
/// * `None` — only `-lc` is passed; no RPG runtime is linked and no rpath is
///   recorded.
///
/// The rpath is made absolute when the directory can be resolved, so the
/// produced executable does not depend on the directory it is started from.
///
/// # Errors
///
/// Returns a human-readable message when `cc` cannot be spawned or when it
/// exits unsuccessfully.
fn link_executable(
    obj_path: &std::path::Path,
    exe_path: &std::path::Path,
    runtime: Option<&std::path::Path>,
) -> Result<(), String> {
    let mut cmd = process::Command::new("cc");
    cmd.arg(obj_path).arg("-o").arg(exe_path);

    match runtime {
        Some(rt) => {
            // Work out which directory actually contains the library; this is
            // what the dynamic loader must search at run time.
            let lib_dir: &std::path::Path = if rt.is_file() {
                // Path to the shared object itself — pass it directly.
                cmd.arg(rt);
                match rt.parent() {
                    Some(dir) if !dir.as_os_str().is_empty() => dir,
                    // A bare file name lives in the current directory.
                    _ => std::path::Path::new("."),
                }
            } else {
                // Anything else is taken to be a library search directory.
                // Build the flag as an OsString so non-UTF-8 paths survive.
                let mut search = std::ffi::OsString::from("-L");
                search.push(rt);
                cmd.arg(search).arg("-lrpgrt");
                rt
            };

            // A relative rpath would be resolved against the working directory
            // of the *running program*, so prefer an absolute one. Fall back
            // to the path as given when it cannot be resolved (e.g. it does
            // not exist yet).
            let rpath_dir =
                std::fs::canonicalize(lib_dir).unwrap_or_else(|_| lib_dir.to_path_buf());
            let mut rpath = std::ffi::OsString::from("-Wl,-rpath,");
            rpath.push(&rpath_dir);
            cmd.arg(rpath);
        }
        None => {
            // No runtime located — link against libc only. The program will
            // need librpgrt.so to be resolvable by other means (for example
            // LD_LIBRARY_PATH) if it references runtime symbols.
            cmd.arg("-lc");
        }
    }

    let status = cmd
        .status()
        .map_err(|e| format!("could not run linker `cc`: {e}"))?;

    if status.success() {
        Ok(())
    } else {
        // `ExitStatus` already renders as "exit status: N" / "signal: N".
        Err(format!("`cc` failed: {status}"))
    }
}
// ─────────────────────────────────────────────────────────────────────────────
// Runtime library discovery
// ─────────────────────────────────────────────────────────────────────────────
/// Locate the `librpgrt.so` runtime library.
///
/// Sources are consulted in this order and the first hit wins:
///
/// 1. `explicit`, when supplied — returned verbatim, without checking that it
///    exists
/// 2. the `RPGRT_LIB` environment variable, if it names an existing path
/// 3. `librpgrt.so` in the same directory as the compiler executable
/// 4. `target/debug/`, `target/release/` and `target/debug/deps/` relative to
///    the current directory (useful when running via `cargo run`)
/// 5. `/usr/local/lib`
/// 6. `/usr/lib`
///
/// Returns `None` when nothing was found.
fn find_runtime(explicit: Option<&std::path::Path>) -> Option<PathBuf> {
    // Honour an explicitly supplied path first.
    if let Some(given) = explicit {
        return Some(given.to_path_buf());
    }

    // Environment override. `var_os` (rather than `var`) keeps paths that are
    // not valid Unicode instead of silently ignoring them.
    let from_env = std::env::var_os("RPGRT_LIB")
        .map(PathBuf::from)
        .filter(|p| p.exists());
    if from_env.is_some() {
        return from_env;
    }

    // Alongside the running compiler binary.
    let beside_compiler = std::env::current_exe()
        .ok()
        .and_then(|exe| exe.parent().map(|dir| dir.join("librpgrt.so")));

    // Cargo target directories first, then system-wide locations.
    let well_known = [
        "target/debug/librpgrt.so",
        "target/release/librpgrt.so",
        "target/debug/deps/librpgrt.so",
        "/usr/local/lib/librpgrt.so",
        "/usr/lib/librpgrt.so",
    ];

    beside_compiler
        .into_iter()
        .chain(well_known.iter().map(|p| PathBuf::from(*p)))
        .find(|candidate| candidate.exists())
}
// ─────────────────────────────────────────────────────────────────────────────
// Integration smoke tests (in-process: lowering and IR emission only — no linking or process spawning)
// ─────────────────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use rust_langrpg::{codegen::emit_ir, lower::lower};

    /// Return at most the first `max_bytes` bytes of `ir`, for inclusion in
    /// assertion failure messages.
    ///
    /// The cut is moved back to the nearest character boundary so that
    /// building the failure message can never panic by itself and mask the
    /// real assertion.
    fn ir_preview(ir: &str, max_bytes: usize) -> &str {
        let mut end = ir.len().min(max_bytes);
        // Index 0 is always a boundary, so this loop terminates.
        while !ir.is_char_boundary(end) {
            end -= 1;
        }
        &ir[..end]
    }

    /// The hello.rpg from the repository root must compile all the way through
    /// to LLVM IR without errors.
    #[test]
    fn hello_rpg_emits_ir() {
        let src = include_str!("../hello.rpg");
        let prog = lower(src.trim()).expect("lower hello.rpg");
        let ir = emit_ir(&prog).expect("emit_ir hello.rpg");

        // The IR must define at least one function.
        assert!(
            ir.contains("define"),
            "IR should contain at least one function definition:\n{}",
            ir_preview(&ir, 1000)
        );

        // The IR must reference the dsply runtime call.
        assert!(
            ir.contains("rpg_dsply"),
            "IR should reference rpg_dsply:\n{}",
            ir_preview(&ir, 1000)
        );

        // There must be a C main() wrapper so the binary is directly executable.
        assert!(
            ir.contains("@main"),
            "IR should contain a @main entry point:\n{}",
            ir_preview(&ir, 1000)
        );
    }

    /// A minimal RPG IV program with an integer variable and a loop must
    /// compile to IR that contains branch instructions (i.e. the loop was
    /// actually code-generated, not silently dropped).
    #[test]
    fn loop_program_emits_branches() {
        let src = r#"
CTL-OPT DFTACTGRP(*NO);
DCL-S counter INT(10) INZ(0);
DCL-PROC main EXPORT;
DCL-S i INT(10);
FOR i = 1 TO 10;
counter = counter + i;
ENDFOR;
RETURN;
END-PROC;
"#;
        let prog = lower(src.trim()).expect("lower loop program");
        let ir = emit_ir(&prog).expect("emit_ir loop program");

        assert!(
            ir.contains("br "),
            "loop IR should contain branch instructions:\n{}",
            ir_preview(&ir, 2000)
        );
    }

    /// An IF/ELSE conditional must produce a conditional branch in the IR.
    #[test]
    fn conditional_program_emits_conditional_branch() {
        let src = r#"
DCL-PROC check EXPORT;
DCL-S x INT(10) INZ(5);
IF x = 5;
RETURN;
ELSE;
RETURN;
ENDIF;
END-PROC;
"#;
        let prog = lower(src.trim()).expect("lower conditional program");
        let ir = emit_ir(&prog).expect("emit_ir conditional program");

        assert!(
            ir.contains("br i1"),
            "conditional IR should contain 'br i1':\n{}",
            ir_preview(&ir, 2000)
        );
    }
}