From ca81a2d0109095da0ba751853dccdbbd6e10411d Mon Sep 17 00:00:00 2001 From: charles Date: Mon, 4 May 2026 13:45:18 -0700 Subject: [PATCH] Update README --- README.md | 142 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 96 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index d64e445..8e761fe 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,38 @@ # roto -Rust protos without the pointers. +Zero-allocation Rust protobuf reader and writer. -The codegen is different; we don't create data structures. -We mark what where each field is, and only read it when asked. -The binary blob is never decompressed by the library; it is your -job to figure out how to store the data if you need to access it -more than once. +## Overview -And building protos? You use a builder. We don't make some fancy -structure and give you a marshal function, nah. You give us a blob -to write data into, and we write what you tell us, no questions asked. +Instead of deserializing binary protobuf data into Rust structs, roto scans a message _once_ on +construction — recording the byte offset of each field — then reads fields on demand directly from +the original bytes. No heap allocation, no data copying, no full deserialization upfront. -### Design +Writing works the same way: you provide a fixed buffer and a builder writes fields directly into it, +returning a slice of the bytes written. -The `protoc` command generates a CodeGeneratorRequest message; `protoc-gen-roto` (from src/bin/protoc-gen-roto.rs) -reads this message from stdin, and generated a CodeGeneratorResponse, which it sends to stdout. +## Design -The generated files get written to disk by protoc; these should be included in the Rust code being developed to -use the protobuffers in question. +`protoc` generates a `CodeGeneratorRequest` message; `protoc-gen-roto` (in +`src/bin/protoc-gen-roto.rs`) reads this from stdin, generates Rust source files, and writes a +`CodeGeneratorResponse` to stdout. 
`protoc` then writes those `.rs` files to disk. The generated +files are included directly in the crate that uses the protobuffers. -### Sample usage +## Generated code -```rust -/* +For each protobuf message roto generates two types: + +- **Reader struct** `MessageName<'a>` — borrows the original byte slice, zero-copy. +- **Builder struct** `MessageNameBuilder<'b>` — writes into a caller-provided `&mut [u8]`. + +Nested message types are placed in a `pub mod message_name { ... }` module (snake_case of the +parent message name) within the same generated file. + +## Sample usage + +Given this proto definition: + +```proto message Hello { string hello_world = 1; message InnerWorld { @@ -31,42 +40,83 @@ message Hello { } InnerWorld inner_world = 2; } - */ - -fn parse_proto(data: &[u8]) -> Result { - // Scans the data, marks where each flag is as an offset - // into the proto. - let accessor = HelloProto::new(data)?; - // Load the hello world string; returns bytes, not - // a Rust string. - let hello_world = accessor.hello_world()?; - // Inspect a nested message; accessing inner_world scans it - // for flag locations and returns a similiar access struct - let inner_world = accessor.inner_world()?.thought()?; - - format!("{} is about {}", hello_world, inner_world) -} ``` -### Sample builder usage +### Reading ```rust -let mut buf = [0u8; 1024]; -let mut builder = HelloProto::Builder::new(&mut buf) - .hello_world("some world") - .inner_world() // Returns an HelloProto::InnerWorld::Builder - .thought("some thought") - .done(); // returns the HelloProto::Builder -let bytes_written = builder.finish()?; // returns the number of bytes written to buffer +fn parse_proto(data: &[u8]) -> roto::Result<String> { + // Scan the data once, recording field offsets + let hello = Hello::new(data)?; + + // String fields return &str borrowed from the original bytes (zero-copy) + let hello_world: &str = hello.hello_world()?; + + // Nested message fields return &[u8]; construct the nested reader from 
those bytes + let inner_bytes: &[u8] = hello.inner_world()?; + let inner_world = hello::InnerWorld::new(inner_bytes)?; + let thought: &str = inner_world.thought()?; + + Ok(format!("{} is about {}", hello_world, thought)) +} ``` -### High level design +Fields absent from the binary data return `Err(roto::RotoError::FieldNotFound)`. -The runtime library offers an iterator over the fields in a message, using the protobuf wire format provide -objects of flag and type. Codegen creates a 'wrapper' that iterates over the message, and records the -byte offset of each element. Helper methods in the wrapper give the user access to the name fields, -casted to the appropriate data type. +### Writing -### Literature +Nested messages must be serialized into a scratch buffer first, then embedded as raw bytes in the +outer builder. + +```rust +fn build_proto(buf: &mut [u8]) -> roto::Result<&[u8]> { + // Serialize the inner message first + let mut inner_buf = [0u8; 256]; + let inner_bytes = hello::InnerWorldBuilder::builder(&mut inner_buf) + .thought("some thought")? + .finish()?; + + // Build the outer message, embedding the serialized inner bytes + HelloBuilder::builder(buf) + .hello_world("some world")? + .inner_world(inner_bytes)? + .finish() // returns Result<&'b mut [u8]> — the written portion of buf +} +``` + +Builder methods consume `self` and return `Result<Self>`, enabling `?`-based chaining. +`finish()` returns `Result<&'b mut [u8]>` — a slice of the portion of the buffer that was written. + +### Repeated fields + +Repeated fields return a `RepeatedFieldIterator<'a>`. Each item yields `Result<(&[u8], WireType)>`. + +```rust +let hello = Hello::new(data)?; +for item in hello.tags() { + let (value_bytes, _wire_type) = item?; + // decode value_bytes according to the expected wire type +} +``` + +## Runtime API + +The core runtime in `src/lib.rs` provides: + +- `ProtoAccessor<'a>` — scans a message's fields and reads values at recorded offsets. 
+- `ProtoBuilder<'a>` — writes fields into a provided `&mut [u8]` buffer. +- `FieldIterator<'a>` / `RepeatedFieldIterator<'a>` — iterators over fields and repeated fields. +- `Tag`, `WireType` — protobuf encoding primitives. +- `read_varint`, `write_varint`, `skip_value` — low-level wire-format helpers. +- `RotoError`, `Result` — error type and alias. + +## High-level design + +On construction (`MessageName::new(data)`), the generated reader struct iterates the binary once +using `FieldIterator` and records the byte offset of each field's tag. Subsequent field accesses +call `ProtoAccessor::get_value_at(offset)` — no re-scanning. For repeated fields, the start and +end offsets of the field range are recorded to bound iteration efficiently. + +## Literature https://protobuf.dev/programming-guides/encoding/