From a02907505ed1b65bb08d6a77c3b9676ab6c07d02 Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Thu, 6 Jan 2022 18:38:02 +1100 Subject: [PATCH 1/1] human-string-filler version 1.0.0 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit As you may imagine, there was various experimental work before this, and in fact I changed syntaxes a few times (e.g. escaping with backslashes), but I decided to throw away that history when publishing it. And then I didn’t publish it for another year or so, and came back and realised there was more I wanted to add, and so moved from String to `W: fmt::Write` to get no-alloc support, and toned down my colon recommendation and added split_propertied and blah blah blah and bumped what had been going to be 0.1.0 to 1.0.0 because why not anyway. Then I read the commit message I had made which had the subsequent paragraphs in full, and snorted as I contemplated the fact that in the last few days I’ve published two other crates with the BlueOak-1.0.0 option, when clearly I had intended this to be my first. Eh, c’est la vie, as I so often seem to end up saying. Well, on with the rest: At some point someone may ask why no license text is included for MIT (which requires it) or Apache-2.0 (which requires at least a notice of similar length to the full MIT license, even if not the full text of the license; concerning putting such a header in every source file, lawyers tend to say “do it”, but I don’t believe the license actually requires that). This is a deliberate decision on my part; BlueOak-1.0.0 doesn’t require it, deeming a link sufficient in this era, where it wasn’t when MIT and Apache-2.0 were made. So I’m applying common sense and clear intent, both of which have stronger value than legalese. 
The whole thing is a gentleman’s agreement anyway; my conscience would not allow me to take legal action on any violations (see 1 Corinthians 6 for the general theme of reasoning, even though it’s not the same situation and thus not directly applicable). As for the inclusion of BlueOak-1.0.0 as an alternative to MIT and Apache-2.0? I prefer its comparative simplicity and completeness. (Fun aside: when you can choose between two licenses, we call it dual-licensing. When you can choose between three licenses, the equivalent term would be trial-licensing. That term is… inapt.) --- .gitignore | 2 + COPYRIGHT | 17 + Cargo.toml | 20 + README.md | 170 +++++++++ design-discussion.md | 137 +++++++ src/lib.rs | 853 +++++++++++++++++++++++++++++++++++++++++++ test | 13 + 7 files changed, 1212 insertions(+) create mode 100644 .gitignore create mode 100644 COPYRIGHT create mode 100644 Cargo.toml create mode 100644 README.md create mode 100644 design-discussion.md create mode 100644 src/lib.rs create mode 100755 test diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e9e2199 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target/ +/Cargo.lock diff --git a/COPYRIGHT b/COPYRIGHT new file mode 100644 index 0000000..2cced42 --- /dev/null +++ b/COPYRIGHT @@ -0,0 +1,17 @@ +Copyright © 2022 Chris Morgan + +This project is distributed under the terms of three different licenses, +at your choice: + +- Blue Oak Model License 1.0.0: https://blueoakcouncil.org/license/1.0.0 +- MIT License: https://opensource.org/licenses/MIT +- Apache License, Version 2.0: https://www.apache.org/licenses/LICENSE-2.0 + +If you do not have particular cause to select the MIT or the Apache-2.0 +license, Chris Morgan recommends that you select BlueOak-1.0.0, which is +better and simpler than both MIT and Apache-2.0, which are only offered +due to their greater recognition and their conventional use in the Rust +ecosystem. (BlueOak-1.0.0 was only published in March 2019.) 
+ +When using this code, ensure you comply with the terms of at least one of +these licenses. diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..038f115 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "human-string-filler" +description = "A tiny template language for human-friendly string substitutions" +authors = ["Chris Morgan "] +license = "BlueOak-1.0.0 OR MIT OR Apache-2.0" +version = "1.0.0" +edition = "2021" +keywords = ["template", "format"] +categories = ["no-std", "template-engine", "text-processing"] +repository = "https://gitlab.com/chris-morgan/human-string-filler" +exclude = ["design-discussion.md"] + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[features] +default = ["std"] +std = ["alloc"] +alloc = [] diff --git a/README.md b/README.md new file mode 100644 index 0000000..7dff6a0 --- /dev/null +++ b/README.md @@ -0,0 +1,170 @@ +# human-string-filler + +A tiny template language for human-friendly string substitutions. + +This crate is intended for situations where you need the user to be able to write simple +templated strings, and conveniently evaluate them. It’s deliberately simple so that there are +no surprises in its performance or functionality, and so that it’s not accidentally tied to +Rust (e.g. you can readily implement it in a JavaScript-powered web app), which would happen +if things like number formatting specifiers were included out of the box—instead, if you want +that sort of thing, you’ll have to implement it yourself (don’t worry, it won’t be hard). + +No logic is provided in this template language, only simple string formatting: `{…}` template +regions get replaced in whatever way you decide, curly braces get escaped by doubling them +(`{{` and `}}`), and *that’s it*. 
+ +## Sample usage + +The **lowest-level** handling looks like this: + +```rust +use human_string_filler::{StrExt, SimpleFillerError}; + +let mut output = String::new(); +"Hello, {name}!".fill_into(&mut output, |output: &mut String, key: &str| { + match key { + "name" => output.push_str("world"), + _ => return Err(SimpleFillerError::NoSuchKey), + } + Ok(()) +}).unwrap(); + +assert_eq!(output, "Hello, world!"); +``` + +`template.fill_into(output, filler)` (provided by `StrExt`) can also be spelled +`fill(template, filler, output)` if you prefer a function to a method +(I reckon the method syntax is clearer, but opinions will differ so I provided both). + +The filler function appends to the string directly for efficiency in case of computed values, +and returns `Result<(), E>`; any error will become `Err(Error::BadReplacement { error, .. })` +on the fill call. (In this example I’ve used `SimpleFillerError::NoSuchKey`, but `()` would +work almost as well, or you can write your own error type altogether.) + +This example showed a closure that took `&mut String` and used `.push_str(…)`, but this crate +is not tied to `String` in any way: for greater generality you would use a function generic +over a type that implements `std::fmt::Write`, and use `.write_str(…)?` inside (`?` works there +because `SimpleFillerError` implements `From<std::fmt::Error>`). + +At a **higher level**, you can use a string-string map as a filler, and you can also fill +directly to a `String` with `.fill_to_string()` (also available as a standalone function +`fill_to_string`): + +```rust +use std::collections::HashMap; +use human_string_filler::StrExt; + +let mut map = HashMap::new(); +map.insert("name", "world"); + +let s = "Hello, {name}!".fill_to_string(&map); + +assert_eq!(s.unwrap(), "Hello, world!"); +``` + +Or you can implement the `Filler` trait for some other type of your own if you like. + +## Cargo features + +- **std** (enabled by default): remove for `#![no_std]` operation. Implies *alloc*. 
+ - Implementation of `std::error::Error` for `Error`; + - Implementation of `Filler` for `&HashMap`. + +- **alloc** (enabled by default via *std*): + - Implementation of `Filler` for `&BTreeMap`. + - `fill_to_string` and `StrExt::fill_to_string`. + +## The template language + +This is the grammar of the template language in [ABNF](https://tools.ietf.org/html/rfc5234): + +```abnf +unescaped-normal-char = %x00-7A / %x7C / %x7E-D7FF / %xE000-10FFFF + ; any Unicode scalar value except for "{" and "}" + +normal-char = unescaped-normal-char / "{{" / "}}" + +template-region = "{" *unescaped-normal-char "}" + +template-string = *( normal-char / template-region ) +``` + +This regular expression will validate a template string: + +```text +^([^{}]|\{\{|\}\}|\{[^{}]*\})*$ +``` + +Sample legal template strings: + +- The empty string +- `Hello, {name}!`: one template region with key "name". +- `Today is {date:short}`: one template region with key "date:short". (Although there’s no + format specification like with the `format!()` macro, a colon convention is one reasonable + option—see the next section.) +- `Hello, {}!`: one template region with an empty key, not recommended but allowed. +- `Escaped {{ braces {and replacements} for {fun}!`: string "Escaped { braces ", followed by a + template region with key "and replacements", followed by string " for ", followed by a + template region with key "fun", followed by string "!". + +Sample illegal template strings: + +- `hello, {world}foo}`: opening and closing curlies must match; any others (specifically, the + last character of the string) must be escaped by doubling. +- `{{thing}`: the `{{` is an escaped opening curly, so the `}` is unmatched. +- `{thi{{n}}g}`: no curlies of any form inside template region keys. (It’s possible that a + future version may make it possible to escape curlies inside template regions, if it proves + to be useful in something like format specifiers; but not at this time.) 
+ +## Conventions on key semantics + +The key is an arbitrary string (except that it can’t contain `{` or `}`) with explicitly no +defined semantics, but here are some suggestions, including helper functions: + +1. If it makes sense to have a format specifier (e.g. to specify a date format to use, or + whether to pad numbers with leading zeroes, *&c.*), split once on a character like `:`. + To do this most conveniently, a function `split_on` is provided. + +2. For more advanced formatting where you have multiple properties you could wish to set, + `split_propertied` offers some sound and similarly simple semantics for such strings as + `{key prop1 prop2=val2}` and `{key:prop1,prop2=val2}`. + +3. If it makes sense to have nested property access, split on `.` with the `key.split('.')` + iterator. (If you’re using `split_on` or `split_propertied` as mentioned above, you + probably want to apply them first to separate out the key part.) + +4. Only use [UAX #31 identifiers](https://www.unicode.org/reports/tr31/) for the key + (or keys, if supporting nested property access). Most of the time, empty strings and + numbers are probably not a good idea. + +With these suggestions, you might end up with the key `foo.bar:baz` being interpreted as +retrieving the “bar” property from the “foo” object, and formatting it according to “baz”; or +`aleph.beth.gimmel|alpha beta=5` as retrieving “gimmel” from “beth” of “aleph”, and formatting +it with properties “alpha” set to true and “beta” set to 5. What those things actually *mean* +is up to you to decide. *I* certainly haven’t a clue. + +## Author + +[Chris Morgan](https://chrismorgan.info/) +([chris-morgan](https://gitlab.com/chris-morgan)) +is the author and maintainer of human-string-filler. 
+ +## License + +Copyright © 2022 Chris Morgan + +This project is distributed under the terms of three different licenses, +at your choice: + +- Blue Oak Model License 1.0.0: https://blueoakcouncil.org/license/1.0.0 +- MIT License: https://opensource.org/licenses/MIT +- Apache License, Version 2.0: https://www.apache.org/licenses/LICENSE-2.0 + +If you do not have particular cause to select the MIT or the Apache-2.0 +license, Chris Morgan recommends that you select BlueOak-1.0.0, which is +better and simpler than both MIT and Apache-2.0, which are only offered +due to their greater recognition and their conventional use in the Rust +ecosystem. (BlueOak-1.0.0 was only published in March 2019.) + +When using this code, ensure you comply with the terms of at least one of +these licenses. diff --git a/design-discussion.md b/design-discussion.md new file mode 100644 index 0000000..71e33a2 --- /dev/null +++ b/design-discussion.md @@ -0,0 +1,137 @@ +# A discussion of parts of the design and trade-offs in this library + +## Terminology and semantics + +- `Hello, {thing}!` is a template string. +- In that, `{thing}` is a template region. +- In that, `thing` is a key. +- `{{` is an escaped opening curly brace. +- `}}` is an escaped closing curly brace. + +Beyond that, these are *suggestions* for template region semantics: + +- In the `{post.date:short}` template region: + - `post.date` is a path (the `date` field within the `post` object); + - `short` is a format specifier, which we might here imagine to mean “use + localised date formatting, short form”. + +- In the `{post.date time hour=24}` and `{post.date:time,hour=24}` + template regions (I’m not suggesting particular characters): + - `post.date` is a path (the `date` field within the `post` object); + - `time` is a boolean property, and `hour` a property with value `24`, + which we might here imagine to mean “show the time, in 24-hour mode”. 
+ +## Explanation of escaping + +It is necessary that literal `{` and `}` be able to be expressed, even if +they’re rare; there are three common techniques for achieving this: + +1. **Reserve certain template region keys to signify the delimiters,** e.g. + `{open_curly}` and `{close_curly}` to expand to `{` and `}`. Downsides: may + clash with real template region keys when arbitrary user keys are supported; + and it’s generally less memorable. +2. **Double the delimiter:** `{{` expands to `{` and `}}` to `}`. Seen in + string delimiters in a few languages, so `"foo""bar"` means “foo"bar”. + Downside: makes including the delimiters inside the key a difficult + proposition (see below). +3. **Escape by prefixing with another selected character:** select an escape + character which is also rare, most commonly a backslash due to its + convention in programming languages. Problems with using backslash: if you + need to represent Windows paths, literal backslashes aren’t so rare after + all, and you need to escape them all; and if you need to encode the template + string in a string literal in another language that uses the same escape + character (e.g. a TOML config file), all backslashes must now be escaped + again, e.g. string literals `"C:\\\\dir"` or `"\\{ ← literal curly"`. + +(A fourth technique that can be employed is delimiting the replacement regions +with values that will not appear in the desired text, as with the ␜, ␝, ␞ and ␟ +separator control characters in ASCII—and that more or less failed because it +was roughly machine-writable only, and so file types like CSV prospered due to +being human-writable, despite the accompanying escaping woes. This language +requires that the delimiters be easy for the user to type, so this fourth +technique is deemed impossible—all values that are easy to type may occur in +regular text.) + +One interesting consideration is whether you can express the delimiters inside +template regions. 
For simplicity, I’ve currently banned curlies inside template +regions, but there are things to consider if it becomes desirable to express it +later (e.g. in a format specifier). Here’s how each of the three techniques +previously mentioned handles it: + +1. Solvable in two ways, both of which require that curlies be paired inside + template regions: + (a) Replace only `{open_curly}` and `{close_curly}` literally inside + template regions; or + (b) Perform recursive template resolution! + +2. Well, should `{foo}}}` mean “template region with key ‘foo’ followed by + literal ‘}’”, or “template region with key ‘foo}’”? If you wish both to be + expressible, you need to provide some means of disambiguation. The most + straightforward is to ban a certain character at the start and end of + template regions, most probably HORIZONTAL SPACE (that is: declare “no + leading or trailing whitespace”), and then repurpose that to behave + essentially the same as `#` on Rust’s raw string literals, where you can + write `r"…"`, `r#"…"`, `r##"…"##`, *&c.* With this, `{foo}}}` would mean + “template region with key ‘foo’ followed by literal ‘}’”, and `{ foo} }` + would mean “template region with key ‘foo}’”. (Note how escaping curlies + inside template regions is not required. Decide for yourself whether that’s + good or bad.) + +3. Allow escapes inside the template region, e.g. `\{{\{\}}\}` would mean + “literal ‘{’ followed by template region with key ‘{}’ followed by literal + ‘}’”. Straightforward. + +This then leads to three possibilities: +(a) full recursive template rendering; +(b) arbitrary keys, or *almost* arbitrary if leading and trailing whitespace is + disallowed or ignored (depending on the approach); or +(c) no curlies in keys please. 
+ +I initially implemented the third technique (escape with backslash, and allow +escapes in template regions), but the downsides of using backslashes became too +severe for my liking, and I didn’t like any alternative characters on the +keyboard as much as just doubling the delimiter, so I went with the second +technique in the end. I don’t expect its downside to bite me, but we’ll see. + +## Other considered syntaxes + +I settled on using `{…}` for template regions. I also contemplated `$…`, but +ruled it out for the combination of these reasons: + +- it requires embedding UAX #31 identifier parsing which is heavier and less + *definitely obvious* for real users than I’d like; + +- for things like nested property access and format specifiers you’d need a + variant with a closing delimiter anyway, such as `${…}`, so now you’d have + *two* formats; + +- all this makes it harder to explain all the features of the language to + users. + +So `$…` was eliminated as too complex and insufficient. + +So it became more a toss-up between `$…$`, `%…%`, `${…}` and `{…}`. + +- `$…$` was eliminated because it’s not using any paired delimiting characters, + and because more programmery types would be more likely to expect `$…` than + `$…$`. + +- `%…%` has precedent in Windows’ environment variables, but again the lack of + *paired* delimiting characters makes it not so great. Also it just feels + uglier in the standard sort of case I have in mind. I’m picturing something + like `%ALBUM%/%TRACK% - %TITLE%.mp3`. (It doesn’t *have* to be uppercase, but + I find that using percent makes me think uppercase.) + +- Given my intended purpose (*small* strings rather than big multi-line + strings), the `$` in `${…}` didn’t feel necessary: consider for example + `{album}/{track} - {title}.mp3`, which feels about right, compared with + `${album}/${track} - ${title}.mp3`, which *works*, but feels… I dunno, + programmery rather than usery. + +So `{…}` won. 
+ +Taken with the `{{` and `}}` escaping, this makes it match Rust’s format +strings; this is purely coincidental. + +I’m pretty sure I considered `[…]` at some point too, but decided I didn’t like +it as much as `{…}`. diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..6d98850 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,853 @@ +#![cfg_attr(not(feature = "std"), no_std)] +#![deny(missing_docs)] +#![cfg_attr(docsrs, feature(doc_cfg))] +//! A tiny template language for human-friendly string substitutions. +//! +//! This crate is intended for situations where you need the user to be able to write simple +//! templated strings, and conveniently evaluate them. It’s deliberately simple so that there are +//! no surprises in its performance or functionality, and so that it’s not accidentally tied to +//! Rust (e.g. you can readily implement it in a JavaScript-powered web app), which would happen +//! if things like number formatting specifiers were included out of the box—instead, if you want +//! that sort of thing, you’ll have to implement it yourself (don’t worry, it won’t be hard). +//! +//! No logic is provided in this template language, only simple string formatting: `{…}` template +//! regions get replaced in whatever way you decide, curly braces get escaped by doubling them +//! (`{{` and `}}`), and *that’s it*. +//! +//! ## Sample usage +//! +//! The **lowest-level** handling looks like this: +//! +//! ```rust +//! use human_string_filler::{StrExt, SimpleFillerError}; +//! +//! let mut output = String::new(); +//! "Hello, {name}!".fill_into(&mut output, |output: &mut String, key: &str| { +//! match key { +//! "name" => output.push_str("world"), +//! _ => return Err(SimpleFillerError::NoSuchKey), +//! } +//! Ok(()) +//! }).unwrap(); +//! +//! assert_eq!(output, "Hello, world!"); +//! ``` +//! +//! `template.fill_into(output, filler)` (provided by `StrExt`) can also be spelled +//! 
`fill(template, filler, output)` if you prefer a function to a method +//! (I reckon the method syntax is clearer, but opinions will differ so I provided both). +//! +//! The filler function appends to the string directly for efficiency in case of computed values, +//! and returns `Result<(), E>`; any error will become `Err(Error::BadReplacement { error, .. })` +//! on the fill call. (In this example I’ve used `SimpleFillerError::NoSuchKey`, but `()` would +//! work almost as well, or you can write your own error type altogether.) +//! +//! This example showed a closure that took `&mut String` and used `.push_str(…)`, but this crate +//! is not tied to `String` in any way: for greater generality you would use a function generic +//! over a type that implements `std::fmt::Write`, and use `.write_str(…)?` inside (`?` works there +//! because `SimpleFillerError` implements `From<std::fmt::Error>`). +//! +//! At a **higher level**, you can use a string-string map as a filler, and you can also fill +//! directly to a `String` with `.fill_to_string()` (also available as a standalone function +//! `fill_to_string`): +//! +//! ```rust +//! # #[cfg(feature = "std")] { +//! use std::collections::HashMap; +//! use human_string_filler::StrExt; +//! +//! let mut map = HashMap::new(); +//! map.insert("name", "world"); +//! +//! let s = "Hello, {name}!".fill_to_string(&map); +//! +//! assert_eq!(s.unwrap(), "Hello, world!"); +//! # } +//! ``` +//! +//! Or you can implement the [`Filler`] trait for some other type of your own if you like. +//! +//! ## Cargo features +//! +#![cfg_attr( + feature = "std", + doc = " \ + - **std** (enabled by default, enabled in this build): remove for `#![no_std]` operation. \ + Implies *alloc*.\ +" +)] +#![cfg_attr( + not(feature = "std"), + doc = " \ + - **std** (enabled by default, *disabled* in this build): remove for `#![no_std]` operation. \ + Implies *alloc*.\ +" +)] +//! - Implementation of `std::error::Error` for `Error`; +//! 
- Implementation of `Filler` for `&HashMap`. +//! +#![cfg_attr( + feature = "alloc", + doc = " \ + - **alloc** (enabled by default via *std*, enabled in this build):\ +" +)] +#![cfg_attr( + not(feature = "alloc"), + doc = " \ + - **alloc** (enabled by default via *std*, disabled in this build):\ +" +)] +//! - Implementation of `Filler` for `&BTreeMap`. +//! - `fill_to_string` and `StrExt::fill_to_string`. +//! +//! ## The template language +//! +//! This is the grammar of the template language in [ABNF](https://tools.ietf.org/html/rfc5234): +//! +//! ```abnf +//! unescaped-normal-char = %x00-7A / %x7C / %x7E-D7FF / %xE000-10FFFF +//! ; any Unicode scalar value except for "{" and "}" +//! +//! normal-char = unescaped-normal-char / "{{" / "}}" +//! +//! template-region = "{" *unescaped-normal-char "}" +//! +//! template-string = *( normal-char / template-region ) +//! ``` +//! +//! This regular expression will validate a template string: +//! +//! ```text +//! ^([^{}]|\{\{|\}\}|\{[^{}]*\})*$ +//! ``` +//! +//! Sample legal template strings: +//! +//! - The empty string +//! - `Hello, {name}!`: one template region with key "name". +//! - `Today is {date:short}`: one template region with key "date:short". (Although there’s no +//! format specification like with the `format!()` macro, a colon convention is one reasonable +//! option—see the next section.) +//! - `Hello, {}!`: one template region with an empty key, not recommended but allowed. +//! - `Escaped {{ braces {and replacements} for {fun}!`: string "Escaped { braces ", followed by a +//! template region with key "and replacements", followed by string " for ", followed by a +//! template region with key "fun", followed by string "!". +//! +//! Sample illegal template strings: +//! +//! - `hello, {world}foo}`: opening and closing curlies must match; any others (specifically, the +//! last character of the string) must be escaped by doubling. +//! 
- `{{thing}`: the `{{` is an escaped opening curly, so the `}` is unmatched. +//! - `{thi{{n}}g}`: no curlies of any form inside template region keys. (It’s possible that a +//! future version may make it possible to escape curlies inside template regions, if it proves +//! to be useful in something like format specifiers; but not at this time.) +//! +//! ## Conventions on key semantics +//! +//! The key is an arbitrary string (except that it can’t contain `{` or `}`) with explicitly no +//! defined semantics, but here are some suggestions, including helper functions: +//! +//! 1. If it makes sense to have a format specifier (e.g. to specify a date format to use, or +//! whether to pad numbers with leading zeroes, *&c.*), split once on a character like `:`. +//! To do this most conveniently, a function [`split_on`] is provided. +//! +//! 2. For more advanced formatting where you have multiple properties you could wish to set, +//! [`split_propertied`] offers some sound and similarly simple semantics for such strings as +//! `{key prop1 prop2=val2}` and `{key:prop1,prop2=val2}`. +//! +//! 3. If it makes sense to have nested property access, split on `.` with the `key.split('.')` +//! iterator. (If you’re using `split_on` or `split_propertied` as mentioned above, you +//! probably want to apply them first to separate out the key part.) +//! +//! 4. Only use [UAX #31 identifiers](https://www.unicode.org/reports/tr31/) for the key +//! (or keys, if supporting nested property access). Most of the time, empty strings and +//! numbers are probably not a good idea. +//! +//! With these suggestions, you might end up with the key `foo.bar:baz` being interpreted as +//! retrieving the “bar” property from the “foo” object, and formatting it according to “baz”; or +//! `aleph.beth.gimmel|alpha beta=5` as retrieving “gimmel” from “beth” of “aleph”, and formatting +//! it with properties “alpha” set to true and “beta” set to 5. What those things actually *mean* +//! 
is up to you to decide. *I* certainly haven’t a clue. + +use core::fmt; +use core::iter::FusedIterator; +use core::ops::Range; + +#[cfg(feature = "alloc")] +extern crate alloc; + +#[cfg(feature = "alloc")] +use alloc::string::String; + +#[cfg(feature = "alloc")] +use alloc::collections::BTreeMap; +#[cfg(feature = "alloc")] +use core::borrow::Borrow; +#[cfg(feature = "std")] +use std::collections::HashMap; +#[cfg(feature = "std")] +use std::hash::Hash; + +/// Any error that occurs when filling a template string. +/// +/// Template parsing and filling is all done in a single pass; so a failed replacement due to an +/// unknown key will shadow a syntax error later in the string. +#[derive(Debug, PartialEq, Eq)] +pub enum Error<'a, E> { + /// A template region was not closed. + /// That is, an opening curly brace (`{`) with no matching closing curly brace (`}`). + /// + /// Example: + /// + /// ```rust + /// # #[cfg(feature = "alloc")] { + /// # use human_string_filler::{StrExt, Error}; + /// # assert_eq!( + /// "Hello, {thing" + /// # .fill_to_string(|_: &mut String, _: &str| Result::<(), ()>::Ok(())), + /// # Err(Error::UnclosedRegion { source: "{thing", range: 7..13 }), + /// # ); + /// # } + /// ``` + UnclosedRegion { + /// The text of the unclosed region, which will start with `{` and contain no other curly + /// braces. + source: &'a str, + /// The indexes of `source` within the template string. + range: Range, + }, + + /// An unescaped closing curly brace (`}`) was found, outside a template region. + /// + /// Examples: + /// + /// ```rust + /// # #[cfg(feature = "alloc")] { + /// # use human_string_filler::{StrExt, Error}; + /// # assert_eq!( + /// "Hello, thing}!" 
+ /// # .fill_to_string(|_: &mut String, _: &str| Result::<(), ()>::Ok(())), + /// # Err(Error::UnexpectedClosingBrace { index: 12 }), + /// # ); + /// # assert_eq!( + /// "Hello, {name}, look at my magnificent moustache: (}-:" + /// # .fill_to_string(|_: &mut String, _: &str| Result::<(), ()>::Ok(())), + /// # Err(Error::UnexpectedClosingBrace { index: 50 }), + /// # ); + /// # assert_eq!( + /// "Hello, {name}}!" + /// # .fill_to_string(|_: &mut String, _: &str| Result::<(), ()>::Ok(())), + /// # Err(Error::UnexpectedClosingBrace { index: 13 }), + /// # ); + /// # } + /// ``` + UnexpectedClosingBrace { + /// The index of the closing brace within the template string. + index: usize, + }, + + /// An opening curly brace (`{`) was found within a template region. + /// + /// Examples: + /// + /// ```rust + /// # #[cfg(feature = "alloc")] { + /// # use human_string_filler::{StrExt, Error}; + /// # assert_eq!( + /// "Hello, {thing{{sadness}}}" + /// # .fill_to_string(|_: &mut String, _: &str| Result::<(), ()>::Ok(())), + /// # Err(Error::UnexpectedOpeningBrace { index: 13 }), + /// # ); + /// # } + /// ``` + UnexpectedOpeningBrace { + /// The index of the opening brace within the template string. + index: usize, + }, + + /// The filler returned an error for the specified key. + BadReplacement { + /// The key on which the filler failed. Curly braces not included. + key: &'a str, + /// The indexes of `key` within the template string. + range: Range, + /// The error value returned by the filler. + error: E, + }, + + /// Writing to the output failed. + WriteFailed(fmt::Error), +} + +impl<'a, E> From for Error<'a, E> { + fn from(e: fmt::Error) -> Self { + Error::WriteFailed(e) + } +} + +impl<'a, E> fmt::Display for Error<'a, E> +where + E: fmt::Display, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Error::UnclosedRegion { source, .. 
} => { + write!(f, "Unclosed template region at \"{}\"", source) + } + + Error::UnexpectedClosingBrace { index } => { + write!(f, "Unexpected closing brace at index {}", index) + } + + Error::UnexpectedOpeningBrace { index } => { + write!( + f, + "Unexpected curly brace within template region at index {}", + index + ) + } + + Error::BadReplacement { key, error, .. } => { + write!(f, "Error in template string at \"{{{}}}\": {}", key, error) + } + + Error::WriteFailed(fmt::Error) => f.write_str("Error in writing output"), + } + } +} + +#[cfg(feature = "std")] +#[cfg_attr(docsrs, doc(cfg(feature = "std")))] +impl<'a, E> std::error::Error for Error<'a, E> +where + E: std::error::Error + 'static, +{ + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Error::BadReplacement { error, .. } => Some(error), + Error::WriteFailed(error) => Some(error), + _ => None, + } + } +} + +/// Implementers of this trait have the ability to fill template strings. +/// +/// It is extremely strongly recommended that fillers only push to the output, and do not perform +/// any other modifications of it. +/// +/// I mean, if you implement `Filler`, you get a `&mut String` and it’s *possible* to do +/// other things with it, but that’s a terrible idea. I’m almost ashamed of ideas like making `{␡}` +/// pop the last character, and `{←rot13}` ROT-13-encode what precedes it in the string. +pub trait Filler +where + W: fmt::Write, +{ + /// Fill the value for the given key into the output string. + fn fill(&mut self, output: &mut W, key: &str) -> Result<(), E>; +} + +impl Filler for F +where + F: FnMut(&mut W, &str) -> Result<(), E>, + W: fmt::Write, +{ + fn fill(&mut self, output: &mut W, key: &str) -> Result<(), E> { + self(output, key) + } +} + +#[cfg_attr(not(feature = "std"), allow(rustdoc::broken_intra_doc_links))] +/// A convenient error type for fillers; you might even like to use it yourself. 
+/// +/// You could also use `()`, but this gives you +/// [From](core::convert::From)<[core::fmt::Error]> so that you can use +/// `write!(out, …)?`, and sane [`core::fmt::Display`] and [`std::error::Error`] implementations. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum SimpleFillerError { + /// The map didn’t contain the requested key. + NoSuchKey, + /// Some fmt::Write operation returned an error. + WriteFailed(fmt::Error), +} + +impl From for SimpleFillerError { + fn from(e: fmt::Error) -> Self { + SimpleFillerError::WriteFailed(e) + } +} + +impl fmt::Display for SimpleFillerError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + SimpleFillerError::NoSuchKey => f.write_str("no such key"), + SimpleFillerError::WriteFailed(fmt::Error) => f.write_str("write failed"), + } + } +} + +#[cfg(feature = "std")] +#[cfg_attr(docsrs, doc(cfg(feature = "std")))] +impl std::error::Error for SimpleFillerError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + SimpleFillerError::WriteFailed(error) => Some(error), + _ => None, + } + } +} + +#[cfg(feature = "std")] +#[cfg_attr(docsrs, doc(cfg(feature = "std")))] +impl Filler for &HashMap +where + K: Borrow + Eq + Hash, + V: AsRef, + W: fmt::Write, +{ + fn fill(&mut self, output: &mut W, key: &str) -> Result<(), SimpleFillerError> { + self.get(key) + .ok_or(SimpleFillerError::NoSuchKey) + .and_then(|value| output.write_str(value.as_ref()).map_err(Into::into)) + } +} + +#[cfg(feature = "alloc")] +#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] +impl Filler for &BTreeMap +where + K: Borrow + Ord, + V: AsRef, + W: fmt::Write, +{ + fn fill(&mut self, output: &mut W, key: &str) -> Result<(), SimpleFillerError> { + self.get(key) + .ok_or(SimpleFillerError::NoSuchKey) + .and_then(|value| output.write_str(value.as_ref()).map_err(Into::into)) + } +} + +/// String extension methods for the template string. 
+/// +/// This is generally how I recommend using this library, because I find that the method receiver +/// makes code clearer: that `template.fill_into(output, filler)` is easier to understand than +/// `fill(template, filler, output)`. +pub trait StrExt { + /// Fill this template, producing a new string. + /// + /// This is a convenience method for ergonomics in the case where you aren’t fussed about + /// allocations and are using the standard `String` type. + /// + #[cfg_attr(feature = "std", doc = " Example, using a hash map:")] + #[cfg_attr( + not(feature = "std"), + doc = " Example, using a hash map (requires the *std* feature):" + )] + /// + /// ```rust + /// # #[cfg(feature = "std")] { + /// # use human_string_filler::StrExt; + /// # use std::collections::HashMap; + /// let map = [("name", "world")].into_iter().collect::>(); + /// assert_eq!( + /// "Hello, {name}!".fill_to_string(&map).unwrap(), + /// "Hello, world!", + /// ); + /// # } + /// ``` + #[cfg(feature = "alloc")] + #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] + fn fill_to_string(&self, filler: F) -> Result> + where + F: Filler, + { + let mut out = String::new(); + self.fill_into(&mut out, filler).map(|()| out) + } + + /// Fill this template string into the provided string, with the provided filler. + /// + /// Uses an existing string, which is more efficient if you want to push to an existing string + /// or can reuse a string allocation. 
+ /// + /// Example, using a closure: + /// + /// ```rust + /// # use human_string_filler::StrExt; + /// let filler = |output: &mut String, key: &str| { + /// match key { + /// "name" => output.push_str("world"), + /// _ => return Err(()), + /// } + /// Ok(()) + /// }; + /// let mut string = String::new(); + /// assert!("Hello, {name}!".fill_into(&mut string, filler).is_ok()); + /// assert_eq!(string, "Hello, world!"); + /// ``` + fn fill_into(&self, output: &mut W, filler: F) -> Result<(), Error> + where + F: Filler, + W: fmt::Write; +} + +impl StrExt for str { + #[inline] + fn fill_into(&self, output: &mut W, filler: F) -> Result<(), Error> + where + F: Filler, + W: fmt::Write, + { + fill(self, filler, output) + } +} + +/// The lowest-level form, as a function: fill the template string, into a provided writer. +/// +/// This is the most efficient form. It splits a string by `{…}` sections, adding anything outside +/// them to the output string (with escaped curlies dedoubled) and passing template regions through +/// the filler, which handles pushing to the output string itself. +/// +/// See also [`StrExt::fill_into`] which respells `fill(template, filler, output)` as +/// `template.fill_into(output, filler)`. +pub fn fill<'a, F, W, E>( + mut template: &'a str, + mut filler: F, + output: &mut W, +) -> Result<(), Error<'a, E>> +where + F: Filler, + W: fmt::Write, +{ + let mut index = 0; + loop { + if let Some(i) = template.find(|c| c == '{' || c == '}') { + #[allow(clippy::wildcard_in_or_patterns)] + match template.as_bytes()[i] { + c @ b'}' | c @ b'{' if template.as_bytes().get(i + 1) == Some(&c) => { + output.write_str(&template[0..i + 1])?; + template = &template[i + 2..]; + index += i + 2; + } + b'}' => return Err(Error::UnexpectedClosingBrace { index: index + i }), + b'{' | _ => { + // (_ here just to lazily skip an unreachable!().) 
+ output.write_str(&template[0..i])?; + template = &template[i..]; + index += i; + if let Some(i) = template[1..].find(|c| c == '{' || c == '}') { + match template.as_bytes()[i + 1] { + b'}' => { + if let Err(e) = filler.fill(output, &template[1..i + 1]) { + return Err(Error::BadReplacement { + key: &template[1..i + 1], + range: (index + 1)..(index + i + 1), + error: e, + }); + } + template = &template[i + 2..]; + index += i + 2; + } + // (Again, _ is unreachable.) + b'{' | _ => { + return Err(Error::UnexpectedOpeningBrace { + index: index + i + 1, + }) + } + } + } else { + return Err(Error::UnclosedRegion { + source: template, + range: index..(index + template.len()), + }); + } + } + } + } else { + output.write_str(template)?; + break; + } + } + + Ok(()) +} + +/// Fill a template, producing a new string. +/// +/// This is a convenience function for ergonomics in the case where you aren’t fussed about +/// allocations and are using the standard `String` type. +/// +/// See also [`StrExt::fill_to_string`], which respells `fill_to_string(template, filler)` as +/// `template.fill_to_string(filler)`. +#[cfg(feature = "alloc")] +#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] +pub fn fill_to_string(template: &str, filler: F) -> Result> +where + F: Filler, +{ + let mut out = String::new(); + fill(template, filler, &mut out).map(|()| out) +} + +/// A convenience function to split a string on a character. +/// +/// This is nicer than using `string.split(c, 2)` because it gives you the two values up-front. +/// +/// # Returns +/// +/// A two-tuple of: +/// +/// 1. What comes before the split character, or the entire string if there was none; and +/// 2. The remainder after the split character, if there was one (even if it’s empty). 
+/// +/// ``` +/// # use human_string_filler::split_on; +/// assert_eq!(split_on("The quick brown fox", ':'), ("The quick brown fox", None)); +/// assert_eq!(split_on("/", '/'), ("", Some(""))); +/// assert_eq!(split_on("harum = scarum", '='), ("harum ", Some(" scarum"))); +/// assert_eq!(split_on("diæresis:tréma:umlaut", ':'), ("diæresis", Some("tréma:umlaut"))); +/// ``` +pub fn split_on(string: &str, c: char) -> (&str, Option<&str>) { + match string.find(c) { + Some(i) => (&string[..i], Some(&string[i + c.len_utf8()..])), + None => (string, None), + } +} + +/// The separators to use in [`split_propertied`]. +/// +/// A couple of sets of plausible-looking values (but if you want a concrete recommendation, like +/// Gallio of old I refuse to be a judge of these things): +/// +/// - `(' ', ' ', '=')` looks like `Hello, {name first formal=false case=lower}!`. +/// - `('|', ',', ':')` looks like `Hello, {name|first,formal:false,case:lower}!`. +#[derive(Clone, Copy, Debug)] +pub struct Separators { + /// What character indicates the end of the key and the start of the properties. + pub between_key_and_properties: char, + + /// What character indicates the end of one property’s name or value and the start of the next + /// property’s name. + pub between_properties: char, + + /// What character indicates the end of a property’s name and the start of its value. + /// Remember that properties aren’t required to have values, but can be booleanyish. + // “booleanyish” sounded better than “booleanishy”. That’s my story and I’m sticking with it. + /// For that matter, if you want *all* properties to be boolean, set this to the same value as + /// `between_properties`, because `between_properties` is greedier. + pub between_property_name_and_value: char, +} + +/// A convenience function to split a key that is followed by properties. 
+/// +/// In keeping with this library in general, this is deliberately very simple and consequently not +/// able to express all possible values; for example, if you use space as the separator between +/// properties, you can’t use space in property values; and this doesn’t guard against empty keys +/// or property names in any way. +/// +/// ``` +/// use human_string_filler::{Separators, split_propertied}; +/// +/// let (key, properties) = split_propertied("key:prop1,prop2=value2,prop3=4+5=9", Separators { +/// between_key_and_properties: ':', +/// between_properties: ',', +/// between_property_name_and_value: '=', +/// }); +/// +/// assert_eq!(key, "key"); +/// assert_eq!(properties.collect::>(), +/// vec![("prop1", None), ("prop2", Some("value2")), ("prop3", Some("4+5=9"))]); +/// ``` +/// +/// This method consumes exactly one character for the separators; if space is your +/// between-properties separator, for example, multiple spaces will not be combined, but +/// you’ll get `("", None)` properties instead. As I say, this is deliberately simple. +pub fn split_propertied( + s: &str, + separators: Separators, +) -> ( + &str, + impl Iterator)> + + DoubleEndedIterator + + FusedIterator + + Clone + + fmt::Debug, +) { + let (key, properties) = split_on(s, separators.between_key_and_properties); + let properties = properties + .map(|properties| properties.split(separators.between_properties)) + .unwrap_or_else(|| { + // We need an iterator of the same type that will yield None, but Split yields an empty + // string first. Nice and easy: consume that, then continue on our way. + let mut dummy = "".split(' '); + dummy.next(); + dummy + }) + .map(move |word| split_on(word, separators.between_property_name_and_value)); + (key, properties) +} + +#[cfg(test)] +mod tests { + #[allow(unused_imports)] + use super::*; + + #[cfg(feature = "alloc")] + macro_rules! 
test { + ($name:ident, $filler:expr) => { + #[test] + fn $name() { + let filler = $filler; + + assert_eq!( + "Hello, {}!".fill_to_string(&filler).as_ref().map(|s| &**s), + Ok("Hello, (this space intentionally left blank)!"), + ); + assert_eq!( + "Hello, {name}!" + .fill_to_string(&filler) + .as_ref() + .map(|s| &**s), + Ok("Hello, world!"), + ); + assert_eq!( + "Hello, {you}!".fill_to_string(&filler), + Err(Error::BadReplacement { + key: "you", + range: 8..11, + error: SimpleFillerError::NoSuchKey, + }), + ); + assert_eq!( + "I like {keys with SPACES!? 😱}" + .fill_to_string(&filler) + .as_ref() + .map(|s| &**s), + Ok("I like identifier-only keys 👌"), + ); + } + }; + } + + #[cfg(feature = "alloc")] + test!(closure_filler, |out: &mut String, key: &str| { + use core::fmt::Write; + out.write_str(match key { + "" => "(this space intentionally left blank)", + "name" => "world", + "keys with SPACES!? 😱" => "identifier-only keys 👌", + _ => return Err(SimpleFillerError::NoSuchKey), + }) + .map_err(Into::into) + }); + + #[cfg(feature = "std")] + test!(hash_map_fillter, { + [ + ("", "(this space intentionally left blank)"), + ("name", "world"), + ("keys with SPACES!? 😱", "identifier-only keys 👌"), + ] + .into_iter() + .collect::>() + }); + + #[cfg(feature = "alloc")] + test!(btree_map_fillter, { + [ + ("", "(this space intentionally left blank)"), + ("name", "world"), + ("keys with SPACES!? 
😱", "identifier-only keys 👌"), + ] + .into_iter() + .collect::>() + }); + + #[test] + #[cfg(feature = "alloc")] + fn fill_errors() { + let c = |_: &mut String, _: &str| -> Result<(), ()> { Ok(()) }; + + assert_eq!( + fill_to_string("Hello, {thing", c), + Err(Error::UnclosedRegion { + source: "{thing", + range: 7..13 + }) + ); + assert_eq!( + fill_to_string("{}/{x}/{xx}/{xxx}/{{/}}/{thing", c), + Err(Error::UnclosedRegion { + source: "{thing", + range: 24..30 + }) + ); + + assert_eq!( + fill_to_string("Hello, }thing", c), + Err(Error::UnexpectedClosingBrace { index: 7 }) + ); + assert_eq!( + fill_to_string("{}/{x}/{xx}/{xxx}/{{/}}/}thing", c), + Err(Error::UnexpectedClosingBrace { index: 24 }) + ); + + assert_eq!( + fill_to_string("Hello, {thi{{ng}", c), + Err(Error::UnexpectedOpeningBrace { index: 11 }) + ); + assert_eq!( + fill_to_string("{}/{x}/{xx}/{xxx}/{{/}}/{x{", c), + Err(Error::UnexpectedOpeningBrace { index: 26 }) + ); + + assert_eq!( + fill_to_string("Hello, {thi}}ng}", c), + Err(Error::UnexpectedClosingBrace { index: 12 }) + ); + assert_eq!( + fill_to_string("{}/{x}/{xx}/{xxx}/{{/}}/}", c), + Err(Error::UnexpectedClosingBrace { index: 24 }) + ); + } + + // This is almost enough to make me only expose a dyn fmt::Writer. + #[test] + #[cfg(feature = "alloc")] + fn do_not_do_this_at_home_kids() { + // Whatever possessed me!? + let s = "Don’t{␡}{}{^H} do this at home, {who}!".fill_to_string( + |output: &mut String, key: &str| { + match key { + "␡" | "" | "^H" => { + output.pop(); + } + "who" => { + output.push_str("kids"); + } + _ => return Err(()), + } + Ok(()) + }, + ); + assert_eq!(s.unwrap(), "Do do this at home, kids!"); + + // I haven’t yet decided whether this is better or worse than the previous one. 
+ let s = "Don’t yell at {who}!{←make ASCII uppercase} (Please.)".fill_to_string( + |output: &mut String, key: &str| { + match key { + "←make ASCII uppercase" => { + output.make_ascii_uppercase(); + } + "who" => { + output.push_str("me"); + } + _ => return Err(()), + } + Ok(()) + }, + ); + assert_eq!(s.unwrap(), "DON’T YELL AT ME! (Please.)"); + } +} diff --git a/test b/test new file mode 100755 index 0000000..6e523e6 --- /dev/null +++ b/test @@ -0,0 +1,13 @@ +#!/bin/sh +set -e +export RUSTFLAGS="-D warnings" +export RUSTDOCFLAGS="-D warnings" +for release in "" "--release"; do + for subcommand in clippy test doc; do + cargo $subcommand $release + cargo $subcommand $release --no-default-features + cargo $subcommand $release --no-default-features --features alloc + done +done + +RUSTDOCFLAGS="-D warnings --cfg docsrs" cargo +nightly doc --all-features --no-deps -- 2.42.0