From 20220401431af94d10883592520af737ba151a9f Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Fri, 1 Apr 2022 15:30:27 +1100 Subject: [PATCH 1/1] U+ 1.0.0 --- .gitignore | 2 ++ CHANGELOG.md | 18 ++++++++++ COPYING | 24 +++++++++++++ Cargo.toml | 10 ++++++ KNOWN_ISSUES.md | 35 +++++++++++++++++++ README.md | 37 ++++++++++++++++++++ src/lib.rs | 90 +++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 216 insertions(+) create mode 100644 .gitignore create mode 100644 CHANGELOG.md create mode 100644 COPYING create mode 100644 Cargo.toml create mode 100644 KNOWN_ISSUES.md create mode 100644 README.md create mode 100644 src/lib.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..96ef6c0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +Cargo.lock diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..73e25ee --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,18 @@ +# Changelog + +## 1.0.0 (2022-04-01) + +Like a breath of fresh air wafting down from above, +Bringing enlightenment to the masses, +This is the initial release, +Perfect in every way. + +(See also KNOWN_ISSUES.md.) + +# Afterword + +Just because something is a joke doesn’t mean it shouldn’t get a changelog. +Think of the poor code’s feelings! +“Woe is me,” it might otherwise say: +“for I was created by one so cruel that he set me +adrift in this world not knowing my heritage!” diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..b34fc67 --- /dev/null +++ b/COPYING @@ -0,0 +1,24 @@ +© 2022 Chris Morgan (do not hold it against me) + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the “Software”), to appreciate +the author’s refined sense of humour, for up to but not +exceeding five (5) minutes’ duration. + +Any subsequent merriment may only be had upon expressing +this amusement to the author in word or currency. + +All other rights are reserved. 
You are not given leave to +actually use whatever this is. Look, I’d sell you a license +to use it for a thousand Australian dollars, but I must +confess that this isn’t my best code. You could hire me to +do better if you like: https://chrismorgan.info/hire-me/ + +THE SOFTWARE IS WARRANTED UNMERCHANTABLE, AND UNFIT FOR +ANY PURPOSE SAVE HUMOUR, AND EVEN THAT MAY BE DEBATED. +IF YOU USE THIS SOFTWARE, YOU DESERVE WHAT YOU GET; +I MAY BE LIABLE TO LAUGH IF I HEAR, BUT THAT IS THE +FULL EXTENT OF MY LIABILITY. DAMAGES ARE ON YOU. + +(Apologies to the MIT license, whence I lifted the most.) diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..ba8ce45 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "u-plus" +version = "1.0.0" +authors = ["Chris Morgan "] +edition = "2021" +description = "Pretty Unicode code point literals: U+12345 instead of '\\u{12345}'" +repository = "https://git.chrismorgan.info/u-plus" +keywords = ["joke"] +categories = ["rust-patterns", "no-std"] +license-file = "COPYING" diff --git a/KNOWN_ISSUES.md b/KNOWN_ISSUES.md new file mode 100644 index 0000000..5317615 --- /dev/null +++ b/KNOWN_ISSUES.md @@ -0,0 +1,35 @@ +Known issues that probably won’t ever be fixed (and hopefully *can’t* ever be fixed): + +- Unicode scalar values whose hexadecimal representations contain a non-decimal + character (such as U+1F622) cannot use this representation (except as noted below). + + (I thought I might have to mention that surrogates like U+DEAD can’t be + represented because of using `char`, but this rule already covers surrogates, + as they all start with D and so already couldn’t be expressed. + I’m sticking with calling this code point literals rather than + scalar value literals, and you probably can’t stop me.) + +- Sequences ending in F32 or F64 (e.g. U+0F32) but containing no other + non-decimal characters work, but the F has to be lowercase (e.g. `U+0f32`). 
+ +- If you write the number after `U+` in a format other than decimal, + you will be punished with incorrect answers (other than for zero). + I mean, really, what did you *expect*, writing a monstrosity like `U+0x1F92E`? + +Known issues that could conceivably become fixable at some future point (but probably not): + +- rustfmt will uglify the code, turning `U+1234` into `U + 1234` and possibly + even inserting a line break. Tch. Such poor taste. + +Known issues that will probably become fixable in the future: + +- Depends on nightly rustc for various const stuff. + (You do want your Unicode literals checked at compile time, right?) + +- Doesn’t explain the error very well. + +Other limitations: + +- This only works in stead of `char` literals; for string literals, + you *could* use things like the `const_format` crate if you really want to, + like `concatcp!("U+1234 = ", U+1234)`. diff --git a/README.md b/README.md new file mode 100644 index 0000000..7b48d32 --- /dev/null +++ b/README.md @@ -0,0 +1,37 @@ +# U+: pretty Unicode code point literals + +I was reading +for some reason, and read `let U = 0; U = U + 1;`. + +Suddenly my mind was awhirl with a Concept. I implemented it at once. + +## The problem + +Unicode expresses its code points in syntax like U+1234 (full range U+0000–U+10FFFF). + +But then when you want to transfer it to a programming language, +you have to learn another syntax. Will it be `\u1234`, `\u{1234}`, +`\x1E\x88\xB4`, `\341\210\264`, something else? + +And then astral plane characters make it even worse: +`\U0001F631`, `\u{1F631}`, `\xF0\x9F\x98\xB1`, `\uD83D\uDE31` +(with all the associated pain the abomination UTF-16 entails, +especially that your char type may simply not be able to represent this), +something else? + +And so here is this crate that lets you use the True Unicode Syntax: + +```rust +use u_plus::U; + +assert_eq!(U+1234, '\u{1234}'); +``` + +So forget about `\u{…}` syntax! 
+ +(Caution: there are some limitations with this approach, see KNOWN_ISSUES.md for details.) + +> “All things are lawful for me,” but not all things are profitable. +> “All things are lawful for me,” but not all things build up. +> +> — Paul the Apostle, *1 Corinthians 10:23* diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..e853663 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,90 @@ +// 99 characters, I guess I can’t add any more feature flags. Just as well I have all those I need. +#![feature(const_trait_impl, const_char_convert, const_option, const_fn_floating_point_arithmetic)] +#![no_std] +// What do you want *docs* for!? (But I will begrudgingly allow doctests to run.) +#![cfg(any(not(doc), doctest))] + +use core::ops::Add; + +pub struct U; + +impl const Add<u32> for U { + type Output = char; + + fn add(self, n: u32) -> char { + char::from_u32( + n % 10 + + (n / 10) % 10 * 0x10 + + (n / 100) % 10 * 0x100 + + (n / 1000) % 10 * 0x1000 + + (n / 10000) % 10 * 0x10000 + + (n / 100000) % 10 * 0x100000 + ).expect("I expected better of you. 
Depart, and reflect upon your transgressions.") + } +} + +impl const Add<f32> for U { + type Output = char; + + fn add(self, f: f32) -> char { + let n = f as u32; + if f != (n as f32) { + panic!("Begone, fractionaliser of U+ literals!"); + } + char::from_u32( + 0xf32 + + n % 10 * 0x1000 + + (n / 10) % 10 * 0x10000 + + (n / 100) % 10 * 0x100000 + ).expect("Will you never be better?") + } +} + +impl const Add<f64> for U { + type Output = char; + + fn add(self, f: f64) -> char { + let n = f as u32; + if f != (n as f64) { + panic!("Get ye hence, fracticious one!"); + } + char::from_u32( + 0xf64 + + n % 10 * 0x1000 + + (n / 10) % 10 * 0x10000 + + (n / 100) % 10 * 0x100000 + ).expect("I am exceedingly wrothful to youwards.") + } +} + +#[test] +fn success() { + assert_eq!(U+0000, '\u{0}'); + assert_eq!(U+1234, '\u{1234}'); + assert_eq!(U+2f64, '\u{2f64}'); + assert_eq!(U+102f64, '\u{102f64}'); + assert_eq!(U+104f32, '\u{104f32}'); + // Evaluated at const time! + const MAX: char = U+109999; + assert_eq!(MAX, '\u{109999}'); +} + +/// ```rust +/// #![feature(const_trait_impl)] // Not sure quite why this is needed, but meh. +/// use u_plus::U; +/// const HAPPINESS: char = U+2323; +/// ``` +/// +/// ```rust,compile_fail +/// #![feature(const_trait_impl)] +/// use u_plus::U; +/// const MISERY: char = U+119999; +/// ``` +/// +/// ```rust,compile_fail +/// #![feature(const_trait_impl)] +/// use u_plus::U; +/// const SORROW: char = U+1F622; +/// ``` +#[cfg(doctest)] +const _: () = (); -- 2.47.1