1 //! Specialised small-value base-32 encoding and decoding.
3 //! This implementation deals with values in the range [0, 2¹⁰⁰), which means up to 20 base-32
4 //! characters (log₃₂ 2¹⁰⁰ = 20), because that’s the upper limit I’ve chosen for TESID.
5 //! If you want a general-purpose base-32 library for longer values, the base-x crate looks good.
6 //! But we get better performance by applying an upper bound consistent with the 5peck usage.
8 // Digits and letters, minus 0, 1, l, o.
9 const ALPHABET
: [u8; 32] = *b
"23456789abcdefghijkmnpqrstuvwxyz";
11 // (I wish there was a real u7 so I could do Option<u7> instead of i8 with sentinel -1.
12 // NonZeroU8 is insufficient since I need zero. Kinda want NonMaxU8 instead.)
13 const ALPHABET_INVERSE
: [i8; 256] = [
14 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
15 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
16 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
17 -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, -1, -1, -1,
18 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
19 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
20 -1, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, -1, 19, 20, -1,
21 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1,
22 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
23 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
24 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
25 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
26 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
27 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
28 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
29 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
33 fn check_base32_alphabet_tables_are_correct() {
34 for (i
, &n
) in ALPHABET_INVERSE
.iter().enumerate() {
36 -128..=-2 | 32..=127 => unreachable!(),
38 0..=31 => assert_eq!(i
as u8, ALPHABET
[n
as usize]),
41 for (i
, &c
) in ALPHABET
.iter().enumerate() {
42 // SAFETY proof here, that ALPHABET members are all ASCII. (Though really, if you don’t
43 // trust simple observation of ALPHABET, I dunno why you’d trust this line! 😀 But I guess
44 // it’s in the test, which means something.)
45 assert!(c
.is_ascii());
46 assert_eq!(ALPHABET_INVERSE
[c
as usize] as usize, i
);
50 /// Base-32-encode a u128 that’s below 2¹⁰⁰.
51 /// The output is ASCII right-aligned with leading zero padding.
52 /// If the input is 2¹⁰⁰ or higher, bits will be lost.
53 pub const fn encode(input
: u128
) -> [u8; 20] {
54 // Observe the unrolled loop. A regular loop is, *for a given length*, somewhere between the
55 // same speed (small values) and several times slower (large values). In this library we only
56 // need 20-character output, so unrolled is mildly preferable.
58 ALPHABET
[((input
>> 95) & 31) as usize],
59 ALPHABET
[((input
>> 90) & 31) as usize],
60 ALPHABET
[((input
>> 85) & 31) as usize],
61 ALPHABET
[((input
>> 80) & 31) as usize],
62 ALPHABET
[((input
>> 75) & 31) as usize],
63 ALPHABET
[((input
>> 70) & 31) as usize],
64 ALPHABET
[((input
>> 65) & 31) as usize],
65 ALPHABET
[((input
>> 60) & 31) as usize],
66 ALPHABET
[((input
>> 55) & 31) as usize],
67 ALPHABET
[((input
>> 50) & 31) as usize],
68 ALPHABET
[((input
>> 45) & 31) as usize],
69 ALPHABET
[((input
>> 40) & 31) as usize],
70 ALPHABET
[((input
>> 35) & 31) as usize],
71 ALPHABET
[((input
>> 30) & 31) as usize],
72 ALPHABET
[((input
>> 25) & 31) as usize],
73 ALPHABET
[((input
>> 20) & 31) as usize],
74 ALPHABET
[((input
>> 15) & 31) as usize],
75 ALPHABET
[((input
>> 10) & 31) as usize],
76 ALPHABET
[((input
>> 5) & 31) as usize],
77 ALPHABET
[((input
>> 0) & 31) as usize],
81 /// Base-32-decode a string. No sanity checking is performed on length, nor is overflow checked.
82 /// Strings below 26 characters long will succeed.
83 pub const fn decode(input
: &str) -> Result
<u128
, ()> {
84 // Unlike encode, decode is not well-adapted to loop-unrolling.
85 // My simple attempts both took three times as long.
87 // No `for b in input.as_bytes()` at this time because const fn.
88 let input
= input
.as_bytes();
90 while i
< input
.len() {
91 out
= (out
<< 5) | match ALPHABET_INVERSE
[input
[i
] as usize] {
92 -128..=-1 => return Err(()),
102 fn bench_encode_short(b
: &mut test
::Bencher
) {
103 // Encode all the numbers from zero to four characters wide (up to about a million).
104 b
.iter(|| for i
in 0..(1 << 20) {
105 encode(test
::black_box(i
));
111 fn bench_encode_long(b
: &mut test
::Bencher
) {
112 // Encode around a million (same number as the short benchmarks) 20-character values.
113 // Should take much the same time as bench_encode_short.
114 b
.iter(|| for i
in (1 << 99)..((1 << 99) + (1 << 20)) {
115 encode(test
::black_box(i
));
121 fn bench_encode_and_decode_short(b
: &mut test
::Bencher
) {
122 b
.iter(|| for i
in 0..(1 << 20) {
123 decode(unsafe { core
::str::from_utf8_unchecked(&encode(test
::black_box(i
))[16..]) }).unwrap();
129 fn bench_encode_and_decode_long(b
: &mut test
::Bencher
) {
130 b
.iter(|| for i
in (1 << 99)..((1 << 99) + (1 << 20)) {
131 decode(unsafe { core
::str::from_utf8_unchecked(&encode(test
::black_box(i
))) }).unwrap();
137 const B32_345678
: u128
= 1 * 32_u128.pow(5) +
144 #[allow(non_upper_case_globals)]
145 const B32_base32
: u128
= 9 * 32_u128.pow(5) +
147 24 * 32_u128.pow(3) +
148 12 * 32_u128.pow(2) +
152 assert_eq!(encode(0), *b
"22222222222222222222");
153 assert_eq!(decode(""), Ok(0));
154 assert_eq!(decode("2"), Ok(0));
155 assert_eq!(encode(1), *b
"22222222222222222223");
156 assert_eq!(decode("3"), Ok(1));
157 assert_eq!(encode(32), *b
"22222222222222222232");
158 assert_eq!(decode("32"), Ok(32));
159 assert_eq!(encode(B32_345678
), *b
"22222222222222345678");
160 assert_eq!(decode("345678"), Ok(B32_345678
));
161 assert_eq!(encode(B32_base32
), *b
"22222222222222base32");
162 assert_eq!(decode("base32"), Ok(B32_base32
));
163 assert_eq!(encode(u128
::MAX
), *b
"zzzzzzzzzzzzzzzzzzzz"); // truncated at 120 bits
165 // Decoding overly long values leads to truncation at 2¹²⁸.
166 assert_eq!(decode("zyxwvutsrqpnmkjihgfedcba98765432"), Ok(75421586008491043410716744415157782560));
167 // Encoding overly long values leads to truncation at 2¹²⁰.
168 assert_eq!(encode(75421586008491043410716744415157782560), *b
"mkjihgfedcba98765432");