1 //! Estimate (with the Monte Carlo method) at what rate IDs would be rejected, using various
// Build the coder used for every encoding below from a fixed 32-hex-digit string;
// `unwrap` panics here if `TesidCoder::new` rejects it.
5 let coder
= tesid
:: TesidCoder
:: new ( "000102030405060708090a0b0c0d0e0f" ). unwrap ();
// Zero-initialised accumulators for the i64-wide estimate: further down, each per-length
// sample's tallies are scaled up and added to these, and they are printed in the final summary.
8 let mut i64_passed
= 0 ;
9 let mut i64_repeat2
= 0 ;
10 let mut i64_repeat3
= 0 ;
11 let mut i64_letters3
= 0 ;
12 let mut i64_letters4
= 0 ;
// Print the top border, the column titles and the header separator of the per-length table.
14 println! ( "┌────────┬────────┬─────────┬─────────┬──────────┬──────────┐" );
15 println! ( "│ Length │ Pass │ repeat2 │ repeat3 │ 3letters │ 4letters │" );
16 println! ( "┝━━━━━━━━┿━━━━━━━━┿━━━━━━━━━┿━━━━━━━━━┿━━━━━━━━━━┿━━━━━━━━━━┥" );
// Bounds of the ID range for the current `j` (`j` is bound outside the lines visible here):
// `range_low` is 0 when j == 1 and 1 << (j * 10) otherwise.
18 let range_low
= if j
== 1 { 0 } else { 1 << ( j
* 10 ) };
// `range_high` is 1 << ((j + 1) * 10).
19 let range_high
= 1 << (( j
+ 1 ) * 10 );
// Start of the sampling window.
// NOTE(review): the values produced by the two branches of this `if` are not visible in this
// excerpt — confirm against the full file.
20 let sample_start
= if j
== 1 {
24 // ↑ This is an opportunity to sample from a different region if you really want to,
25 // though because of the cipher, values are uniform so that you won’t see much change,
26 // less than 0.1 percentage points. You can go as high as `range_low << 9 - (2 << 20)`.
27 // (j == 1, the 4-character range, is excluded as we’re testing its entire range.
// The window always covers 1 << 20 consecutive values; `sample_end` is used as the exclusive
// upper bound of the `sample_start .. sample_end` loop.
30 let sample_end
= sample_start
+ ( 1 << 20 );
// Length of the encoding of the first sampled value; it is printed as the row label later.
39 let len
= coder
. encode_long ( sample_start
). unwrap (). len ();
// Guard: the last sampled value (`sample_end - 1`) must encode to the same length as the first,
// otherwise the window straddles two length ranges and the program panics with this message.
40 assert_eq! ( coder
. encode_long ( sample_end
- 1 ). unwrap (). len (), len
,
41 "you messed things up so that the sampling range isn’t entirely of one length" );
// Encode every value in the sampling window and test the result against four rules.
// NOTE(review): the bodies of the `if`s below (and whatever happens between them) are not visible
// in this excerpt; presumably each one bumps the matching tally — confirm against the full file.
42 for i
in sample_start
.. sample_end
{
43 let s
= coder
. encode_long ( i
). unwrap ();
// True when any two adjacent elements are equal.
46 if s
. windows ( 2 ). any (| c
| c
[ 0 ] == c
[ 1 ]) {
// True when any three consecutive elements are all equal.
50 if s
. windows ( 3 ). any (| c
| c
[ 0 ] == c
[ 1 ] && c
[ 1 ] == c
[ 2 ]) {
// True when any run of three consecutive elements consists solely of ASCII letters.
54 if s
. windows ( 3 ). any (| digits
| digits
. iter (). all (| c
| c
. is_ascii_alphabetic ())) {
// Same test as above, but for a run of four consecutive elements.
58 if s
. windows ( 4 ). any (| digits
| digits
. iter (). all (| c
| c
. is_ascii_alphabetic ())) {
// Emit one table row: the encoded length, then each tally expressed as a percentage of `total`
// (width 5, two decimal places). `passed`, `total`, `repeat2`, `repeat3`, `letters3` and
// `letters4` are declared outside the lines visible in this excerpt.
67 println! ( "│ {len:2} │ {:5.2} % │ {:5.2} % │ {:5.2} % │ {:5.2} % │ {:5.2} % │" ,
68 100.0 * passed
as f64 / total
as f64 ,
69 100.0 * repeat2
as f64 / total
as f64 , // the same character twice in a row
70 100.0 * repeat3
as f64 / total
as f64 , // the same character thrice in a row
71 100.0 * letters3
as f64 / total
as f64 , // three letters in a row
72 100.0 * letters4
as f64 / total
as f64 , // four letters in a row
// How many values of this length range lie below 1 << 63: the visible arm caps `range_high` at
// 1 << 63 before subtracting `range_low`.
// NOTE(review): the value used when `range_low > 1 << 63` is not visible in this excerpt.
75 let i64s_in_range
= if range_low
> 1 << 63 {
78 range_high
. min ( 1 << 63 ) - range_low
// Add this range's share to the running total (asserted to equal 1 << 63 in the summary).
80 i64_total
+= i64s_in_range
;
// Extrapolate each sampled tally to the whole range: scale `i64s_in_range` by the observed
// fraction (tally / total) in f64, then convert back with `as u64`, which drops the fractional
// part — so these sums are estimates, not exact counts.
81 i64_passed
+= ( i64s_in_range
as f64 * passed
as f64 / total
as f64 ) as u64 ;
82 i64_repeat2
+= ( i64s_in_range
as f64 * repeat2
as f64 / total
as f64 ) as u64 ;
83 i64_repeat3
+= ( i64s_in_range
as f64 * repeat3
as f64 / total
as f64 ) as u64 ;
84 i64_letters3
+= ( i64s_in_range
as f64 * letters3
as f64 / total
as f64 ) as u64 ;
85 i64_letters4
+= ( i64s_in_range
as f64 * letters4
as f64 / total
as f64 ) as u64 ;
// Close the per-length table with its bottom border.
87 println! ( "└────────┴────────┴─────────┴─────────┴──────────┴──────────┘" );
// Summary for the i64 domain: the accumulated range sizes must add up to exactly 1 << 63
// (the program panics otherwise), then each estimate is printed right-aligned to width 19
// together with its percentage of the total.
89 println! ( "i64 (as commonly used by SQL databases) statistics, given sparsity=1, discriminant=0:" );
90 assert_eq! ( i64_total
, 1 << 63 ); // Sanity check 🙂
91 println! ( " • Total (=2⁶³) : {i64_total:19} " );
92 println! ( " • Passed all : {i64_passed:19} ( {:5.2} %)" , 100.0 * i64_passed
as f64 / i64_total
as f64 );
93 println! ( " • Fail repeat2 : {i64_repeat2:19} ( {:5.2} %)" , 100.0 * i64_repeat2
as f64 / i64_total
as f64 );
94 println! ( " • Fail repeat3 : {i64_repeat3:19} ( {:5.2} %)" , 100.0 * i64_repeat3
as f64 / i64_total
as f64 );
95 println! ( " • Fail letters3 : {i64_letters3:19} ( {:5.2} %)" , 100.0 * i64_letters3
as f64 / i64_total
as f64 );
96 println! ( " • Fail letters4 : {i64_letters4:19} ( {:5.2} %)" , 100.0 * i64_letters4
as f64 / i64_total
as f64 );
97 println! ( "(Most of the TESIDs fall in the 14-character range. With higher sparsity, you could get into the higher echelons of long IDs and new peaks of fail rates, but hopefully you can see that even if you reject 99% of IDs, there are still rather a lot left!)" );