rust/examples/rejection_rates.rs - tesid blob - Chris Morgan’s Git repositories

   1 //! Estimate (with the Monte Carlo method) at what rate IDs would be rejected, using various
   2 //! bad-pattern rules.
   3
   4 fn main() {
   5     let coder = tesid::TesidCoder::new("000102030405060708090a0b0c0d0e0f").unwrap();
   6
   7     let mut i64_total = 0;
   8     let mut i64_passed = 0;
   9     let mut i64_repeat2 = 0;
  10     let mut i64_repeat3 = 0;
  11     let mut i64_letters3 = 0;
  12     let mut i64_letters4 = 0;
  13
  14     println!("┌────────┬────────┬─────────┬─────────┬──────────┬──────────┐");
  15     println!("│ Length │  Pass  │ repeat2 │ repeat3 │ 3letters │ 4letters │");
  16     println!("┝━━━━━━━━┿━━━━━━━━┿━━━━━━━━━┿━━━━━━━━━┿━━━━━━━━━━┿━━━━━━━━━━┥");
  17     for j in 1..10 {
  18         let range_low = if j == 1 { 0 } else { 1 << (j * 10) };
  19         let range_high = 1 << ((j + 1) * 10);
  20         let sample_start = if j == 1 {
  21             range_low
  22         } else {
  23             range_low // << 9
  24             // ↑ This is an opportunity to sample from a different region if you really want to,
  25             // though because of the cipher, values are uniform so that you won’t see much change,
  26             // less than 0.1 percentage points. You can go as high as `range_low << 9 - (2 << 20)`.
  27             // (j == 1, the 4-character range, is excluded as we’re testing its entire range.
  28         };
  29
  30         let sample_end = sample_start + (1 << 20);
  31
  32         let mut total = 0;
  33         let mut passed = 0;
  34         let mut repeat2 = 0;
  35         let mut repeat3 = 0;
  36         let mut letters3 = 0;
  37         let mut letters4 = 0;
  38
  39         let len = coder.encode_long(sample_start).unwrap().len();
  40         assert_eq!(coder.encode_long(sample_end - 1).unwrap().len(), len,
  41             "you messed things up so that the sampling range isn’t entirely of one length");
  42         for i in sample_start..sample_end {
  43             let s = coder.encode_long(i).unwrap();
  44             let s = s.as_bytes();
  45             let mut pass = true;
  46             if s.windows(2).any(|c| c[0] == c[1]) {
  47                 repeat2 += 1;
  48                 pass = false;
  49             }
  50             if s.windows(3).any(|c| c[0] == c[1] && c[1] == c[2]) {
  51                 repeat3 += 1;
  52                 pass = false;
  53             }
  54             if s.windows(3).any(|digits| digits.iter().all(|c| c.is_ascii_alphabetic())) {
  55                 letters3 += 1;
  56                 pass = false;
  57             }
  58             if s.windows(4).any(|digits| digits.iter().all(|c| c.is_ascii_alphabetic())) {
  59                 letters4 += 1;
  60                 pass = false;
  61             }
  62             if pass {
  63                 passed += 1;
  64             }
  65             total += 1;
  66         }
  67         println!("│   {len:2}   │ {:5.2}% │ {:5.2}%  │ {:5.2}%  │  {:5.2}%  │  {:5.2}%  │",
  68             100.0 * passed   as f64 / total as f64,
  69             100.0 * repeat2  as f64 / total as f64,  // the same character twice in a row
  70             100.0 * repeat3  as f64 / total as f64,  // the same character thrice in a row
  71             100.0 * letters3 as f64 / total as f64,  // three letters in a row
  72             100.0 * letters4 as f64 / total as f64,  // four letters in a row
  73         );
  74
  75         let i64s_in_range = if range_low > 1 << 63 {
  76             0
  77         } else {
  78             range_high.min(1 << 63) - range_low
  79         };
  80         i64_total    += i64s_in_range;
  81         i64_passed   += (i64s_in_range as f64 * passed   as f64 / total as f64) as u64;
  82         i64_repeat2  += (i64s_in_range as f64 * repeat2  as f64 / total as f64) as u64;
  83         i64_repeat3  += (i64s_in_range as f64 * repeat3  as f64 / total as f64) as u64;
  84         i64_letters3 += (i64s_in_range as f64 * letters3 as f64 / total as f64) as u64;
  85         i64_letters4 += (i64s_in_range as f64 * letters4 as f64 / total as f64) as u64;
  86     }
  87     println!("└────────┴────────┴─────────┴─────────┴──────────┴──────────┘");
  88
  89     println!("i64 (as commonly used by SQL databases) statistics, given sparsity=1, discriminant=0:");
  90     assert_eq!(i64_total, 1 << 63);  // Sanity check 🙂
  91     println!(" • Total (=2⁶³)  : {i64_total:19}");
  92     println!(" • Passed all    : {i64_passed:19} ({:5.2}%)",   100.0 * i64_passed   as f64 / i64_total as f64);
  93     println!(" • Fail repeat2  : {i64_repeat2:19} ({:5.2}%)",  100.0 * i64_repeat2  as f64 / i64_total as f64);
  94     println!(" • Fail repeat3  : {i64_repeat3:19} ({:5.2}%)",  100.0 * i64_repeat3  as f64 / i64_total as f64);
  95     println!(" • Fail letters3 : {i64_letters3:19} ({:5.2}%)", 100.0 * i64_letters3 as f64 / i64_total as f64);
  96     println!(" • Fail letters4 : {i64_letters4:19} ({:5.2}%)", 100.0 * i64_letters4 as f64 / i64_total as f64);
  97     println!("(Most of the TESIDs fall in the 14-character range. With higher sparsity, you could get into the higher echelons of long IDs and new peaks of fail rates, but hopefully you can see that even if you reject 99% of IDs, there are still rather a lot left!)");
  98 }