python/src/tesid/__init__.py - tesid blob - Chris Morgan’s Git repositories

   1 r"""
   2 TESID: Textualised Encrypted Sequential Identifiers
   3
   4 Example
   5 =======
   6
   7 >>> from tesid import TESIDCoder
   8 >>> secret_key = '000102030405060708090a0b0c0d0e0f'
   9 >>> coder = TESIDCoder(secret_key)
  10 >>> coder.encode(0)
  11 'w2ej'
  12 >>> coder.encode(1)
  13 'w6um'
  14 >>> coder.encode(2)
  15 'x45g'
  16 >>> coder.encode(2**20 - 1)
  17 'atcw'
  18 >>> coder.encode(2**20)
  19 '8qwm6y'
  20 >>> coder.encode(2**30 - 1)
  21 '3eipc7'
  22 >>> coder.encode(2**30)
  23 'n3md95r4'
  24 >>> coder.encode(2**100 - 1)
  25 'ia2bvpjaiju7g5uaxn5t'
  26 >>> coder.encode(2**100)
  27 Traceback (most recent call last):
  28   ...
  29 ValueError: id out of range
  30 >>> coder.decode('w2ej')
  31 0
  32
  33 """
  34
  35 from typing import List, NamedTuple, TypeVar, Generic, cast
  36 from enum import Enum
  37 from . import base32, fpeck
  38
# Names exported by ``from tesid import *``: only the two coder classes are
# listed here.
__all__ = ['TESIDCoder', 'TypedTESIDCoder']
  40
  41
  42 TDiscriminant = TypeVar('TDiscriminant')
  43 class SplitDecode(Generic[TDiscriminant]):
  44     __slots__ = 'id', 'discriminant'
  45     id: int
  46     discriminant: TDiscriminant
  47
  48     def __init__(self, id: int, discriminant: TDiscriminant):
  49         self.id = id
  50         self.discriminant = discriminant
  51
  52     def __repr__(self):
  53         return f'SplitDecode(id={self.id!r}, discriminant={self.discriminant!r})'
  54
  55     def __eq__(self, other):
  56         return self.id == other.id and self.discriminant == other.discriminant
  57
  58
  59 class TESIDCoder:
  60     """
  61     The TESID coder.
  62
  63     >>> from tesid import TESIDCoder
  64     >>> coder = TESIDCoder('000102030405060708090a0b0c0d0e0f')
  65
  66     And for tagging, defining constants is good practice (though look at
  67     ``TypedTESIDCoder`` if you’re doing this kind of discrimination):
  68
  69     >>> TYPE_SPARSITY = 256  # meaning up to 256 possible types
  70     >>> TYPE_A = 0
  71     >>> TYPE_B = 1
  72     >>> TYPE_C = 2
  73
  74     (Methods’ examples start with this foundation.)
  75     """
  76
  77     expanded_key: List[int]
  78
  79     def __init__(self, key: str):
  80         """
  81         Initialise a TESID coder.
  82
  83         The key string must be made up of exactly 32 lowercase hexadecimal
  84         (0-9a-f) characters, and should have been generated randomly.
  85         Refer to external documentation for information on key generation.
  86         """
  87         if key.isupper() or len(key) != 32:
  88             raise ValueError('key must be 32 lowercase hex characters')
  89
  90         self.expanded_key = fpeck.expand(int(key, 16))
  91
  92     def encode(self, id: int, *, sparsity: int = 1, discriminant: int = 0) -> str:
  93         """
  94         Encode an ID.
  95
  96         Raises ValueError if ``id * sparsity + discriminant``
  97         is not in the range [0, 2¹⁰⁰).
  98
  99         >>> coder.encode(0)
 100         'w2ej'
 101
 102         You can use sparsity and discriminant for things like type tagging:
 103
 104         >>> coder.encode(0, sparsity=TYPE_SPARSITY, discriminant=TYPE_A)
 105         'w2ej'
 106         >>> coder.encode(0, sparsity=TYPE_SPARSITY, discriminant=TYPE_B)
 107         'w6um'
 108         >>> coder.encode(0, sparsity=TYPE_SPARSITY, discriminant=TYPE_C)
 109         'x45g'
 110         >>> coder.encode(1, sparsity=TYPE_SPARSITY, discriminant=TYPE_A)
 111         'dh2h'
 112         >>> coder.encode(1, sparsity=TYPE_SPARSITY, discriminant=TYPE_B)
 113         'a6xy'
 114         >>> coder.encode(1, sparsity=TYPE_SPARSITY, discriminant=TYPE_C)
 115         '7xgj'
 116
 117         """
 118         id = id * sparsity + discriminant
 119
 120         i = (None if id < 0 else
 121                 0 if id < 2**20 else
 122                 1 if id < 2**30 else
 123                 2 if id < 2**40 else
 124                 3 if id < 2**50 else
 125                 4 if id < 2**60 else
 126                 5 if id < 2**70 else
 127                 6 if id < 2**80 else
 128                 7 if id < 2**90 else
 129                 8 if id < 2**100 else None)
 130
 131         if i is None:
 132             raise ValueError('id out of range')
 133
 134         return base32.encode(
 135             fpeck.encrypt(self.expanded_key, (i + 2) * 5, id),
 136             (i + 2) * 2,
 137         )
 138
 139     def decode(self, tesid: str, *, sparsity: int = 1, discriminant: int = 0) -> int:
 140         """
 141         Decode an ID.
 142
 143         Raises ValueError if anything goes wrong.
 144
 145         >>> coder.decode('w2ej')
 146         0
 147         >>> coder.decode('invalid')
 148         Traceback (most recent call last):
 149           ...
 150         ValueError: invalid TESID (wrong length)
 151
 152         If sparsity and/or discriminant were used on encode, matching values
 153         must be provided here, or else it will fail to decode:
 154
 155         >>> coder.decode('w2ej', sparsity=TYPE_SPARSITY, discriminant=TYPE_A)
 156         0
 157         >>> coder.decode('w2ej', sparsity=TYPE_SPARSITY, discriminant=TYPE_C)
 158         Traceback (most recent call last):
 159           ...
 160         ValueError: invalid TESID (wrong discriminant or sparsity)
 161
 162         """
 163         n = (len(tesid) // 2) * 5
 164         if len(tesid) % 2 or not (10 <= n <= 50):
 165             raise ValueError('invalid TESID (wrong length)')
 166         id = base32.decode(tesid)
 167         if id is None:
 168             raise ValueError('invalid TESID (wrong characters)')
 169         id = fpeck.decrypt(self.expanded_key, n, id)
 170
 171         # Overly-long-encoding range check
 172         if n > 10 and id < 2**(n * 2 - 10):
 173             raise ValueError('invalid TESID (overly long encoding)')
 174
 175         id -= discriminant
 176         if id < 0 or id % sparsity:
 177             raise ValueError('invalid TESID (wrong discriminant or sparsity)')
 178         return id // sparsity
 179
 180     def split_decode(self, tesid: str, sparsity: int) -> SplitDecode[int]:
 181         """
 182         Decode an ID that was encoded with certain sparsity,
 183         separating the discriminant and returning it alongside the ID.
 184
 185         This is useful if you want to accept various discriminants;
 186         one simple use case is better error reporting:
 187         “that’s an ID for type A, but this takes IDs for type B”.
 188
 189         This allows *you* to identify the discriminant,
 190         but due to the encryption, anyone who has only the ID cannot;
 191         if you want users to be able to discern the discriminant,
 192         consider adding a human-friendly prefix to the ID;
 193         I like a single uppercase letter or a word followed by an underscore.
 194
 195         This requires that the discriminant be less than the sparsity,
 196         or incorrect values will be produced.
 197
 198         Demonstration:
 199
 200         >>> coder.split_decode('w2ej', TYPE_SPARSITY)
 201         SplitDecode(id=0, discriminant=0)
 202         >>> coder.split_decode('w6um', TYPE_SPARSITY)
 203         SplitDecode(id=0, discriminant=1)
 204         >>> coder.split_decode('x45g', TYPE_SPARSITY)
 205         SplitDecode(id=0, discriminant=2)
 206         >>> coder.split_decode('dh2h', TYPE_SPARSITY)
 207         SplitDecode(id=1, discriminant=0)
 208         >>> coder.split_decode('a6xy', TYPE_SPARSITY)
 209         SplitDecode(id=1, discriminant=1)
 210         >>> _.id
 211         1
 212         >>> coder.split_decode('7xgj', TYPE_SPARSITY)
 213         SplitDecode(id=1, discriminant=2)
 214         >>> _.discriminant
 215         2
 216
 217         """
 218         id = self.decode(tesid)
 219         return SplitDecode(id=id // sparsity, discriminant=id % sparsity)
 220
 221
 222 TTypeEnum = TypeVar('TTypeEnum', bound=Enum)
 223 class TypedTESIDCoder(Generic[TTypeEnum]):
 224     """
 225     A TESID coder with type discrimination baked in.
 226
 227     >>> from tesid import TypedTESIDCoder, TESIDCoder
 228     >>> from enum import Enum
 229     >>> class Type(Enum):
 230     ...     A = 0
 231     ...     B = 1
 232     ...     C = 2
 233     >>> coder = TypedTESIDCoder(TESIDCoder('000102030405060708090a0b0c0d0e0f'), 256, Type)
 234
 235     (Methods’ examples start with this foundation.)
 236     """
 237
 238     def __init__(self, coder: TESIDCoder, sparsity: int, type_enum: type[TTypeEnum]):
 239         """
 240         Initialise a typed TESID coder.
 241
 242         This takes a ``TESIDCoder`` (rather than a key) so that you can share a
 243         coder, if you don’t always use the one sparsity and type enum.
 244
 245         ``sparsity`` must exceed the highest variant in ``type_enum``.
 246         """
 247         self.coder = coder
 248         self.sparsity = sparsity
 249         self.type = type_enum
 250
 251     def __repr__(self):
 252         return f'TypedTESIDCoder(coder={self.coder!r}, sparsity={self.sparsity!r}, type={self.type!r})'
 253
 254     def encode(self, type: TTypeEnum, id: int):
 255         """
 256         Encode an ID and type.
 257
 258         >>> coder.encode(Type.A, 0)
 259         'w2ej'
 260         >>> coder.encode(Type.B, 0)
 261         'w6um'
 262         >>> coder.encode(Type.A, 1)
 263         'dh2h'
 264
 265         """
 266         return self.coder.encode(id, sparsity=self.sparsity, discriminant=type.value)
 267
 268     def decode(self, type: TTypeEnum, tesid: str):
 269         """
 270         Decode an ID and type.
 271
 272         >>> coder.decode(Type.A, 'w2ej')
 273         0
 274         >>> coder.decode(Type.B, 'w6um')
 275         0
 276         >>> coder.decode(Type.A, 'dh2h')
 277         1
 278         >>> coder.decode(Type.A, 'w6um')
 279         Traceback (most recent call last):
 280           ...
 281         ValueError: invalid TESID (wrong discriminant or sparsity)
 282
 283         """
 284         return self.coder.decode(tesid, sparsity=self.sparsity, discriminant=type.value)
 285
 286     def split_decode(self, tesid: str) -> SplitDecode[TTypeEnum]:
 287         """
 288         Decode an ID but separate and return its discriminant too.
 289
 290         >>> coder.split_decode('w2ej')
 291         SplitDecode(id=0, discriminant=<Type.A: 0>)
 292         >>> coder.split_decode('w6um')
 293         SplitDecode(id=0, discriminant=<Type.B: 1>)
 294         >>> coder.split_decode('x45g')
 295         SplitDecode(id=0, discriminant=<Type.C: 2>)
 296         >>> coder.split_decode('dh2h')
 297         SplitDecode(id=1, discriminant=<Type.A: 0>)
 298         >>> coder.split_decode('a6xy')
 299         SplitDecode(id=1, discriminant=<Type.B: 1>)
 300         >>> _.id
 301         1
 302         >>> coder.split_decode('7xgj')
 303         SplitDecode(id=1, discriminant=<Type.C: 2>)
 304         >>> _.discriminant == Type.C
 305         True
 306         >>> coder.split_decode('6mqv')  # id=0, discriminant=3
 307         Traceback (most recent call last):
 308           ...
 309         ValueError: 3 is not a valid Type
 310
 311         """
 312         with_int = self.coder.split_decode(tesid, sparsity=self.sparsity)
 313         with_typed = cast(SplitDecode[TTypeEnum], with_int)
 314         with_typed.discriminant = self.type(with_int.discriminant)
 315         return with_typed