r"""
TESID: Textualised Encrypted Sequential Identifiers

Example
=======

>>> from tesid import TESIDCoder
>>> secret_key = '000102030405060708090a0b0c0d0e0f'
>>> coder = TESIDCoder(secret_key)
>>> coder.encode(0)
'w2ej'
>>> coder.encode(1)
'w6um'
>>> coder.encode(2)
'x45g'
>>> coder.encode(2**20 - 1)
'atcw'
>>> coder.encode(2**20)
'8qwm6y'
>>> coder.encode(2**30 - 1)
'3eipc7'
>>> coder.encode(2**30)
'n3md95r4'
>>> coder.encode(2**100 - 1)
'ia2bvpjaiju7g5uaxn5t'
>>> coder.encode(2**100)
Traceback (most recent call last):
    ...
ValueError: id out of range
>>> coder.decode('w2ej')
0
"""

from typing import List, NamedTuple, TypeVar, Generic, cast
from enum import Enum

from . import base32, fpeck

__all__ = ['TESIDCoder', 'TypedTESIDCoder']

TDiscriminant = TypeVar('TDiscriminant')

# The only characters permitted in a key string.
_HEX_DIGITS = frozenset('0123456789abcdef')


class SplitDecode(Generic[TDiscriminant]):
    """
    The result of a ``split_decode`` call: a decoded ID paired with the
    discriminant that was separated from it.
    """

    __slots__ = 'id', 'discriminant'
    id: int
    discriminant: TDiscriminant

    def __init__(self, id: int, discriminant: TDiscriminant):
        self.id = id
        self.discriminant = discriminant

    def __repr__(self):
        return f'SplitDecode(id={self.id!r}, discriminant={self.discriminant!r})'

    # Defining __eq__ without __hash__ leaves instances unhashable, which
    # suits a mutable value object.
    def __eq__(self, other):
        if not isinstance(other, SplitDecode):
            # Let Python try the reflected comparison (and ultimately fall
            # back to "not equal") instead of raising AttributeError.
            return NotImplemented
        return self.id == other.id and self.discriminant == other.discriminant


class TESIDCoder:
    """
    The TESID coder.

    >>> from tesid import TESIDCoder
    >>> coder = TESIDCoder('000102030405060708090a0b0c0d0e0f')

    And for tagging, defining constants is good practice (though look at
    ``TypedTESIDCoder`` if you’re doing this kind of discrimination):

    >>> TYPE_SPARSITY = 256  # meaning up to 256 possible types
    >>> TYPE_A = 0
    >>> TYPE_B = 1
    >>> TYPE_C = 2

    (Methods’ examples start with this foundation.)
    """

    expanded_key: List[int]

    def __init__(self, key: str):
        """
        Initialise a TESID coder.

        The key string must be made up of exactly 32 lowercase hexadecimal
        (0-9a-f) characters, and should have been generated randomly.
        Refer to external documentation for information on key generation.

        Raises ValueError if the key is not exactly 32 lowercase hexadecimal
        characters.
        """
        # Check the characters explicitly: int(key, 16) on its own would also
        # accept uppercase digits, a "0x" prefix, a sign, surrounding
        # whitespace and underscores.
        if len(key) != 32 or not _HEX_DIGITS.issuperset(key):
            raise ValueError('key must be 32 lowercase hex characters')
        self.expanded_key = fpeck.expand(int(key, 16))

    def encode(self, id: int, *, sparsity: int = 1, discriminant: int = 0) -> str:
        """
        Encode an ID.

        Raises ValueError if ``id * sparsity + discriminant`` is not in the
        range [0, 2¹⁰⁰).

        >>> coder.encode(0)
        'w2ej'

        You can use sparsity and discriminant for things like type tagging:

        >>> coder.encode(0, sparsity=TYPE_SPARSITY, discriminant=TYPE_A)
        'w2ej'
        >>> coder.encode(0, sparsity=TYPE_SPARSITY, discriminant=TYPE_B)
        'w6um'
        >>> coder.encode(0, sparsity=TYPE_SPARSITY, discriminant=TYPE_C)
        'x45g'
        >>> coder.encode(1, sparsity=TYPE_SPARSITY, discriminant=TYPE_A)
        'dh2h'
        >>> coder.encode(1, sparsity=TYPE_SPARSITY, discriminant=TYPE_B)
        'a6xy'
        >>> coder.encode(1, sparsity=TYPE_SPARSITY, discriminant=TYPE_C)
        '7xgj'
        """
        value = id * sparsity + discriminant
        if value < 0:
            raise ValueError('id out of range')
        # Pick the narrowest block width (20, 30, …, 100 bits) that can hold
        # the value; anything needing more than 100 bits is out of range.
        for bits in range(20, 101, 10):
            if value < 2**bits:
                break
        else:
            raise ValueError('id out of range')
        # Half the block width goes to the cipher; the text is one character
        # per five bits.
        return base32.encode(
            fpeck.encrypt(self.expanded_key, bits // 2, value),
            bits // 5,
        )

    def decode(self, tesid: str, *, sparsity: int = 1, discriminant: int = 0) -> int:
        """
        Decode an ID.

        Raises ValueError if anything goes wrong.

        >>> coder.decode('w2ej')
        0
        >>> coder.decode('invalid')
        Traceback (most recent call last):
            ...
        ValueError: invalid TESID (wrong length)

        If sparsity and/or discriminant were used on encode, matching values
        must be provided here, or else it will fail to decode:

        >>> coder.decode('w2ej', sparsity=TYPE_SPARSITY, discriminant=TYPE_A)
        0
        >>> coder.decode('w2ej', sparsity=TYPE_SPARSITY, discriminant=TYPE_C)
        Traceback (most recent call last):
            ...
        ValueError: invalid TESID (wrong discriminant or sparsity)
        """
        length = len(tesid)
        # Five cipher bits per pair of characters (the counterpart of the
        # widths chosen in encode).
        n = (length // 2) * 5
        if length % 2 or not (10 <= n <= 50):
            raise ValueError('invalid TESID (wrong length)')
        encrypted = base32.decode(tesid)
        if encrypted is None:
            raise ValueError('invalid TESID (wrong characters)')
        value = fpeck.decrypt(self.expanded_key, n, encrypted)
        # Overly-long-encoding range check: a value that would have fitted in
        # the next shorter length must not be accepted at this length.
        if n > 10 and value < 2**(n * 2 - 10):
            raise ValueError('invalid TESID (overly long encoding)')
        value -= discriminant
        if value < 0 or value % sparsity:
            raise ValueError('invalid TESID (wrong discriminant or sparsity)')
        return value // sparsity

    def split_decode(self, tesid: str, sparsity: int) -> SplitDecode[int]:
        """
        Decode an ID that was encoded with certain sparsity, separating the
        discriminant and returning it alongside the ID.

        This is useful if you want to accept various discriminants; one
        simple use case is better error reporting: “that’s an ID for type A,
        but this takes IDs for type B”.

        This allows *you* to identify the discriminant, but due to the
        encryption, anyone who has only the ID cannot; if you want users to
        be able to discern the discriminant, consider adding a human-friendly
        prefix to the ID; I like a single uppercase letter or a word followed
        by an underscore.

        This requires that the discriminant be less than the sparsity, or
        incorrect values will be produced.

        Demonstration:

        >>> coder.split_decode('w2ej', TYPE_SPARSITY)
        SplitDecode(id=0, discriminant=0)
        >>> coder.split_decode('w6um', TYPE_SPARSITY)
        SplitDecode(id=0, discriminant=1)
        >>> coder.split_decode('x45g', TYPE_SPARSITY)
        SplitDecode(id=0, discriminant=2)
        >>> coder.split_decode('dh2h', TYPE_SPARSITY)
        SplitDecode(id=1, discriminant=0)
        >>> coder.split_decode('a6xy', TYPE_SPARSITY)
        SplitDecode(id=1, discriminant=1)
        >>> _.id
        1
        >>> coder.split_decode('7xgj', TYPE_SPARSITY)
        SplitDecode(id=1, discriminant=2)
        >>> _.discriminant
        2
        """
        # Decode without sparsity, then split quotient and remainder here.
        combined = self.decode(tesid)
        return SplitDecode(
            id=combined // sparsity,
            discriminant=combined % sparsity,
        )


TTypeEnum = TypeVar('TTypeEnum', bound=Enum)


class TypedTESIDCoder(Generic[TTypeEnum]):
    """
    A TESID coder with type discrimination baked in.

    >>> from tesid import TypedTESIDCoder, TESIDCoder
    >>> from enum import Enum
    >>> class Type(Enum):
    ...     A = 0
    ...     B = 1
    ...     C = 2
    >>> coder = TypedTESIDCoder(TESIDCoder('000102030405060708090a0b0c0d0e0f'), 256, Type)

    (Methods’ examples start with this foundation.)
    """

    def __init__(self, coder: TESIDCoder, sparsity: int, type_enum: type[TTypeEnum]):
        """
        Initialise a typed TESID coder.

        This takes a ``TESIDCoder`` (rather than a key) so that you can share
        a coder, if you don’t always use the one sparsity and type enum.

        ``sparsity`` must exceed the highest variant in ``type_enum``.
        """
        self.coder = coder
        self.sparsity = sparsity
        self.type = type_enum

    def __repr__(self):
        return f'TypedTESIDCoder(coder={self.coder!r}, sparsity={self.sparsity!r}, type={self.type!r})'

    def encode(self, type: TTypeEnum, id: int) -> str:
        """
        Encode an ID and type.

        The enum member’s ``value`` is used as the discriminant.

        >>> coder.encode(Type.A, 0)
        'w2ej'
        >>> coder.encode(Type.B, 0)
        'w6um'
        >>> coder.encode(Type.A, 1)
        'dh2h'
        """
        return self.coder.encode(id, sparsity=self.sparsity, discriminant=type.value)

    def decode(self, type: TTypeEnum, tesid: str) -> int:
        """
        Decode an ID and type.

        Raises ValueError if the TESID is invalid or was not encoded for
        ``type``.

        >>> coder.decode(Type.A, 'w2ej')
        0
        >>> coder.decode(Type.B, 'w6um')
        0
        >>> coder.decode(Type.A, 'dh2h')
        1
        >>> coder.decode(Type.A, 'w6um')
        Traceback (most recent call last):
            ...
        ValueError: invalid TESID (wrong discriminant or sparsity)
        """
        return self.coder.decode(tesid, sparsity=self.sparsity, discriminant=type.value)

    def split_decode(self, tesid: str) -> SplitDecode[TTypeEnum]:
        """
        Decode an ID but separate and return its discriminant too.

        Raises ValueError if the TESID is invalid or its discriminant is not
        a value of the type enum.

        >>> coder.split_decode('w2ej')
        SplitDecode(id=0, discriminant=<Type.A: 0>)
        >>> coder.split_decode('w6um')
        SplitDecode(id=0, discriminant=<Type.B: 1>)
        >>> coder.split_decode('x45g')
        SplitDecode(id=0, discriminant=<Type.C: 2>)
        >>> coder.split_decode('dh2h')
        SplitDecode(id=1, discriminant=<Type.A: 0>)
        >>> coder.split_decode('a6xy')
        SplitDecode(id=1, discriminant=<Type.B: 1>)
        >>> _.id
        1
        >>> coder.split_decode('7xgj')
        SplitDecode(id=1, discriminant=<Type.C: 2>)
        >>> _.discriminant == Type.C
        True
        >>> coder.split_decode('6mqv')  # id=0, discriminant=3
        Traceback (most recent call last):
            ...
        ValueError: 3 is not a valid Type
        """
        with_int = self.coder.split_decode(tesid, sparsity=self.sparsity)
        # Build a fresh, correctly typed result rather than mutating the
        # int-discriminant one; the enum lookup raises ValueError for
        # discriminants that are not members.
        return SplitDecode(
            id=with_int.id,
            discriminant=self.type(with_int.discriminant),
        )