diff --git a/CHANGELOG.md b/CHANGELOG.md index 1da90aa..56e58e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - Add `i18n`, `itertools` - Add `toml` as a hard dependency - Add support for Python dicts as `ConfigSource` +- Implement ULID -> SIQ migrator (with flaws) - First release on pip under name `sakuragasaki46-suou` - Improve sqlalchemy support diff --git a/src/suou/__init__.py b/src/suou/__init__.py index 4e06041..8b663a4 100644 --- a/src/suou/__init__.py +++ b/src/suou/__init__.py @@ -17,18 +17,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. """ from .iding import Siq, SiqCache, SiqType, SiqGen -from .codecs import StringCase -from .configparse import MissingConfigError, MissingConfigWarning, ConfigOptions, ConfigParserConfigSource, ConfigSource, ConfigValue, EnvConfigSource +from .codecs import StringCase, cb32encode, cb32decode +from .bits import count_ones, mask_shift +from .configparse import MissingConfigError, MissingConfigWarning, ConfigOptions, ConfigParserConfigSource, ConfigSource, DictConfigSource, ConfigValue, EnvConfigSource from .functools import deprecated, not_implemented from .classtools import Wanted, Incomplete from .itertools import makelist, kwargs_prefix from .i18n import I18n, JsonI18n, TomlI18n -__version__ = "0.2.0-dev21" +__version__ = "0.2.0" __all__ = ( 'Siq', 'SiqCache', 'SiqType', 'SiqGen', 'StringCase', - 'MissingConfigError', 'MissingConfigWarning', 'ConfigOptions', 'ConfigParserConfigSource', 'ConfigSource', 'ConfigValue', 'EnvConfigSource', + 'MissingConfigError', 'MissingConfigWarning', 'ConfigOptions', 'ConfigParserConfigSource', 'ConfigSource', 'ConfigValue', 'EnvConfigSource', 'DictConfigSource', 'deprecated', 'not_implemented', 'Wanted', 'Incomplete', - 'makelist', 'kwargs_prefix', 'I18n', 'JsonI18n', 'TomlI18n' + 'makelist', 'kwargs_prefix', 'I18n', 'JsonI18n', 'TomlI18n', 'cb32encode', 'cb32decode', 'count_ones', 'mask_shift' ) diff --git a/src/suou/configparse.py 
b/src/suou/configparse.py index a217f91..ace075f 100644 --- a/src/suou/configparse.py +++ b/src/suou/configparse.py @@ -98,7 +98,7 @@ class ConfigParserConfigSource(ConfigSource): class DictConfigSource(ConfigSource): ''' - Config source from Python mappings + Config source from Python mappings. Useful with JSON/TOML config ''' __slots__ = ('_d',) @@ -126,6 +126,12 @@ class ConfigValue: You can specify further sources, if the parent ConfigOptions class supports them. + + Arguments: + - public: mark value as public, making it available across the app (e.g. in Jinja2 templates). + - prefix: src but for the lazy + - preserve_case: if True, src is not CAPITALIZED. Useful for parsing from Python dictionaries or ConfigParser's + - required: throw an error if empty or not supplied """ # XXX disabled for https://stackoverflow.com/questions/45864273/slots-conflicts-with-a-class-variable-in-a-generic-class #__slots__ = ('_srcs', '_val', '_default', '_cast', '_required', '_preserve_case') @@ -215,6 +221,8 @@ class ConfigOptions: def expose(self, public_name: str, attr_name: str | None = None) -> None: ''' Mark a config value as public. + + Called automatically by ConfigValue.__set_name__(). ''' attr_name = attr_name or public_name self._pub[public_name] = attr_name @@ -227,7 +235,7 @@ class ConfigOptions: __all__ = ( - 'MissingConfigError', 'MissingConfigWarning', 'ConfigOptions', 'EnvConfigSource', 'ConfigParserConfigSource', 'ConfigSource', 'ConfigValue' + 'MissingConfigError', 'MissingConfigWarning', 'ConfigOptions', 'EnvConfigSource', 'ConfigParserConfigSource', 'DictConfigSource', 'ConfigSource', 'ConfigValue' ) - + diff --git a/src/suou/migrate.py b/src/suou/migrate.py index b6c1f42..357dc03 100644 --- a/src/suou/migrate.py +++ b/src/suou/migrate.py @@ -59,6 +59,8 @@ class SnowflakeSiqMigrator(SiqMigrator): optimization requirements, are based on a different epoch (e.g. Jan 1, 2015 for Discord); epoch is wanted as seconds since Unix epoch (i.e. midnight of Jan 1, 1970). 
class UlidSiqMigrator(SiqMigrator):
    '''
    Migrate from ULIDs to SIQ.

    ULIDs are 128-bit identifiers made of 48 timestamp bits (expressed in
    milliseconds) followed by 80 random bits.

    Structure (simplified):
    tttttttt tttttttt tttttttt tttttttt tttttttt tttttttt
    rrrrrrrr rrrrrrrr rrrrrrrr rrrrrrrr rrrrrrrr rrrrrrrr
    rrrrrrrr rrrrrrrr rrrrrrrr rrrrrrrr

    A 1-on-1 correspondence between ULIDs and SIQs is impossible: the SIQ
    built here also has to carry the domain hash and the type tag, so most
    of the random part cannot be kept.

    Of the 80 random bits, only 24 to 27 are preserved:
    - the top 6 bits are added to the converted timestamp;
    - the next 8 bits become the shard ID;
    - the next 16 bits feed the progressive counter, except for the lowest
      ``target_type.n_bits`` bits, which are overwritten by the type tag
      (leaving 10 to 13 bits);
    - the remaining bits are *just discarded*.
    '''

    # Bit fields inside the 80-bit random part of a ULID (bit 0 = least significant).
    _TS_EXTRA_MASK = 0xfc000000000000000000  # bits 74-79: 6 bits added to the timestamp
    _SHARD_MASK = 0x3fc0000000000000000      # bits 66-73: 8 bits used as shard ID
    _SEQ_MASK = 0x3fffc000000000000          # bits 50-65: 16 bits for the counter

    @override
    def to_siq(self, orig_id: int, target_type: SiqType) -> int:
        '''
        Convert a ULID (given as its 128-bit integer value) into a SIQ integer.

        Arguments:
        - orig_id: the ULID as an integer.
        - target_type: SIQ type whose tag is written in the lowest bits.

        Returns the SIQ as an integer laid out as
        timestamp | shard (8 bits) | domain hash (32 bits) | counter + type tag (16 bits).
        '''
        # NOTE(review): mask_shift is presumably "select the masked bits and
        # shift them down to bit 0" -- confirm against suou.bits.
        ts_extra = mask_shift(orig_id, self._TS_EXTRA_MASK)
        shard = mask_shift(orig_id, self._SHARD_MASK)
        seq = mask_shift(orig_id, self._SEQ_MASK)

        # ULID timestamp is in milliseconds; rescale it to 1/65536ths of a
        # second, then add the 6 spare random bits to spread IDs generated
        # within the same millisecond.
        ts = ((orig_id >> 80) << 16) // 1000 + ts_extra

        # Clear the lowest n_bits of the counter to make room for the type tag.
        type_mask = (1 << target_type.n_bits) - 1
        tail = (seq & ~type_mask) | target_type.prepend(0)

        return (
            (ts << 56)
            | ((shard & 0xff) << 48)
            # NOTE(review): `% 0xffffffff` maps a hash of exactly 0xffffffff
            # to 0; kept as-is so IDs stay consistent with the rest of the
            # package -- confirm whether `& 0xffffffff` was intended.
            | ((self.domain_hash % 0xffffffff) << 16)
            # Mask (not modulo): `% 0xffff` turned an all-ones field into 0,
            # wiping the type tag.
            | (tail & 0xffff)
        )