2025-05-23 21:23:54 +02:00
|
|
|
"""
|
|
|
|
|
Tools for migrating to SIS and SIQ.
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
Copyright (c) 2025 Sakuragasaki46.
|
|
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
|
See LICENSE for the specific language governing permissions and
|
|
|
|
|
limitations under the License.
|
|
|
|
|
|
|
|
|
|
This software is distributed on an "AS IS" BASIS,
|
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from abc import abstractmethod
|
|
|
|
|
from typing import override
|
|
|
|
|
|
|
|
|
|
from .functools import not_implemented
|
|
|
|
|
from .iding import SiqType, make_domain_hash
|
|
|
|
|
|
|
|
|
|
from .bits import mask_shift, count_ones
|
|
|
|
|
|
|
|
|
|
class SiqMigrator:
|
|
|
|
|
"""
|
|
|
|
|
Base class for SIQ migrators.
|
|
|
|
|
|
|
|
|
|
Every migrator has a .to_siq() function,
|
|
|
|
|
taking orig_id and target_type parameters.
|
|
|
|
|
"""
|
|
|
|
|
def __init__(self, domain: str) -> None:
|
|
|
|
|
self.domain_hash = make_domain_hash(domain)
|
|
|
|
|
|
|
|
|
|
@abstractmethod
|
|
|
|
|
def to_siq(self, orig_id, target_type: SiqType) -> int:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
class SnowflakeSiqMigrator(SiqMigrator):
|
|
|
|
|
"""
|
|
|
|
|
Migrate from Snowflake ID's (i.e. the ones in use at Twitter / Discord).
|
|
|
|
|
|
|
|
|
|
Discord snowflakes are in this format:
|
|
|
|
|
tttttttt tttttttt tttttttt tttttttt
|
|
|
|
|
tttttttt ttddddds sssspppp pppppppp
|
|
|
|
|
|
|
|
|
|
where:
|
|
|
|
|
t: timestamp (in milliseconds) — 42 bits
|
|
|
|
|
d: local ID — 5 bits
|
|
|
|
|
s: shard ID — 5 bits
|
|
|
|
|
p: progressive counter — 10 bits
|
|
|
|
|
|
|
|
|
|
Converter takes local ID and shard ID as one; latter 8 bits are taken for
|
|
|
|
|
the shard ID, while the former 2 are added to timestamp, taking advantage of
|
|
|
|
|
more precision — along with up to 2 most significant bits of progressive co
|
|
|
|
|
|
|
|
|
|
The constructor takes an epoch argument, since snowflakes, due to
|
|
|
|
|
optimization requirements, are based on a different epoch (e.g.
|
|
|
|
|
Jan 1, 2015 for Discord); epoch is wanted as seconds since Unix epoch
|
|
|
|
|
(i.e. midnight of Jan 1, 1970).
|
2025-05-27 01:29:52 +02:00
|
|
|
|
|
|
|
|
There should be a 1-on-1 correspondence from snowflakes and SIQs.
|
2025-05-23 21:23:54 +02:00
|
|
|
"""
|
|
|
|
|
def __init__(self, domain: str, epoch: int, *,
|
|
|
|
|
ts_stop: int = 22, ts_accuracy: int = 1000,
|
|
|
|
|
shard_mask: int = 255 << 12, shard_ts_mask: int = 3 << 20,
|
|
|
|
|
serial_mask: int = 4095):
|
|
|
|
|
super().__init__(domain)
|
|
|
|
|
self.epoch = epoch
|
|
|
|
|
self.ts_stop = ts_stop
|
|
|
|
|
self.ts_accuracy = ts_accuracy
|
|
|
|
|
self.shard_mask = shard_mask
|
|
|
|
|
self.shard_ts_mask = shard_ts_mask
|
|
|
|
|
self.serial_mask = serial_mask
|
|
|
|
|
|
|
|
|
|
@override
|
2025-05-27 01:29:52 +02:00
|
|
|
def to_siq(self, orig_id: int, target_type: SiqType) -> int:
|
2025-05-23 21:23:54 +02:00
|
|
|
ts_ms = (orig_id >> self.ts_stop) + self.epoch
|
|
|
|
|
ts = int(ts_ms / self.ts_accuracy * (1 << 16))
|
|
|
|
|
shard = mask_shift(orig_id, self.shard_mask)
|
|
|
|
|
shard_hi = 0
|
|
|
|
|
if self.shard_ts_mask:
|
|
|
|
|
shard_hi += mask_shift(orig_id, self.shard_ts_mask)
|
|
|
|
|
ser = mask_shift(orig_id, self.serial_mask)
|
|
|
|
|
ser_bits = 16 - target_type.n_bits
|
|
|
|
|
orig_ser_bits = count_ones(self.serial_mask)
|
|
|
|
|
if ser_bits < orig_ser_bits:
|
|
|
|
|
ser, ser_hi = ser % (1 << ser_bits), ser >> ser_bits
|
|
|
|
|
shard_hi <<= orig_ser_bits - ser_bits
|
|
|
|
|
shard_hi += ser_hi
|
|
|
|
|
ts += shard_hi
|
|
|
|
|
return (
|
|
|
|
|
(ts << 56)|
|
|
|
|
|
((shard % 256) << 48)|
|
|
|
|
|
((self.domain_hash % 0xffffffff)<< 16)|
|
|
|
|
|
(target_type.prepend(ser) % 0xffff)
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class UlidSiqMigrator(SiqMigrator):
|
|
|
|
|
'''
|
|
|
|
|
Migrate from ULID's to SIQ.
|
|
|
|
|
|
2025-05-27 01:29:52 +02:00
|
|
|
ULIDs are 128-bit identifiers with 48 timestamp bits (expressed in milliseconds) and 80 random bits.
|
|
|
|
|
|
|
|
|
|
Structure (simplified):
|
|
|
|
|
tttttttt tttttttt tttttttt tttttttt tttttttt tttttttt
|
|
|
|
|
rrrrrrrr rrrrrrrr rrrrrrrr rrrrrrrr rrrrrrrr rrrrrrrr
|
|
|
|
|
rrrrrrrr rrrrrrrr rrrrrrrr rrrrrrrr
|
|
|
|
|
|
|
|
|
|
For obvious reasons, this makes 1-on-1 correspondence impossible. (Yes, the 16 spare bits.)
|
|
|
|
|
|
|
|
|
|
It means that, of the 80 random bits, only 24 to 27 bits are preserved:
|
|
|
|
|
- 6 bits summed to the timestamp.
|
|
|
|
|
- 8 bits as shard ID.
|
|
|
|
|
- 10 to 13 bits in the progressive counter.
|
|
|
|
|
- The rest is *just discarded*.
|
2025-05-23 21:23:54 +02:00
|
|
|
'''
|
|
|
|
|
|
2025-05-27 01:29:52 +02:00
|
|
|
@override
|
|
|
|
|
def to_siq(self, orig_id, target_type: SiqType) -> int:
|
|
|
|
|
ts_seq = mask_shift(orig_id, 0xfc000000000000000000)
|
|
|
|
|
shard = mask_shift(orig_id, 0x3fc0000000000000000)
|
|
|
|
|
seq = mask_shift(orig_id, 0x3fffc000000000000)
|
|
|
|
|
|
|
|
|
|
ts = ((orig_id >> 80) << 16) // 1000 + ts_seq
|
|
|
|
|
return (
|
|
|
|
|
(ts << 56)|
|
|
|
|
|
((shard % 256) << 48)|
|
|
|
|
|
((self.domain_hash % 0xffffffff) << 16)|
|
|
|
|
|
(((seq & ~((1 << target_type.n_bits) - 1)) | target_type.prepend(0)) % 0xffff)
|
|
|
|
|
)
|
|
|
|
|
|
2025-05-23 21:23:54 +02:00
|
|
|
|
2025-05-27 01:29:52 +02:00
|
|
|
__all__ = (
|
|
|
|
|
'SnowflakeSiqMigrator', 'UlidSiqMigrator'
|
|
|
|
|
)
|