suou/src/suou/migrate.py

138 lines
4.5 KiB
Python
Raw Normal View History

2025-05-23 21:23:54 +02:00
"""
Tools for migrating to SIS and SIQ.
---
Copyright (c) 2025 Sakuragasaki46.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
See LICENSE for the specific language governing permissions and
limitations under the License.
This software is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
"""
from abc import abstractmethod
from typing import override
from .functools import not_implemented
from .iding import SiqType, make_domain_hash
from .bits import mask_shift, count_ones
class SiqMigrator:
"""
Base class for SIQ migrators.
Every migrator has a .to_siq() function,
taking orig_id and target_type parameters.
"""
def __init__(self, domain: str) -> None:
self.domain_hash = make_domain_hash(domain)
@abstractmethod
def to_siq(self, orig_id, target_type: SiqType) -> int:
pass
class SnowflakeSiqMigrator(SiqMigrator):
"""
Migrate from Snowflake ID's (i.e. the ones in use at Twitter / Discord).
Discord snowflakes are in this format:
tttttttt tttttttt tttttttt tttttttt
tttttttt ttddddds sssspppp pppppppp
where:
t: timestamp (in milliseconds) 42 bits
d: local ID 5 bits
s: shard ID 5 bits
p: progressive counter 10 bits
Converter takes local ID and shard ID as one; latter 8 bits are taken for
the shard ID, while the former 2 are added to timestamp, taking advantage of
more precision along with up to 2 most significant bits of progressive co
The constructor takes an epoch argument, since snowflakes, due to
optimization requirements, are based on a different epoch (e.g.
Jan 1, 2015 for Discord); epoch is wanted as seconds since Unix epoch
(i.e. midnight of Jan 1, 1970).
There should be a 1-on-1 correspondence from snowflakes and SIQs.
2025-05-23 21:23:54 +02:00
"""
def __init__(self, domain: str, epoch: int, *,
ts_stop: int = 22, ts_accuracy: int = 1000,
shard_mask: int = 255 << 12, shard_ts_mask: int = 3 << 20,
serial_mask: int = 4095):
super().__init__(domain)
self.epoch = epoch
self.ts_stop = ts_stop
self.ts_accuracy = ts_accuracy
self.shard_mask = shard_mask
self.shard_ts_mask = shard_ts_mask
self.serial_mask = serial_mask
@override
def to_siq(self, orig_id: int, target_type: SiqType) -> int:
2025-05-23 21:23:54 +02:00
ts_ms = (orig_id >> self.ts_stop) + self.epoch
ts = int(ts_ms / self.ts_accuracy * (1 << 16))
shard = mask_shift(orig_id, self.shard_mask)
shard_hi = 0
if self.shard_ts_mask:
shard_hi += mask_shift(orig_id, self.shard_ts_mask)
ser = mask_shift(orig_id, self.serial_mask)
ser_bits = 16 - target_type.n_bits
orig_ser_bits = count_ones(self.serial_mask)
if ser_bits < orig_ser_bits:
ser, ser_hi = ser % (1 << ser_bits), ser >> ser_bits
shard_hi <<= orig_ser_bits - ser_bits
shard_hi += ser_hi
ts += shard_hi
return (
(ts << 56)|
((shard % 256) << 48)|
((self.domain_hash % 0xffffffff)<< 16)|
(target_type.prepend(ser) % 0xffff)
)
class UlidSiqMigrator(SiqMigrator):
'''
Migrate from ULID's to SIQ.
ULIDs are 128-bit identifiers with 48 timestamp bits (expressed in milliseconds) and 80 random bits.
Structure (simplified):
tttttttt tttttttt tttttttt tttttttt tttttttt tttttttt
rrrrrrrr rrrrrrrr rrrrrrrr rrrrrrrr rrrrrrrr rrrrrrrr
rrrrrrrr rrrrrrrr rrrrrrrr rrrrrrrr
For obvious reasons, this makes 1-on-1 correspondence impossible. (Yes, the 16 spare bits.)
It means that, of the 80 random bits, only 24 to 27 bits are preserved:
- 6 bits summed to the timestamp.
- 8 bits as shard ID.
- 10 to 13 bits in the progressive counter.
- The rest is *just discarded*.
2025-05-23 21:23:54 +02:00
'''
@override
def to_siq(self, orig_id, target_type: SiqType) -> int:
ts_seq = mask_shift(orig_id, 0xfc000000000000000000)
shard = mask_shift(orig_id, 0x3fc0000000000000000)
seq = mask_shift(orig_id, 0x3fffc000000000000)
ts = ((orig_id >> 80) << 16) // 1000 + ts_seq
return (
(ts << 56)|
((shard % 256) << 48)|
((self.domain_hash % 0xffffffff) << 16)|
(((seq & ~((1 << target_type.n_bits) - 1)) | target_type.prepend(0)) % 0xffff)
)
2025-05-23 21:23:54 +02:00
__all__ = (
'SnowflakeSiqMigrator', 'UlidSiqMigrator'
)