diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6b70a17..f767253 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,13 @@
 + New module `lex` with functions `symbol_table()` and `lex()` — make tokenization more affordable
 + Add `dorks` module and `flask.harden()`
 + Add `sqlalchemy.bool_column()`: make making flags painless
-+ Added `addattr()`, `PrefixIdentifier()`
++ Introduce `rb64encode()` and `rb64decode()` to deal with issues about Base64 and padding
++ Added `addattr()`, `PrefixIdentifier()`, `mod_floor()`, `mod_ceil()`
++ First version to have unit tests!
+
+## 0.3.7
+
+- Fixed a bug in `b64decode()` padding handling which made the function inconsistent and non-injective. Now, leading `'A'` is NEVER stripped.
 
 ## 0.3.6
 
diff --git a/src/suou/__init__.py b/src/suou/__init__.py
index a3dfff1..1a8a077 100644
--- a/src/suou/__init__.py
+++ b/src/suou/__init__.py
@@ -18,8 +18,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 
 from .iding import Siq, SiqCache, SiqType, SiqGen
 from .codecs import (StringCase, cb32encode, cb32decode, b32lencode, b32ldecode, b64encode, b64decode, b2048encode, b2048decode,
-    jsonencode, want_bytes, want_str, ssv_list)
-from .bits import count_ones, mask_shift, split_bits, join_bits
+    jsonencode, want_bytes, want_str, ssv_list, want_urlsafe)
+from .bits import count_ones, mask_shift, split_bits, join_bits, mod_ceil, mod_floor
 from .configparse import MissingConfigError, MissingConfigWarning, ConfigOptions, ConfigParserConfigSource, ConfigSource, DictConfigSource, ConfigValue, EnvConfigSource
 from .functools import deprecated, not_implemented
 from .classtools import Wanted, Incomplete
@@ -37,7 +37,8 @@ __all__ = (
     'SiqType', 'Snowflake', 'SnowflakeGen', 'StringCase', 'TomlI18n', 'Wanted',
     'additem', 'b2048decode', 'b2048encode', 'b32ldecode', 'b32lencode',
     'b64encode', 'b64decode', 'cb32encode', 'cb32decode', 'count_ones',
-    'deprecated', 'ilex', 'join_bits', 'jsonencode', 'kwargs_prefix', 'lex', 'ltuple',
-    'makelist', 'mask_shift', 'not_implemented', 'rtuple', 'split_bits',
-    'ssv_list', 'symbol_table', 'want_bytes', 'want_str'
+    'deprecated', 'ilex', 'join_bits', 'jsonencode', 'kwargs_prefix', 'lex',
+    'ltuple', 'makelist', 'mask_shift', 'mod_ceil', 'mod_floor',
+    'not_implemented', 'rtuple', 'split_bits', 'ssv_list', 'symbol_table',
+    'want_bytes', 'want_str', 'want_urlsafe'
 )
diff --git a/src/suou/bits.py b/src/suou/bits.py
index 0288c0f..86f55ba 100644
--- a/src/suou/bits.py
+++ b/src/suou/bits.py
@@ -1,5 +1,5 @@
 '''
-Utilities for working with bits
+Utilities for working with bits & handy arithmetics
 
 ---
 
@@ -93,5 +93,19 @@ def join_bits(l: list[int], nbits: int) -> bytes:
 
     return ou
 
+## arithmetics because yes
 
-__all__ = ('count_ones', 'mask_shift', 'split_bits', 'join_bits')
+def mod_floor(x: int, y: int) -> int:
+    """
+    Greatest multiple of y that is not greater than x (for positive y); returns x itself when y divides x
+    """
+    return x - x % y
+
+def mod_ceil(x: int, y: int) -> int:
+    """
+    Smallest multiple of y that is not smaller than x (for positive y); returns x itself when y divides x
+    """
+    return x + (y - x % y) % y
+
+
+__all__ = ('count_ones', 'mask_shift', 'split_bits', 'join_bits', 'mod_floor', 'mod_ceil')
diff --git a/src/suou/codecs.py b/src/suou/codecs.py
index 3efe53f..f8dbf13 100644
--- a/src/suou/codecs.py
+++ b/src/suou/codecs.py
@@ -22,7 +22,7 @@ import math
 import re
 from typing import Any, Callable
 
-from .bits import split_bits, join_bits
+from .bits import mod_ceil, split_bits, join_bits
 from .functools import deprecated
 
 # yes, I know ItsDangerous implements that as well, but remember
@@ -49,6 +49,25 @@ def want_str(s: str | bytes, encoding: str = "utf-8", errors: str = "strict") ->
         s = s.decode(encoding, errors)
     return s
 
+
+BASE64_TO_URLSAFE = str.maketrans('+/', '-_', ' ')
+
+def want_urlsafe(s: str | bytes) -> str:
+    """
+    Force a Base64 string into its urlsafe representation.
+
+    Behavior is unchecked and undefined with anything else than Base64 strings.
+
+    Used by b64encode() and b64decode().
+    """
+    return want_str(s).translate(BASE64_TO_URLSAFE)
+
+def want_urlsafe_bytes(s: str | bytes) -> bytes:
+    """
+    Shorthand for want_bytes(want_urlsafe(s)).
+    """
+    return want_bytes(want_urlsafe(s))
+
 B32_TO_CROCKFORD = str.maketrans(
     'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
     '0123456789ABCDEFGHJKMNPQRSTVWXYZ',
@@ -59,6 +78,7 @@ CROCKFORD_TO_B32 = str.maketrans(
     'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
     '=')
 
+
 BIP39_WORD_LIST = """
 abandon ability able about above absent absorb abstract absurd abuse access accident account accuse achieve acid acoustic acquire across act action actor actress
 actual adapt add addict address adjust admit adult advance advice aerobic affair afford afraid again age agent agree ahead aim air airport
@@ -178,16 +198,31 @@ def b32ldecode(val: bytes | str) -> bytes:
 
 def b64encode(val: bytes, *, strip: bool = True) -> str:
     '''
-    Wrapper around base64.urlsafe_b64encode() which also strips trailing '=' and leading 'A'.
+    Wrapper around base64.urlsafe_b64encode() which also strips trailing '='.
     '''
     b = want_str(base64.urlsafe_b64encode(val))
-    return b.lstrip('A').rstrip('=') if strip else b
+    return b.rstrip('=') if strip else b
 
 def b64decode(val: bytes | str) -> bytes:
     '''
     Wrapper around base64.urlsafe_b64decode() which deals with padding.
     '''
-    return base64.urlsafe_b64decode(want_bytes(val).replace(b'/', b'_').replace(b'+', b'-') + b'=' * ((4 - len(val) % 4) % 4))
+    val = want_urlsafe(val)
+    return base64.urlsafe_b64decode(val.ljust(mod_ceil(len(val), 4), '='))
+
+def rb64encode(val: bytes, *, strip: bool = True) -> str:
+    '''
+    Call base64.urlsafe_b64encode() with null bytes i.e. '\\0' padding to the start. Leading 'A' are stripped from result.
+    '''
+    b = want_str(base64.urlsafe_b64encode(val.rjust(mod_ceil(len(val), 3), b'\0')))  # fill must be bytes: bytes.rjust() rejects a str fill
+    return b.lstrip('A') if strip else b
+
+def rb64decode(val: bytes | str) -> bytes:
+    '''
+    Wrapper around base64.urlsafe_b64decode() which left-pads the input with 'A' up to a multiple of 4 characters.
+    '''
+    val = want_urlsafe(val)
+    return base64.urlsafe_b64decode(val.rjust(mod_ceil(len(val), 4), 'A'))
 
 def b2048encode(val: bytes) -> str:
     '''
diff --git a/src/suou/strtools.py b/src/suou/strtools.py
index 2381953..ee5264b 100644
--- a/src/suou/strtools.py
+++ b/src/suou/strtools.py
@@ -18,6 +18,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 
 from typing import Callable, Iterable
 
+from pydantic import validate_call
 
 from .itertools import makelist
 
@@ -34,11 +35,16 @@ class PrefixIdentifier:
             raise ValueError('invalid prefix')
         self._prefix = prefix
 
+    @validate_call()
     def __getattr__(self, key: str):
         return f'{self._prefix}{key}'
 
+    @validate_call()
     def __getitem__(self, key: str) -> str:
         return f'{self._prefix}{key}'
 
+    def __str__(self):
+        return f'{self._prefix}'
+
 
 __all__ = ('PrefixIdentifier',)
diff --git a/tests/test_codecs.py b/tests/test_codecs.py
index 7ac70d6..0e23296 100644
--- a/tests/test_codecs.py
+++ b/tests/test_codecs.py
@@ -2,7 +2,7 @@
 
 import binascii
 import unittest
-from suou.codecs import b64encode, b64decode
+from suou.codecs import b64encode, b64decode, want_urlsafe
 
 B1 = b'N\xf0\xb4\xc3\x85\n\xf9\xb6\x9a\x0f\x82\xa6\x99G\x07#'
 B2 = b'\xbcXiF,@|{\xbe\xe3\x0cz\xa8\xcbQ\x82'
@@ -47,4 +47,8 @@ class TestCodecs(unittest.TestCase):
 
         self.assertRaises(binascii.Error, b64decode, 'C')
 
-    
+    def test_want_urlsafe(self):
+        self.assertEqual('__init__', want_urlsafe('//init_/'))
+        self.assertEqual('Disney-', want_urlsafe('Disney+'))
+        self.assertEqual('spaziocosenza', want_urlsafe('spazio cosenza'))
+        self.assertEqual('=======', want_urlsafe('======='))
diff --git a/tests/test_strtools.py b/tests/test_strtools.py
index d07ed88..e3ef328 100644
--- a/tests/test_strtools.py
+++ b/tests/test_strtools.py
@@ -4,6 +4,7 @@ import unittest
 
 from suou.strtools import PrefixIdentifier
 
+from pydantic import ValidationError
 
 class TestStrtools(unittest.TestCase):
     def setUp(self) -> None:
@@ -19,12 +20,12 @@ class TestStrtools(unittest.TestCase):
         self.assertEqual(pi['\x1b\x00'], '\x1b\0')
         self.assertEqual(pi.same_thing, pi['same_thing'])
 
-        with self.assertRaises(TypeError):
+        with self.assertRaises(ValidationError):
             pi[0]
 
-        self.assertEqual(PrefixIdentifier(None), PrefixIdentifier(''))
+        self.assertEqual(f'{PrefixIdentifier(None)}', f'{PrefixIdentifier("")}')
 
-    def test_PrefixIdentifier_invalid(self):
+    def test_PrefixIdentifier_get_nostr(self):
         with self.assertRaises(TypeError):
             pi = PrefixIdentifier(1)
             pi.hello
@@ -35,4 +36,5 @@ class TestStrtools(unittest.TestCase):
 
         with self.assertRaises(TypeError):
             PrefixIdentifier(b'alpha_')
-        
\ No newline at end of file
+
+        
\ No newline at end of file