0.2.0 add triwat post parsing
This commit is contained in:
parent
5b75dcd028
commit
939c3e8919
5 changed files with 351 additions and 185 deletions
9
CHANGELOG.md
Normal file
9
CHANGELOG.md
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
# 0.2.0
|
||||||
|
|
||||||
|
+ Added Tríwat post parsing utilities
|
||||||
|
+ Turned into a multi command
|
||||||
|
|
||||||
|
# 0.1.0
|
||||||
|
|
||||||
|
+ Initial commit
|
||||||
|
+ Added overworld route calculation
|
||||||
|
|
@ -1,199 +1,55 @@
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from functools import lru_cache
|
|
||||||
import json
|
|
||||||
import argparse
|
import argparse
|
||||||
from math import ceil
|
import sys
|
||||||
from heapq import heapify, heappush, heappop
|
|
||||||
|
|
||||||
__version__ = "0.1.0"
|
from .utils import HourMin
|
||||||
|
from .rails import main_rails
|
||||||
|
from .triwat import main_triwat
|
||||||
|
|
||||||
|
__version__ = "0.2.0"
|
||||||
|
|
||||||
ALL_DATA = json.load(open('data/network.json'))
|
def build_parser():
|
||||||
LEGACY_DATA = json.load(open('data/network.1.json'))
|
|
||||||
INFINITY = 2147483648
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Station:
|
|
||||||
name: str
|
|
||||||
code: str
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Line:
|
|
||||||
name: str
|
|
||||||
code: str
|
|
||||||
route: list[RouteStep]
|
|
||||||
|
|
||||||
def __contains__(self, st: Station | str):
|
|
||||||
if isinstance(st, Station):
|
|
||||||
st = st.code
|
|
||||||
for rs in self.route:
|
|
||||||
if rs.origin.code == st or rs.target.code == st:
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class RouteStep:
|
|
||||||
origin: Station
|
|
||||||
target: Station
|
|
||||||
time: int
|
|
||||||
line: str
|
|
||||||
|
|
||||||
def find_station(code: str) -> Station | None:
|
|
||||||
st_name = ALL_DATA.get('stations', {}).get(code)
|
|
||||||
if st_name:
|
|
||||||
return Station(
|
|
||||||
name = st_name,
|
|
||||||
code = code
|
|
||||||
)
|
|
||||||
|
|
||||||
def take_first(s):
|
|
||||||
if isinstance(s, (str, bytes)):
|
|
||||||
return s
|
|
||||||
elif hasattr(s, '__iter__'):
|
|
||||||
return list(s)[0]
|
|
||||||
return s
|
|
||||||
|
|
||||||
def build_route_list(line_stops: list, line_code):
|
|
||||||
route_list = []
|
|
||||||
last_step = None
|
|
||||||
for step_data in line_stops:
|
|
||||||
cur_step = step_data['code']
|
|
||||||
if last_step:
|
|
||||||
try:
|
|
||||||
line_time = ceil(step_data['time'])
|
|
||||||
except Exception:
|
|
||||||
try:
|
|
||||||
line_time = ceil(step_data['dist'] / 64)
|
|
||||||
except Exception:
|
|
||||||
line_time = 100 # TODO better fallback
|
|
||||||
route_list.append(RouteStep(
|
|
||||||
origin = find_station(last_step) or Station(last_step, last_step),
|
|
||||||
target = find_station(cur_step) or Station(last_step, last_step),
|
|
||||||
time = line_time,
|
|
||||||
line = line_code
|
|
||||||
))
|
|
||||||
last_step = cur_step
|
|
||||||
return route_list
|
|
||||||
|
|
||||||
def build_all_lines():
|
|
||||||
lines = {}
|
|
||||||
for line_data in ALL_DATA['lines']['overworld']:
|
|
||||||
|
|
||||||
lines[line_data['code']] = Line(
|
|
||||||
code = line_data['code'],
|
|
||||||
name = line_data['name'],
|
|
||||||
route = build_route_list(line_data['stops'], line_data['code'])
|
|
||||||
)
|
|
||||||
return lines
|
|
||||||
|
|
||||||
ALL_LINES = build_all_lines()
|
|
||||||
|
|
||||||
## TODO algorithms of research
|
|
||||||
def find_route(start: str, stop: str):
|
|
||||||
steps_i = []
|
|
||||||
|
|
||||||
dist, prev = dijkstra(start)
|
|
||||||
|
|
||||||
cur = stop
|
|
||||||
steps_i.append((find_station(stop), dist[stop]))
|
|
||||||
while (cur_prev := prev[cur]) != start:
|
|
||||||
steps_i.insert(0, (find_station(cur_prev), dist[cur_prev]))
|
|
||||||
cur = cur_prev
|
|
||||||
|
|
||||||
steps_i.insert(0, (find_station(start), 0))
|
|
||||||
|
|
||||||
return steps_i
|
|
||||||
|
|
||||||
@lru_cache()
|
|
||||||
def find_neighbors(start):
|
|
||||||
neighs = []
|
|
||||||
|
|
||||||
for line in ALL_LINES.values():
|
|
||||||
line: Line
|
|
||||||
if start in line:
|
|
||||||
for rs in line.route:
|
|
||||||
rs: RouteStep
|
|
||||||
if rs.origin.code == start:
|
|
||||||
neighs.append((rs.target.code, rs.time))
|
|
||||||
if rs.target.code == start:
|
|
||||||
neighs.append((rs.origin.code, rs.time))
|
|
||||||
|
|
||||||
return neighs
|
|
||||||
|
|
||||||
def dijkstra(start: str):
|
|
||||||
dist = {node: INFINITY for node in ALL_DATA['stations']}
|
|
||||||
dist[start] = 0
|
|
||||||
prev = {node: None for node in ALL_DATA['stations']}
|
|
||||||
|
|
||||||
pq = [(0, start)]
|
|
||||||
heapify(pq)
|
|
||||||
|
|
||||||
visited = set()
|
|
||||||
|
|
||||||
while pq:
|
|
||||||
cur_dist, cur_node = heappop(pq)
|
|
||||||
|
|
||||||
if cur_node in visited:
|
|
||||||
continue
|
|
||||||
visited.add(cur_node)
|
|
||||||
|
|
||||||
for neigh, time in find_neighbors(cur_node):
|
|
||||||
tentative_dist = cur_dist + time
|
|
||||||
if tentative_dist < dist.setdefault(neigh, INFINITY):
|
|
||||||
dist[neigh] = tentative_dist
|
|
||||||
prev[neigh] = cur_node
|
|
||||||
heappush(pq, (tentative_dist, neigh))
|
|
||||||
|
|
||||||
return dist, prev
|
|
||||||
|
|
||||||
|
|
||||||
class HourMin(int):
|
|
||||||
def __str__(self):
|
|
||||||
h, m = divmod(self, 60)
|
|
||||||
return f'{h:01}:{m:02}'
|
|
||||||
|
|
||||||
def __new__(cls, *args):
|
|
||||||
if len(args) == 1 and isinstance(args[0], str) and ':' in args[0]:
|
|
||||||
h, m = args[0].split(':')
|
|
||||||
return super().__new__(cls, int(h) * 60 + int(m))
|
|
||||||
return super().__new__(cls, *args)
|
|
||||||
|
|
||||||
def parse_args():
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument('--version', action='version', version=__version__)
|
parser.add_argument('--version', action='version', version=__version__)
|
||||||
parser.add_argument('--time', help='time of start', type=HourMin, default=HourMin(0))
|
|
||||||
parser.add_argument('--start', help='station of start')
|
|
||||||
parser.add_argument('--end', help='station of end')
|
|
||||||
parser.add_argument('--legacy', action='store_true', help="use legacy network")
|
|
||||||
parser.add_argument('--search', help='search a station by its name')
|
|
||||||
|
|
||||||
return parser.parse_args()
|
parsers = parser.add_subparsers(dest='action')
|
||||||
|
parser_r = parsers.add_parser("rail", aliases=('rails','r'))
|
||||||
|
|
||||||
|
parser_r.add_argument('start', help='station of start', default=None, nargs='?')
|
||||||
|
parser_r.add_argument('end', help='station of end', default=None, nargs='?')
|
||||||
|
parser_r.add_argument('-t', '--time', help='time of start', type=HourMin, default=HourMin(0))
|
||||||
|
parser_r.add_argument('--legacy', action='store_true', help="use legacy network")
|
||||||
|
parser_r.add_argument('-q', '--search', help='search a station by its name')
|
||||||
|
|
||||||
|
parser_t = parsers.add_parser('triwat', aliases=('t',))
|
||||||
|
parser_t.add_argument('id', help="post ID (copy from discord)")
|
||||||
|
parser_t.add_argument('parent', help="parent ID (copy from discord)", nargs='?', default=None)
|
||||||
|
|
||||||
|
return parser
|
||||||
|
|
||||||
|
SHORTCUTS = {
|
||||||
|
'R': 'rail',
|
||||||
|
'T': 'triwat'
|
||||||
|
}
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
args = parse_args()
|
raw_args = sys.argv[1:]
|
||||||
|
|
||||||
if args.legacy:
|
# Arch-like switch
|
||||||
global ALL_DATA, ALL_LINES
|
if raw_args and raw_args[0][0] == '-' and raw_args[0][1] in SHORTCUTS:
|
||||||
ALL_DATA = LEGACY_DATA
|
raw_args.insert(0, SHORTCUTS[raw_args[0][1]])
|
||||||
ALL_LINES = build_all_lines()
|
if len(raw_args[1]) > 2:
|
||||||
|
raw_args[1] = '-' + raw_args[1][2:]
|
||||||
|
else:
|
||||||
|
raw_args.pop(1)
|
||||||
|
|
||||||
if args.search:
|
parser = build_parser()
|
||||||
query = args.search.lower()
|
args = parser.parse_args(raw_args)
|
||||||
for st_code, st_name in ALL_DATA['stations'].items():
|
|
||||||
if query in take_first(st_name).lower():
|
|
||||||
print('*', st_code, st_name)
|
|
||||||
return
|
|
||||||
|
|
||||||
st_start = find_station(args.start)
|
if args.action == 'rail':
|
||||||
st_end = find_station(args.end)
|
main_rails(args)
|
||||||
st_time = args.time
|
elif args.action == 'triwat':
|
||||||
|
main_triwat(args)
|
||||||
|
else:
|
||||||
|
parser.print_help()
|
||||||
|
|
||||||
if not st_start or not st_end:
|
|
||||||
return
|
|
||||||
|
|
||||||
route = find_route(st_start.code, st_end.code)
|
|
||||||
|
|
||||||
for st_step, time in route:
|
|
||||||
print(HourMin(st_time + time), st_step.code, st_step.name)
|
|
||||||
173
src/micorail/rails.py
Normal file
173
src/micorail/rails.py
Normal file
|
|
@ -0,0 +1,173 @@
|
||||||
|
|
||||||
|
from __future__ import annotations

import json
from dataclasses import dataclass
from functools import lru_cache
from heapq import heapify, heappush, heappop
from math import ceil

from .utils import HourMin
|
||||||
|
|
||||||
|
def _load_json(path):
    """Load a JSON document, ensuring the file handle is closed."""
    # BUG FIX: json.load(open(...)) leaked the file descriptor.
    with open(path) as fp:
        return json.load(fp)


# Loaded once at import time; main_rails may swap in the legacy data.
ALL_DATA = _load_json('data/network.json')
LEGACY_DATA = _load_json('data/network.1.json')
INFINITY = 2147483648  # sentinel "unreachable" distance (2**31)
|
||||||
|
|
||||||
|
@dataclass
class Station:
    """A network station: display name plus its short code."""

    name: str
    code: str
|
||||||
|
|
||||||
|
@dataclass
class Line:
    """A transit line: name, code and an ordered list of route steps."""

    name: str
    code: str
    route: list[RouteStep]

    def __contains__(self, st: Station | str):
        """Whether station *st* (Station object or code string) is on this line."""
        code = st.code if isinstance(st, Station) else st
        return any(
            code in (step.origin.code, step.target.code)
            for step in self.route
        )
|
||||||
|
|
||||||
|
@dataclass
class RouteStep:
    """One directed hop of a line: origin -> target with a travel time."""

    origin: Station
    target: Station
    time: int  # travel time (same unit as HourMin minutes, per main_rails)
    line: str  # code of the line this step belongs to
|
||||||
|
|
||||||
|
def find_station(code: str) -> Station | None:
    """Look up *code* in the station table; None when unknown."""
    station_name = ALL_DATA.get('stations', {}).get(code)
    if not station_name:
        return None
    return Station(name=station_name, code=code)
|
||||||
|
|
||||||
|
def take_first(s):
    """Return the first element of an iterable; strings/bytes and scalars pass through."""
    if hasattr(s, '__iter__') and not isinstance(s, (str, bytes)):
        return list(s)[0]
    return s
|
||||||
|
|
||||||
|
def build_route_list(line_stops: list, line_code):
    """Turn the raw ``stops`` entries of a line into RouteStep objects.

    Each stop dict carries a station ``code`` plus, for every stop after the
    first, either an explicit ``time`` or a ``dist`` from the previous stop.
    """
    route_list = []
    last_step = None
    for step_data in line_stops:
        cur_step = step_data['code']
        if last_step:
            try:
                line_time = ceil(step_data['time'])
            except (KeyError, TypeError, ValueError):
                try:
                    # assumes ~64 distance units per time unit — TODO confirm
                    line_time = ceil(step_data['dist'] / 64)
                except (KeyError, TypeError, ValueError):
                    line_time = 100  # TODO better fallback
            route_list.append(RouteStep(
                origin=find_station(last_step) or Station(last_step, last_step),
                # BUG FIX: the fallback used to build a Station from
                # last_step (the origin's code) instead of cur_step.
                target=find_station(cur_step) or Station(cur_step, cur_step),
                time=line_time,
                line=line_code,
            ))
        last_step = cur_step
    return route_list
|
||||||
|
|
||||||
|
def build_all_lines():
    """Build a ``{code: Line}`` mapping from the overworld line definitions."""
    return {
        line_data['code']: Line(
            code=line_data['code'],
            name=line_data['name'],
            route=build_route_list(line_data['stops'], line_data['code']),
        )
        for line_data in ALL_DATA['lines']['overworld']
    }
|
||||||
|
|
||||||
|
ALL_LINES = build_all_lines()
|
||||||
|
|
||||||
|
## TODO algorithms of research
|
||||||
|
def find_route(start: str, stop: str):
    """Shortest path start -> stop as a list of (Station, cumulative_time).

    Raises ValueError when *stop* is not reachable from *start*.
    """
    # BUG FIX: when start == stop (or stop is unreachable) the predecessor
    # chain hits None and the old code crashed with ``KeyError: None``.
    if start == stop:
        return [(find_station(start), 0)]

    dist, prev = dijkstra(start)

    steps_i = [(find_station(stop), dist[stop])]
    cur = stop
    while (cur_prev := prev[cur]) != start:
        if cur_prev is None:
            raise ValueError(f'no route from {start} to {stop}')
        steps_i.insert(0, (find_station(cur_prev), dist[cur_prev]))
        cur = cur_prev

    steps_i.insert(0, (find_station(start), 0))
    return steps_i
|
||||||
|
|
||||||
|
@lru_cache()
def find_neighbors(start):
    """All ``(station_code, time)`` pairs one hop away from *start*.

    Lines are traversed in both directions.
    NOTE(review): the cache assumes ALL_LINES does not change after the
    first call — main_rails swaps the legacy network before routing, so
    this holds in the current flow; confirm if call order changes.
    """
    neighbors = []
    for line in ALL_LINES.values():
        if start not in line:
            continue
        for step in line.route:
            if step.origin.code == start:
                neighbors.append((step.target.code, step.time))
            if step.target.code == start:
                neighbors.append((step.origin.code, step.time))
    return neighbors
|
||||||
|
|
||||||
|
def dijkstra(start: str):
    """Single-source shortest paths over the station graph.

    Returns ``(dist, prev)``: distance and predecessor code for every
    known station (INFINITY / None where unreached).
    """
    dist = dict.fromkeys(ALL_DATA['stations'], INFINITY)
    dist[start] = 0
    prev = dict.fromkeys(ALL_DATA['stations'], None)

    pq = [(0, start)]
    heapify(pq)
    settled = set()

    while pq:
        cur_dist, node = heappop(pq)
        if node in settled:
            continue  # stale heap entry
        settled.add(node)

        for neighbor, time in find_neighbors(node):
            candidate = cur_dist + time
            # setdefault: a neighbor may name a station missing from the table
            if candidate < dist.setdefault(neighbor, INFINITY):
                dist[neighbor] = candidate
                prev[neighbor] = node
                heappush(pq, (candidate, neighbor))

    return dist, prev
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def main_rails(args):
    """Handle the ``rail`` subcommand: search stations or print a timed route."""
    if args.legacy:
        # Swap in the previous network revision for this run.
        global ALL_DATA, ALL_LINES
        ALL_DATA = LEGACY_DATA
        ALL_LINES = build_all_lines()

    if args.search:
        query = args.search.lower()
        for st_code, st_name in ALL_DATA['stations'].items():
            if query in take_first(st_name).lower():
                print('*', st_code, st_name)
        return

    st_start = find_station(args.start)
    st_end = find_station(args.end)
    st_time = args.time

    if not st_start or not st_end:
        print('error: missing stations')
        return

    for st_step, time in find_route(st_start.code, st_end.code):
        print(HourMin(st_time + time), st_step.code, st_step.name)
|
||||||
117
src/micorail/triwat.py
Normal file
117
src/micorail/triwat.py
Normal file
|
|
@ -0,0 +1,117 @@
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
import datetime
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class TriwatPost:
|
||||||
|
id: str
|
||||||
|
date: datetime.datetime
|
||||||
|
title: str
|
||||||
|
author: str
|
||||||
|
content: str
|
||||||
|
reply_to: str | None
|
||||||
|
tags: list[str] | None
|
||||||
|
muted: bool = False
|
||||||
|
|
||||||
|
def to_json(self):
|
||||||
|
return {
|
||||||
|
"id": self.id,
|
||||||
|
"date": self.date.isoformat(),
|
||||||
|
"title": self.title,
|
||||||
|
"author": self.author,
|
||||||
|
"content": self.content,
|
||||||
|
"reply_to": self.reply_to,
|
||||||
|
"tags": self.tags,
|
||||||
|
"muted": self.muted
|
||||||
|
}
|
||||||
|
|
||||||
|
EPOCH = 1420066800
|
||||||
|
|
||||||
|
def id_to_date(id: str | int):
|
||||||
|
id = int(id)
|
||||||
|
timestamp_millis = id >> 22
|
||||||
|
timestamp = timestamp_millis / 1000
|
||||||
|
return datetime.datetime.fromtimestamp(EPOCH + timestamp)
|
||||||
|
|
||||||
|
def store_triwat_post(p: TriwatPost) -> bool:
|
||||||
|
try:
|
||||||
|
with open(f"data/triwat.{int(p.id) >> 51}.json") as f:
|
||||||
|
data = json.load(f)
|
||||||
|
except FileNotFoundError:
|
||||||
|
data = {'posts': []}
|
||||||
|
|
||||||
|
post_exists = [x for x in data['posts'] if x['id'] == p.id]
|
||||||
|
if post_exists:
|
||||||
|
return False
|
||||||
|
|
||||||
|
data['posts'].append(p.to_json())
|
||||||
|
|
||||||
|
with open(f'data/triwat.{int(p.id) >> 51}.json', 'w') as fw:
|
||||||
|
json.dump(data, fw, indent=2)
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def main_triwat(args):
    """Handle the ``triwat`` subcommand: parse a post from stdin and store it.

    Expected shape (as copied from discord):
      line 1: optional (possibly bold) 'Reply' marker, title, '(author markup)'
      then an optional '&tag1 &tag2' line, then the post content.
    """
    post_id = args.id
    reply_id = args.parent or None

    whole_post = sys.stdin.read().strip()
    first_line, *rest = whole_post.splitlines()

    # A leading (possibly bold) 'Reply' marks the post as a reply.
    post_is_reply = first_line.startswith(('Reply', '*Reply*', '**Reply**'))

    title_part, _, author_part = first_line.partition('(')
    author_name, _, _ = author_part.partition(')')

    # Author is wrapped in markdown stars; doubled (possibly escaped)
    # stars flag a muted author.
    mg = re.search(r'(\*?)(\\?\*){1,2}([^\s\*]+)\1', author_name)
    muted = False
    # BUG FIX: 'author' was unbound (NameError) when the markup didn't
    # match; fall back to the raw text between the parentheses.
    author = author_name.strip('* ')
    if mg:
        author = mg.group(3)
        stars = mg.group(2).replace('\\', '')
        muted = len(stars) > 1

    title = title_part.strip('* ')

    # Skip blank lines between the title and the tag/content section.
    while rest and not rest[0].strip():
        rest.pop(0)

    tags = []
    # BUG FIX: guard against a title-only post (rest may be empty here).
    if rest and rest[0].startswith('&'):
        tags_line = rest.pop(0)
        tags = [x.strip() for x in tags_line.split('&') if x.strip()]

    while rest and not rest[0].strip():
        rest.pop(0)

    content = '\n'.join(rest)

    p = TriwatPost(
        id=post_id,
        date=id_to_date(post_id),
        title=title,
        author=author,
        content=content,
        reply_to=reply_id if post_is_reply else None,
        tags=tags,
        muted=muted,
    )

    if store_triwat_post(p):
        print(f'** Post {p.id} stored!')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
11
src/micorail/utils.py
Normal file
11
src/micorail/utils.py
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
|
||||||
|
class HourMin(int):
    """Minutes-since-midnight integer that prints as ``H:MM``.

    Accepts either anything ``int()`` accepts (minutes) or an ``'H:MM'``
    string.
    """

    def __new__(cls, *args):
        # 'H:MM' string -> total minutes; everything else goes to int().
        if len(args) == 1 and isinstance(args[0], str) and ':' in args[0]:
            hours, minutes = args[0].split(':')
            return super().__new__(cls, int(hours) * 60 + int(minutes))
        return super().__new__(cls, *args)

    def __str__(self):
        hours, minutes = divmod(self, 60)
        return f'{hours:01}:{minutes:02}'
|
||||||
Loading…
Add table
Add a link
Reference in a new issue