salvi/app_sync.py

"""
Helper module for sync.
(c) 2021 Sakuragasaki46.
"""
import datetime
import os
import sys
import time
from configparser import ConfigParser
from functools import lru_cache

import requests
from peewee import IntegrityError

from app import Page, PageRevision, PageText
## CONSTANTS ##

APP_BASE_DIR = os.path.dirname(__file__)
UPLOAD_DIR = APP_BASE_DIR + '/media'
DATABASE_DIR = APP_BASE_DIR + '/database'

#### GENERAL CONFIG ####

DEFAULT_CONF = {
    ('site', 'title'): 'Salvi',
    ('config', 'media_dir'): UPLOAD_DIR,
    ('config', 'database_dir'): DATABASE_DIR,
}
_cfp = ConfigParser()
if _cfp.read([APP_BASE_DIR + '/site.conf']):
    # site.conf found: look keys up in it, falling back to DEFAULT_CONF.
    @lru_cache(maxsize=50)
    def _getconf(k1, k2, fallback=None):
        if fallback is None:
            fallback = DEFAULT_CONF.get((k1, k2))
        return _cfp.get(k1, k2, fallback=fallback)
else:
    # No site.conf: serve defaults only.
    def _getconf(k1, k2, fallback=None):
        if fallback is None:
            fallback = DEFAULT_CONF.get((k1, k2))
        return fallback
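
# A minimal site.conf sketch (section and key names match what this module
# reads; the values shown are illustrative):
#
#   [site]
#   title = Salvi
#
#   [config]
#   media_dir = /srv/salvi/media
#   database_dir = /srv/salvi/database
#
#   [sync]
#   master = https://wiki.example.org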
#### misc. helpers ####

def _makelist(l):
    # Coerce any value into a list: strings stay whole, other iterables
    # are expanded, falsy scalars become the empty list.
    if isinstance(l, (str, bytes, bytearray)):
        return [l]
    elif hasattr(l, '__iter__'):
        return list(l)
    elif l:
        return [l]
    else:
        return []
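
# Behavior sketch (this helper is not exercised in this module):
#   _makelist('ab')    -> ['ab']    strings are kept whole, not split
#   _makelist((1, 2))  -> [1, 2]    other iterables are expanded
#   _makelist(None)    -> []        falsy scalars become the empty list
#   _makelist(5)       -> [5]       truthy scalars are wrapped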
#### REQUESTS ####

def fetch_updated_ids(baseurl):
    """Ask the master for the ids of pages changed since the last sync."""
    # Read the timestamp of the previous sync; fall back to Jan 1, 2000
    # when the marker file is missing or unreadable.  The path and file
    # name must match the ones written at the end of main().
    try:
        with open(_getconf("config", "database_dir") + "/last_sync") as f:
            last_sync = float(f.read().rstrip("\n"))
    except (OSError, ValueError):
        last_sync = 946681200.0  # Jan 1, 2000
    r = requests.get(baseurl + "/_jsoninfo/changed/{ts}".format(ts=last_sync))
    if r.status_code >= 400:
        raise RuntimeError("sync unavailable")
    return r.json()["ids"]
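
# The /_jsoninfo/changed/<ts> endpoint is expected to answer with a JSON
# body shaped roughly like this (a sketch inferred from the code above,
# not a documented schema):
#
#   {"ids": [12, 57, 103]}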
def update_page(p, pageinfo):
    """Refresh metadata and tags for page p, and record a new revision."""
    p.touched = datetime.datetime.fromtimestamp(pageinfo["touched"])
    p.url = pageinfo["url"]
    p.title = pageinfo["title"]
    p.save()
    p.change_tags(pageinfo["tags"])
    # Sanity check: the text received must match the declared length.
    assert len(pageinfo["text"]) == pageinfo["latest"]["length"]
    PageRevision.create(
        page=p,
        user_id=0,
        comment='',
        textref=PageText.create_content(pageinfo['text']),
        pub_date=datetime.datetime.fromtimestamp(pageinfo["latest"]["pub_date"]),
        length=pageinfo["latest"]["length"]
    )
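
# A pageinfo payload, as consumed by update_page() and main() below, looks
# roughly like this (field values are illustrative, inferred from the keys
# the code reads):
#
#   {
#       "url": "some-page", "title": "Some page", "is_redirect": false,
#       "touched": 1627987278.0, "tags": ["tag"],
#       "text": "Page body...",
#       "latest": {"pub_date": 1627987278.0, "length": 12}
#   }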
#### MAIN ####

def main():
    baseurl = _getconf("sync", "master", "this")
    if baseurl == "this":
        # This instance is itself the master; nothing to pull.
        print("unsyncable: master", file=sys.stderr)
        return
    if not baseurl.startswith(("http:", "https:")):
        print("unsyncable: invalid url", repr(baseurl), file=sys.stderr)
        return
    passed, failed = 0, 0
    for i in fetch_updated_ids(baseurl):
        pageinfo_r = requests.post(baseurl + "/_jsoninfo/{i}".format(i=i))
        if pageinfo_r.status_code >= 400:
            print("\x1b[31mSkipping {i}: HTTP {s}\x1b[0m".format(
                i=i, s=pageinfo_r.status_code))
            failed += 1
            continue
        pageinfo = pageinfo_r.json()
        try:
            p = Page[i]
        except Page.DoesNotExist:
            # Unknown page: create it locally, then pull tags and text.
            try:
                p = Page.create(
                    id=i,
                    url=pageinfo['url'],
                    title=pageinfo['title'],
                    is_redirect=pageinfo['is_redirect'],
                    touched=datetime.datetime.fromtimestamp(pageinfo["touched"]),
                    is_sync=True
                )
                update_page(p, pageinfo)
            except IntegrityError:
                print("\x1b[31mSkipping {i}: Integrity error\x1b[0m".format(i=i))
                failed += 1
                continue
        else:
            # Known page: update only when the master copy is newer.
            # Compare datetime to datetime, not a raw timestamp to a datetime.
            if datetime.datetime.fromtimestamp(pageinfo["touched"]) > p.touched:
                update_page(p, pageinfo)
        passed += 1
    # Remember when this sync ran; fetch_updated_ids() reads this back on
    # the next run, so path and file name must match the ones used there.
    with open(_getconf("config", "database_dir") + "/last_sync", "w") as fw:
        fw.write(str(time.time()))
    if passed > 0 and failed == 0:
        print("\x1b[32mSuccessfully updated {p} pages :)\x1b[0m".format(p=passed))
    else:
        print("\x1b[33m{p} pages successfully updated, {f} errors.\x1b[0m".format(
            p=passed, f=failed))

if __name__ == "__main__":
    main()