134 lines
4 KiB
Python
134 lines
4 KiB
Python
|
|
"""
|
||
|
|
Helper module for sync.
|
||
|
|
|
||
|
|
(c) 2021 Sakuragasaki46.
|
||
|
|
"""
|
||
|
|
|
||
|
|
import datetime, time
|
||
|
|
import requests
|
||
|
|
import sys, os
|
||
|
|
from configparser import ConfigParser
|
||
|
|
from app import Page, PageRevision, PageText
|
||
|
|
from peewee import IntegrityError
|
||
|
|
from functools import lru_cache
|
||
|
|
|
||
|
|
## CONSTANTS ##
|
||
|
|
|
||
|
|
# Directory containing this module; every path below is derived from it.
APP_BASE_DIR = os.path.dirname(__file__)

# Built-in locations for uploaded media and for the database files.
UPLOAD_DIR = APP_BASE_DIR + '/media'
DATABASE_DIR = APP_BASE_DIR + "/database"

#### GENERAL CONFIG ####

# Built-in configuration values, keyed by (section, option).
DEFAULT_CONF = {
    ('site', 'title'): 'Salvi',
    ('config', 'media_dir'): APP_BASE_DIR + '/media',
    ('config', 'database_dir'): APP_BASE_DIR + "/database",
}
|
||
|
|
|
||
|
|
# Parser for the optional ``site.conf`` file that lives next to this module.
_cfp = ConfigParser()

# ConfigParser.read() returns the list of files it parsed successfully, so
# the result is falsy when site.conf could not be read.
if _cfp.read([APP_BASE_DIR + '/site.conf']):
    @lru_cache(maxsize=50)
    def _getconf(k1, k2, fallback=None):
        """Return option *k2* of section *k1* from site.conf.

        When *fallback* is None, the DEFAULT_CONF entry for ``(k1, k2)``
        (or None if there is none) is used as the fallback value handed to
        the parser.  Results are memoized per argument combination.
        """
        default = DEFAULT_CONF.get((k1, k2)) if fallback is None else fallback
        return _cfp.get(k1, k2, fallback=default)
else:
    def _getconf(k1, k2, fallback=None):
        """Return the fallback for ``(k1, k2)``; no site.conf was loaded.

        An explicit non-None *fallback* is returned as is; otherwise the
        DEFAULT_CONF entry for ``(k1, k2)`` (or None) is returned.
        """
        return DEFAULT_CONF.get((k1, k2)) if fallback is None else fallback
|
||
|
|
|
||
|
|
#### misc. helpers ####
|
||
|
|
|
||
|
|
def _makelist(l):
    """Coerce *l* to a list.

    * ``str``, ``bytes`` and ``bytearray`` values are wrapped as a single
      element, even when empty.
    * Any other object exposing ``__iter__`` is expanded with ``list()``.
    * Any other truthy value is wrapped as a single element.
    * Any other falsy value (``None``, ``0``, ...) yields an empty list.
    """
    # Text and byte strings are iterable, but are treated as scalars here.
    if isinstance(l, (str, bytes, bytearray)):
        return [l]
    if hasattr(l, '__iter__'):
        return list(l)
    return [l] if l else []
|
||
|
|
|
||
|
|
#### REQUESTS ####
|
||
|
|
|
||
|
|
def fetch_updated_ids(baseurl):
    """Ask the site at *baseurl* which page ids changed since the last sync.

    The reference timestamp is read from the ``latest_sync`` file inside the
    configured database directory.  If that file cannot be opened or does
    not contain a number, a fixed timestamp around Jan 1, 2000 is used.

    Returns the value stored under ``"ids"`` in the JSON response.

    Raises:
        RuntimeError: if the server answers with an HTTP status >= 400.
    """
    try:
        with open(_getconf("config", "database_dir") + "/latest_sync") as sync_file:
            raw_timestamp = sync_file.read()
        last_sync = float(raw_timestamp.rstrip("\n"))
    except (OSError, ValueError):
        # Missing/unreadable file or garbage content: fall back to a date
        # far in the past (Jan 1, 2000, 00:00 at UTC+1).
        last_sync = 946681200.0

    changed_url = baseurl + "/_jsoninfo/changed/{ts}".format(ts=last_sync)
    response = requests.get(changed_url)
    if response.status_code >= 400:
        raise RuntimeError("sync unavailable")
    payload = response.json()
    return payload["ids"]
|
||
|
|
|
||
|
|
def update_page(p, pageinfo):
    """Apply remotely fetched page data to *p* and record a new revision.

    Args:
        p: the local page object; it is assigned ``touched``, ``url`` and
            ``title``, then ``save()`` and ``change_tags()`` are called on it.
        pageinfo: mapping read for the keys ``"touched"``, ``"url"``,
            ``"title"``, ``"tags"``, ``"text"`` and ``"latest"`` (the latter
            providing ``"length"`` and ``"pub_date"``).  ``"touched"`` and
            ``"pub_date"`` are passed to ``datetime.fromtimestamp``.

    Raises:
        ValueError: if ``len(pageinfo["text"])`` differs from
            ``pageinfo["latest"]["length"]``.
    """
    text = pageinfo["text"]
    latest = pageinfo["latest"]

    # Validate *before* touching the database: otherwise a bad payload would
    # leave the page saved with fresh metadata but without its revision.
    # An explicit raise is used because ``assert`` is stripped under -O.
    if len(text) != latest["length"]:
        raise ValueError(
            "text length mismatch: got {got}, expected {exp}".format(
                got=len(text), exp=latest["length"]))

    p.touched = datetime.datetime.fromtimestamp(pageinfo["touched"])
    p.url = pageinfo["url"]
    p.title = pageinfo["title"]
    p.save()
    p.change_tags(pageinfo["tags"])

    # Synced revisions are stored with user_id 0 and an empty comment.
    PageRevision.create(
        page=p,
        user_id=0,
        comment='',
        textref=PageText.create_content(text),
        pub_date=datetime.datetime.fromtimestamp(latest["pub_date"]),
        length=latest["length"]
    )
|
||
|
|
|
||
|
|
#### MAIN ####
|
||
|
|
|
||
|
|
def main():
    """Pull every page changed on the configured master into the local DB.

    The master URL is read from the ``[sync] master`` option.  The default
    value ``"this"`` is reported as "unsyncable: master", and anything that
    is not an http(s) URL is rejected; both cases return without syncing.

    For each id returned by ``fetch_updated_ids`` the page description is
    requested from ``/_jsoninfo/<id>``.  Missing pages are created locally,
    existing ones are updated only when the remote copy is newer.  After the
    loop the current time is stored as the new sync point and a summary is
    printed.
    """
    baseurl = _getconf("sync", "master", "this")
    if baseurl == "this":
        print("unsyncable: master", file=sys.stderr)
        return
    if not baseurl.startswith(("http:", "https:")):
        print("unsyncable: invalid url", repr(baseurl), file=sys.stderr)
        return

    passed, failed = 0, 0
    for i in fetch_updated_ids(baseurl):
        pageinfo_r = requests.post(baseurl + "/_jsoninfo/{i}".format(i=i))
        if pageinfo_r.status_code >= 400:
            print("\x1b[31mSkipping {i}: HTTP {s}\x1b[0m".format(i=i, s=pageinfo_r.status_code))
            failed += 1
            continue
        pageinfo = pageinfo_r.json()
        try:
            p = Page[i]
        except Page.DoesNotExist:
            # Unknown page: create it locally with the same id, then fill it.
            try:
                p = Page.create(
                    id=i,
                    url=pageinfo['url'],
                    title=pageinfo['title'],
                    is_redirect=pageinfo['is_redirect'],
                    touched=datetime.datetime.fromtimestamp(pageinfo["touched"]),
                    is_sync=True
                )
                update_page(p, pageinfo)
            except IntegrityError:
                print("\x1b[31mSkipping {i}: Integrity error\x1b[0m".format(i=i))
                failed += 1
                continue
        else:
            # pageinfo["touched"] is a numeric timestamp, while p.touched is
            # stored as a datetime (see the assignments above and in
            # update_page).  Comparing the two directly raises TypeError, so
            # convert the remote value the same way it is stored.
            remote_touched = datetime.datetime.fromtimestamp(pageinfo["touched"])
            if remote_touched > p.touched:
                update_page(p, pageinfo)
        passed += 1

    # Store the sync point in the very file fetch_updated_ids() reads
    # (<configured database_dir>/latest_sync); writing it anywhere else means
    # the timestamp is never picked up again.
    # NOTE(review): the timestamp advances even when some pages failed, so
    # those pages are not retried unless they change again -- confirm this
    # is intended.
    with open(_getconf("config", "database_dir") + "/latest_sync", "w") as fw:
        fw.write(str(time.time()))

    if passed > 0 and failed == 0:
        print("\x1b[32mSuccessfully updated {p} pages :)\x1b[0m".format(p=passed))
    else:
        print("\x1b[33m{p} pages successfully updated, {f} errors.\x1b[0m".format(p=passed, f=failed))
|
||
|
|
|
||
|
|
|
||
|
|
# Run the sync only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|