From 6f67d125af855e40fd57b9f9860cc4d1d8634111 Mon Sep 17 00:00:00 2001
From: Yusur Princeps
Date: Wed, 8 Oct 2025 14:46:09 +0200
Subject: [PATCH] 0.2.0 initial commit
---
.gitignore | 32 +++
CHANGELOG.md | 5 +
Dockerfile | 0
README.md | 2 +
alembic/README | 1 +
alembic/env.py | 79 ++++++
alembic/script.py.mako | 24 ++
alembic/versions/3bfaa1b74794_.py | 77 ++++++
alembic/versions/63d014f73934_.py | 94 +++++++
alembic/versions/fbf0bcc3368a_.py | 28 ++
pyproject.toml | 33 +++
wordenizer.c | 291 +++++++++++++++++++++
xefyl/__init__.py | 182 +++++++++++++
xefyl/__main__.py | 5 +
xefyl/cli.py | 38 +++
xefyl/hashing.py | 110 ++++++++
xefyl/image_tracker.py | 236 +++++++++++++++++
xefyl/interutils.py | 11 +
xefyl/models.py | 265 +++++++++++++++++++
xefyl/search_engine.py | 62 +++++
xefyl/static/style.css | 35 +++
xefyl/templates/404.html | 12 +
xefyl/templates/500.html | 12 +
xefyl/templates/base.html | 14 +
xefyl/templates/collection.html | 26 ++
xefyl/templates/collectionlist.html | 23 ++
xefyl/templates/home.html | 14 +
xefyl/templates/htmldir.html | 12 +
xefyl/templates/inc/imagegrid.html | 59 +++++
xefyl/templates/inc/newcollectionlink.html | 5 +
xefyl/templates/inc/title.html | 16 ++
xefyl/templates/tracker.html | 75 ++++++
xefyl/utils.py | 65 +++++
xefyl/website/__init__.py | 13 +
xefyl/website/accounts.py | 14 +
xefyl/website/htmlfiles.py | 29 ++
xefyl/website/images.py | 51 ++++
xefyl/wordenizer.py | 1 +
38 files changed, 2051 insertions(+)
create mode 100644 .gitignore
create mode 100644 CHANGELOG.md
create mode 100644 Dockerfile
create mode 100644 README.md
create mode 100644 alembic/README
create mode 100644 alembic/env.py
create mode 100644 alembic/script.py.mako
create mode 100644 alembic/versions/3bfaa1b74794_.py
create mode 100644 alembic/versions/63d014f73934_.py
create mode 100644 alembic/versions/fbf0bcc3368a_.py
create mode 100644 pyproject.toml
create mode 100644 wordenizer.c
create mode 100644 xefyl/__init__.py
create mode 100644 xefyl/__main__.py
create mode 100644 xefyl/cli.py
create mode 100644 xefyl/hashing.py
create mode 100644 xefyl/image_tracker.py
create mode 100644 xefyl/interutils.py
create mode 100644 xefyl/models.py
create mode 100644 xefyl/search_engine.py
create mode 100644 xefyl/static/style.css
create mode 100644 xefyl/templates/404.html
create mode 100644 xefyl/templates/500.html
create mode 100644 xefyl/templates/base.html
create mode 100644 xefyl/templates/collection.html
create mode 100644 xefyl/templates/collectionlist.html
create mode 100644 xefyl/templates/home.html
create mode 100644 xefyl/templates/htmldir.html
create mode 100644 xefyl/templates/inc/imagegrid.html
create mode 100644 xefyl/templates/inc/newcollectionlink.html
create mode 100644 xefyl/templates/inc/title.html
create mode 100644 xefyl/templates/tracker.html
create mode 100644 xefyl/utils.py
create mode 100644 xefyl/website/__init__.py
create mode 100644 xefyl/website/accounts.py
create mode 100644 xefyl/website/htmlfiles.py
create mode 100644 xefyl/website/images.py
create mode 100644 xefyl/wordenizer.py
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6975c2c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,32 @@
+\#*\#
+.\#*
+**~
+.*.swp
+**.pyc
+**.pyo
+**.egg-info
+__pycache__/
+database/
+data/
+thumb/
+node_modules/
+.env
+.env.*
+.venv
+env
+venv
+venv-*/
+alembic.ini
+config/
+.err
+conf/
+config.json
+data/
+.build/
+dist/
+/target
+.vscode
+/run.sh
+**/static/css
+**/static/js
+wordenizer
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..3186b20
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,5 @@
+# Changelog
+
+## 0.2.0
+
+- Initial commit
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..e69de29
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..dd49caa
--- /dev/null
+++ b/README.md
@@ -0,0 +1,2 @@
+# Sakuragasaki46 Surf
+
diff --git a/alembic/README b/alembic/README
new file mode 100644
index 0000000..98e4f9c
--- /dev/null
+++ b/alembic/README
@@ -0,0 +1 @@
+Generic single-database configuration.
\ No newline at end of file
diff --git a/alembic/env.py b/alembic/env.py
new file mode 100644
index 0000000..d01b130
--- /dev/null
+++ b/alembic/env.py
@@ -0,0 +1,79 @@
+from logging.config import fileConfig
+
+from sqlalchemy import engine_from_config
+from sqlalchemy import pool
+
+from alembic import context
+
+# this is the Alembic Config object, which provides
+# access to the values within the .ini file in use.
+config = context.config
+
+# Interpret the config file for Python logging.
+# This line sets up loggers basically.
+if config.config_file_name is not None:
+ fileConfig(config.config_file_name)
+
+# add your model's MetaData object here
+# for 'autogenerate' support
+# from myapp import mymodel
+# target_metadata = mymodel.Base.metadata
+from xefyl import models
+target_metadata = models.Base.metadata
+
+# other values from the config, defined by the needs of env.py,
+# can be acquired:
+# my_important_option = config.get_main_option("my_important_option")
+# ... etc.
+
+
+def run_migrations_offline() -> None:
+ """Run migrations in 'offline' mode.
+
+ This configures the context with just a URL
+ and not an Engine, though an Engine is acceptable
+ here as well. By skipping the Engine creation
+ we don't even need a DBAPI to be available.
+
+ Calls to context.execute() here emit the given string to the
+ script output.
+
+ """
+ url = config.get_main_option("sqlalchemy.url")
+ context.configure(
+ url=url,
+ target_metadata=target_metadata,
+ literal_binds=True,
+ dialect_opts={"paramstyle": "named"},
+ )
+
+ with context.begin_transaction():
+ context.run_migrations()
+
+
+def run_migrations_online() -> None:
+ """Run migrations in 'online' mode.
+
+ In this scenario we need to create an Engine
+ and associate a connection with the context.
+
+ """
+ connectable = engine_from_config(
+ config.get_section(config.config_ini_section, {}),
+ prefix="sqlalchemy.",
+ poolclass=pool.NullPool,
+ )
+
+ with connectable.connect() as connection:
+ context.configure(
+ connection=connection, target_metadata=target_metadata
+ )
+
+ with context.begin_transaction():
+ context.run_migrations()
+
+
+if context.is_offline_mode():
+ run_migrations_offline()
+else:
+ run_migrations_online()
diff --git a/alembic/script.py.mako b/alembic/script.py.mako
new file mode 100644
index 0000000..55df286
--- /dev/null
+++ b/alembic/script.py.mako
@@ -0,0 +1,24 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision = ${repr(up_revision)}
+down_revision = ${repr(down_revision)}
+branch_labels = ${repr(branch_labels)}
+depends_on = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+ ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+ ${downgrades if downgrades else "pass"}
diff --git a/alembic/versions/3bfaa1b74794_.py b/alembic/versions/3bfaa1b74794_.py
new file mode 100644
index 0000000..e71d11e
--- /dev/null
+++ b/alembic/versions/3bfaa1b74794_.py
@@ -0,0 +1,77 @@
+"""empty message
+
+Revision ID: 3bfaa1b74794
+Revises:
+Create Date: 2023-11-19 15:50:38.211123
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '3bfaa1b74794'
+down_revision = None
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+ # ### commands auto generated by Alembic - please adjust! ###
+ op.create_table('image',
+ sa.Column('id', sa.BigInteger(), nullable=False),
+ sa.Column('type', sa.SmallInteger(), nullable=True),
+ sa.Column('format', sa.SmallInteger(), nullable=True),
+ sa.Column('name', sa.String(length=256), nullable=True),
+ sa.Column('hash', sa.String(length=256), nullable=True),
+ sa.Column('date', sa.DateTime(), nullable=True),
+ sa.Column('size', sa.BigInteger(), nullable=True),
+ sa.Column('pathname', sa.String(length=8192), nullable=True),
+ sa.PrimaryKeyConstraint('id'),
+ sa.UniqueConstraint('pathname')
+ )
+ op.create_index(op.f('ix_image_date'), 'image', ['date'], unique=False)
+ op.create_index(op.f('ix_image_hash'), 'image', ['hash'], unique=False)
+ op.create_index(op.f('ix_image_name'), 'image', ['name'], unique=False)
+ op.create_index(op.f('ix_image_type'), 'image', ['type'], unique=False)
+ op.create_table('video',
+ sa.Column('id', sa.BigInteger(), nullable=False),
+ sa.Column('type', sa.SmallInteger(), nullable=True),
+ sa.Column('format', sa.SmallInteger(), nullable=True),
+ sa.Column('name', sa.String(length=256), nullable=True),
+ sa.Column('duration', sa.Float(), nullable=True),
+ sa.Column('date', sa.DateTime(), nullable=True),
+ sa.Column('size', sa.BigInteger(), nullable=True),
+ sa.Column('pathname', sa.String(length=8192), nullable=True),
+ sa.PrimaryKeyConstraint('id'),
+ sa.UniqueConstraint('pathname')
+ )
+ op.create_index(op.f('ix_video_date'), 'video', ['date'], unique=False)
+ op.create_index(op.f('ix_video_duration'), 'video', ['duration'], unique=False)
+ op.create_index(op.f('ix_video_name'), 'video', ['name'], unique=False)
+ op.create_index(op.f('ix_video_type'), 'video', ['type'], unique=False)
+ op.create_table('imagetag',
+ sa.Column('id', sa.BigInteger(), nullable=False),
+ sa.Column('image_id', sa.BigInteger(), nullable=True),
+ sa.Column('name', sa.String(length=64), nullable=True),
+ sa.ForeignKeyConstraint(['image_id'], ['image.id'], ),
+ sa.PrimaryKeyConstraint('id'),
+ sa.UniqueConstraint('image_id', 'name')
+ )
+ # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+ # ### commands auto generated by Alembic - please adjust! ###
+ op.drop_table('imagetag')
+ op.drop_index(op.f('ix_video_type'), table_name='video')
+ op.drop_index(op.f('ix_video_name'), table_name='video')
+ op.drop_index(op.f('ix_video_duration'), table_name='video')
+ op.drop_index(op.f('ix_video_date'), table_name='video')
+ op.drop_table('video')
+ op.drop_index(op.f('ix_image_type'), table_name='image')
+ op.drop_index(op.f('ix_image_name'), table_name='image')
+ op.drop_index(op.f('ix_image_hash'), table_name='image')
+ op.drop_index(op.f('ix_image_date'), table_name='image')
+ op.drop_table('image')
+ # ### end Alembic commands ###
diff --git a/alembic/versions/63d014f73934_.py b/alembic/versions/63d014f73934_.py
new file mode 100644
index 0000000..e200bab
--- /dev/null
+++ b/alembic/versions/63d014f73934_.py
@@ -0,0 +1,94 @@
+"""empty message
+
+Revision ID: 63d014f73934
+Revises: 3bfaa1b74794
+Create Date: 2023-12-02 21:50:41.457594
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '63d014f73934'
+down_revision = '3bfaa1b74794'
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+ # ### commands auto generated by Alembic - please adjust! ###
+ op.create_table('user',
+ sa.Column('id', sa.BigInteger(), nullable=False),
+ sa.Column('username', sa.String(length=30), nullable=True),
+ sa.Column('display_name', sa.String(length=64), nullable=True),
+ sa.Column('passhash', sa.String(length=256), nullable=True),
+ sa.Column('joined_at', sa.String(length=256), nullable=True),
+ sa.PrimaryKeyConstraint('id'),
+ sa.UniqueConstraint('username')
+ )
+ op.create_table('imagecollection',
+ sa.Column('id', sa.BigInteger(), nullable=False),
+ sa.Column('name', sa.String(length=128), nullable=False),
+ sa.Column('owner_id', sa.BigInteger(), nullable=True),
+ sa.Column('description', sa.String(length=4096), nullable=True),
+ sa.ForeignKeyConstraint(['owner_id'], ['user.id'], ),
+ sa.PrimaryKeyConstraint('id')
+ )
+ op.create_table('imageincollection',
+ sa.Column('image_id', sa.BigInteger(), nullable=False),
+ sa.Column('collection_id', sa.BigInteger(), nullable=False),
+ sa.Column('since', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=True),
+ sa.ForeignKeyConstraint(['collection_id'], ['imagecollection.id'], ),
+ sa.ForeignKeyConstraint(['image_id'], ['image.id'], ),
+ sa.PrimaryKeyConstraint('image_id', 'collection_id')
+ )
+ op.create_index(op.f('ix_imageincollection_since'), 'imageincollection', ['since'], unique=False)
+ op.drop_index('idx_60001_image_date', table_name='image')
+ op.drop_index('idx_60001_image_hash', table_name='image')
+ op.drop_index('idx_60001_image_name', table_name='image')
+ op.drop_index('idx_60001_image_pathname', table_name='image')
+ op.drop_index('idx_60001_image_type', table_name='image')
+ op.create_index(op.f('ix_image_date'), 'image', ['date'], unique=False)
+ op.create_index(op.f('ix_image_hash'), 'image', ['hash'], unique=False)
+ op.create_index(op.f('ix_image_name'), 'image', ['name'], unique=False)
+ op.create_index(op.f('ix_image_type'), 'image', ['type'], unique=False)
+ op.create_unique_constraint(None, 'image', ['pathname'])
+ op.drop_index('idx_59995_imagetag_image_id', table_name='imagetag')
+ op.drop_index('idx_59995_imagetag_image_id_name', table_name='imagetag')
+ op.create_unique_constraint(None, 'imagetag', ['image_id', 'name'])
+ op.create_foreign_key(None, 'imagetag', 'image', ['image_id'], ['id'])
+ op.create_index(op.f('ix_video_date'), 'video', ['date'], unique=False)
+ op.create_index(op.f('ix_video_duration'), 'video', ['duration'], unique=False)
+ op.create_index(op.f('ix_video_name'), 'video', ['name'], unique=False)
+ op.create_index(op.f('ix_video_type'), 'video', ['type'], unique=False)
+ op.create_unique_constraint(None, 'video', ['pathname'])
+ # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+ # ### commands auto generated by Alembic - please adjust! ###
+ op.drop_constraint(None, 'video', type_='unique')
+ op.drop_index(op.f('ix_video_type'), table_name='video')
+ op.drop_index(op.f('ix_video_name'), table_name='video')
+ op.drop_index(op.f('ix_video_duration'), table_name='video')
+ op.drop_index(op.f('ix_video_date'), table_name='video')
+ op.drop_constraint(None, 'imagetag', type_='foreignkey')
+ op.drop_constraint(None, 'imagetag', type_='unique')
+ op.create_index('idx_59995_imagetag_image_id_name', 'imagetag', ['image_id', 'name'], unique=False)
+ op.create_index('idx_59995_imagetag_image_id', 'imagetag', ['image_id'], unique=False)
+ op.drop_constraint(None, 'image', type_='unique')
+ op.drop_index(op.f('ix_image_type'), table_name='image')
+ op.drop_index(op.f('ix_image_name'), table_name='image')
+ op.drop_index(op.f('ix_image_hash'), table_name='image')
+ op.drop_index(op.f('ix_image_date'), table_name='image')
+ op.create_index('idx_60001_image_type', 'image', ['type'], unique=False)
+ op.create_index('idx_60001_image_pathname', 'image', ['pathname'], unique=False)
+ op.create_index('idx_60001_image_name', 'image', ['name'], unique=False)
+ op.create_index('idx_60001_image_hash', 'image', ['hash'], unique=False)
+ op.create_index('idx_60001_image_date', 'image', ['date'], unique=False)
+ op.drop_index(op.f('ix_imageincollection_since'), table_name='imageincollection')
+ op.drop_table('imageincollection')
+ op.drop_table('imagecollection')
+ op.drop_table('user')
+ # ### end Alembic commands ###
diff --git a/alembic/versions/fbf0bcc3368a_.py b/alembic/versions/fbf0bcc3368a_.py
new file mode 100644
index 0000000..fb0a30e
--- /dev/null
+++ b/alembic/versions/fbf0bcc3368a_.py
@@ -0,0 +1,28 @@
+"""empty message
+
+Revision ID: fbf0bcc3368a
+Revises: 63d014f73934
+Create Date: 2023-12-31 16:10:00.055273
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = 'fbf0bcc3368a'
+down_revision = '63d014f73934'
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+ # ### commands auto generated by Alembic - please adjust! ###
+ pass
+ # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+ # ### commands auto generated by Alembic - please adjust! ###
+ pass
+ # ### end Alembic commands ###
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..bb1d725
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,33 @@
+[project]
+name = "sakuragasaki46_xefyl"
+authors = [
+ { name = "Sakuragasaki46" }
+]
+dynamic = ["version"]
+dependencies = [
+ "Python-Dotenv>=1.0.0",
+ "Flask",
+ "SQLAlchemy",
+ "Flask-SqlAlchemy",
+ "Flask-Login",
+ "Flask-WTF",
+ "Pillow",
+ "Pillow-Heif",
+ "psycopg2",
+ "alembic"
+]
+requires-python = ">=3.10"
+classifiers = [
+ "Private :: X"
+]
+
+[project.optional-dependencies]
+dev = [
+ "Cython"
+]
+
+[tool.setuptools]
+packages = ["xefyl"]
+
+[tool.setuptools.dynamic]
+version = { attr = "xefyl.__version__" }
\ No newline at end of file
diff --git a/wordenizer.c b/wordenizer.c
new file mode 100644
index 0000000..6852de4
--- /dev/null
+++ b/wordenizer.c
@@ -0,0 +1,291 @@
+/* Tokenize words, letter by letter.
+ * Supports Latin characters.
+ * Compile with gcc
+ *
+ * (c) 2021 Sakuragasaki46
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <wctype.h>
+#include <locale.h>
+#include <errno.h>
+
+
+// user interface
+const char HELP[] = "%s - words in a text file\n\
+Prints a list of words in lowercase form, one per line.\n\
+Supports UTF-8 strings.\n\
+\n\
+Usage: %s [filename]\n\
+\n\
+Arguments:\n\
+\tfilename\t\tfilename to analyze (defaults to stdin)\n\
+\t-s\t\tcase sensitive\n\
+\t-n\t\tdon’t normalize non-ASCII characters\n\
+\t--help\t\tshows this help message and exits\n\
+\t--version\t\tshow version and exit\n";
+const char VERSION[] = "0.2";
+
+// behavior switches
+int cASE_SENSITIVE = 0;
+int nO_NORMALIZE = 0;
+
+// table data for normCharcode()
+// if character in first table is uppercase, data from second table
+// is read at same position
+
+// latin-1
+const char ASCII_DATA_192[] =
+ "aaaaaaAceeeeiiiiDnooooo OuuuuyTSaaaaaaAceeeeiiiiDnooooo OuuuuyTy";
+const char ASCII_DATA_192_B[] =
+ " e h e hs e h e h ";
+
+// latin extended A
+const char ASCII_DATA_U0100[] =
+ "aaaaaaccCCccCCddDDeeeeeeeeYYgggggggghhhhiiiiiiiiiiIIjjkkklllllll"
+ "lllnnnnnnnNNooooooOOrrrrrrssSSSSSSTTttttuuuuuuOOuuuuwwyyyZZzzZZs";
+const char ASCII_DATA_U0100_B[] =
+ " hh hh jj ee jj "
+ " gg ee hhhhhhss uu hh hh ";
+
+
+typedef struct string_linkedlist_s {
+ char * s;
+ size_t len;
+ size_t bufsize;
+ //struct string_linkedlist_s *next;
+} StringLL;
+
+int main_file(const char* filename);
+int main_stdin(void);
+int tok_words(FILE *fr);
+int readCharcode(FILE* fh);
+int readWagonChar(FILE *fh);
+int normCharcode(char *s, size_t pos, size_t offset, const char *table1, const char *table2);
+
+StringLL* StringLL_new();
+StringLL* StringLL_grow(StringLL*);
+StringLL* StringLL_appendchar(StringLL*, int);
+StringLL* StringLL_next_print(StringLL*);
+void StringLL_destroy(StringLL*);
+
+int main(int argc, char *argv[]) {
+ int curarg = 1;
+
+ // set locale, otherwise towlower() does not work
+ setlocale(LC_ALL, "en_US.UTF-8");
+
+ if (argc == 1) {
+ return main_stdin();
+ }
+ while (curarg < argc){
+    if (!strcmp(argv[curarg], "--help")) {
+ printf(HELP, argv[0], argv[0]);
+ exit(0);
+ } else if (!strcmp(argv[curarg], "--version")) {
+ puts(VERSION);
+ exit(0);
+    } else if (!strcmp(argv[curarg], "-s")) {
+      cASE_SENSITIVE = 1; curarg++;
+    } else if (!strcmp(argv[curarg], "-n")) {
+      nO_NORMALIZE = 1; curarg++;
+    } else if (!strcmp(argv[curarg], "-") || argv[curarg][0] == '\0') {
+ return main_stdin();
+ } else if (strncmp(argv[curarg], "-", 1)) {
+ return main_file(argv[curarg]);
+ } else {
+ fprintf(stderr, "Unknown option: \"%s\"\n", argv[curarg]);
+ return 1;
+ }
+ }
+
+ return 0;
+};
+
+int main_file(const char* filename){
+ FILE *fh;
+
+ fh = fopen(filename, "r");
+ if (!fh) {
+ fprintf(stderr, "[Errno %d] Could not open file \"%s\"\n", errno, filename);
+ exit(1);
+ }
+
+ return tok_words(fh);
+}
+
+int main_stdin(void){
+ return tok_words(stdin);
+}
+
+/**
+ * Main activity.
+ *
+ * @param fh a read-only file handle
+ */
+int tok_words(FILE *fh){
+ int charcode = 0, intoword = 0;
+ StringLL *lend;
+
+ lend = StringLL_new();
+
+ while ((charcode = readCharcode(fh)) >= 0) {
+ if (iswalpha(charcode)){
+ intoword++;
+
+ // locale lower case
+ if (!cASE_SENSITIVE) {
+ charcode = towlower(charcode);
+ }
+
+ lend = StringLL_appendchar(lend, charcode);
+ } else if (intoword > 0) {
+ intoword = 0;
+
+ lend = StringLL_next_print(lend);
+ }
+ }
+
+ if (intoword){
+ lend = StringLL_next_print(lend);
+ }
+
+ StringLL_destroy(lend);
+ return 0;
+}
+
+/**
+ * Read an UTF-8 character, and return its code.
+ * Returns a non-negative value on success,
+ * -1 on EOF.
+ */
+int readCharcode(FILE* fh){
+ int c, c2;
+
+ c = fgetc(fh);
+ if (c < 0) return -1;
+ if (0 <= c && c < 128) return c;
+ else if (192 <= c && c < 224){
+ c -= 192;
+ c *= 64;
+ c2 = readWagonChar(fh);
+ if (c2 < 0) return 0;
+ c += c2;
+ return c;
+ } else if (224 <= c && c < 240) {
+ c -= 224;
+ c *= 64;
+ c2 = readWagonChar(fh);
+ if (c2 < 0) return 0;
+ c += c2;
+ c *= 64;
+ c2 = readWagonChar(fh);
+ if (c2 < 0) return 0;
+ c += c2;
+ return c;
+ } else {
+ return 0;
+ }
+}
+
+int readWagonChar(FILE * fh){
+ int c;
+
+ c = fgetc(fh);
+ if (c < 128 || c >= 192) return -1;
+ return c - 128;
+}
+
+int normCharcode(char * s, size_t pos, size_t offset, const char *table1, const char *table2){
+ char c1;
+
+ // if character in first table is uppercase, data from second table
+ // is read at same position
+ c1 = table1[offset];
+ if (c1 == ' '){
+ return 0;
+ } else if (isupper(c1)) {
+ s[pos++] = tolower(c1);
+ s[pos++] = table2[offset];
+ return 2;
+ } else {
+ s[pos++] = c1;
+ return 1;
+ }
+}
+
+/***** StringLL functions *******/
+
+
+StringLL* StringLL_new () {
+ StringLL* l;
+
+ l = (StringLL*) malloc (sizeof(StringLL));
+ l->bufsize = 16;
+ l->s = (char *) malloc(l->bufsize);
+ l->len = 0;
+
+ return l;
+}
+
+StringLL* StringLL_grow (StringLL* l){
+ l->bufsize *= 2;
+ l->s = (char*) realloc(l->s, l->bufsize);
+ return l;
+}
+
+StringLL* StringLL_appendchar(StringLL* l, int c){
+ if (c == 0) {
+ return l;
+ }
+
+ if (l->bufsize - l->len <= 4){
+ l = StringLL_grow(l);
+ }
+
+ if (c < 128){
+ // ascii
+ l->s[l->len++] = (char) c;
+ } else if (!nO_NORMALIZE && 192 <= c && c < 256) {
+ // latin-1 supplement
+ l->len += normCharcode(l->s, l->len, c - 192, ASCII_DATA_192, ASCII_DATA_192_B);
+ } else if (!nO_NORMALIZE && 256 <= c && c < 384) {
+ // latin extended-A
+ l->len += normCharcode(l->s, l->len, c - 256, ASCII_DATA_U0100, ASCII_DATA_U0100_B);
+ } else if (c < 0x800) {
+ // 2 byte UTF-8
+ l->s[l->len++] = (char) (c / 64) | 192;
+ l->s[l->len++] = (char) (c % 64) | 128;
+ } else if (c < 0x10000) {
+ // 3 byte UTF-8
+ l->s[l->len++] = (char) (c / 0x1000) | 224;
+ l->s[l->len++] = (char) (c % 0x1000 / 64) | 128;
+ l->s[l->len++] = (char) (c % 64) | 128;
+ } else {
+ // 4-byte UTF-8
+ l->s[l->len++] = (char) (c / 0x40000) | 240;
+ l->s[l->len++] = (char) (c % 0x40000 / 0x1000) | 128;
+ l->s[l->len++] = (char) (c % 0x1000 / 64) | 128;
+    l->s[l->len++] = (char) (c % 64) | 128;
+ }
+ return l;
+}
+
+StringLL* StringLL_next_print (StringLL *l){
+ StringLL *next;
+ l->s[l->len] = 0;
+ printf("%s\n", l->s);
+ next = StringLL_new();
+ free(l->s);
+ free(l);
+ return next;
+}
+
+void StringLL_destroy (StringLL *l){
+ free(l->s);
+ free(l);
+}
+
diff --git a/xefyl/__init__.py b/xefyl/__init__.py
new file mode 100644
index 0000000..147c43c
--- /dev/null
+++ b/xefyl/__init__.py
@@ -0,0 +1,182 @@
+"""
+Xefyl - A link and image indexer with tags
+
+(c) 2023, 2025 Sakuragasaki46
+See LICENSE for copying info
+"""
+
+import datetime
+from functools import partial
+from logging import warning
+import os, sys
+import re
+from flask import Flask, g, redirect, request, render_template, abort
+from flask_login import LoginManager
+from flask_wtf.csrf import CSRFProtect
+from flask_sqlalchemy import SQLAlchemy
+import dotenv
+from sqlalchemy import func, select, create_engine
+from sqlalchemy.exc import ProgrammingError
+import warnings
+
+__version__ = "0.2.0"
+
+APP_BASE_DIR = os.path.dirname(os.path.dirname(__file__))
+
+dotenv.load_dotenv(os.path.join(APP_BASE_DIR, ".env"))
+
+correct_database_url = os.environ["DATABASE_URL"]
+
+def fix_database_url():
+ if app.config['SQLALCHEMY_DATABASE_URI'] != correct_database_url:
+ warnings.warn('mod_wsgi got the database wrong!', RuntimeWarning)
+ app.config['SQLALCHEMY_DATABASE_URI'] = correct_database_url
+
+def create_session_interactively():
+ '''Helper for interactive session management'''
+ engine = create_engine(correct_database_url)
+ return db.Session(bind = engine)
+
+CSI = create_session_interactively
+
+from .models import db, Image, ImageCollection, ImageTag, User, Video
+from .utils import DhashConverter, ImgExtConverter, VideoExtConverter
+
+app = Flask(__name__)
+app.secret_key = os.getenv("SECRET_KEY")
+app.config["SQLALCHEMY_DATABASE_URI"] = correct_database_url
+
+db.init_app(app)
+
+login_manager = LoginManager(app)
+login_manager.login_view = "accounts.login"
+
+app.url_map.converters['dhash'] = DhashConverter
+app.url_map.converters['img_ext'] = ImgExtConverter
+app.url_map.converters['video_ext'] = VideoExtConverter
+
+
+## helpers
+
+
+
+def paginated_query(q, n_per_page, argname="page"):
+ if isinstance(argname, str) and not argname.isdigit():
+ n = int(request.args.get(argname, 1))
+ else:
+ n = int(argname)
+
+ return db.paginate(q, page=n, per_page=n_per_page)
+
+render_tempIate = render_template
+
+## request hooks
+
+@app.before_request
+def _before_request():
+ pass
+
+
+@app.context_processor
+def _inject_variables():
+ return dict(
+ re=re, datetime=datetime, min=min, max=max,
+ app_name = os.getenv('APP_NAME')
+ )
+
+@login_manager.user_loader
+def _inject_user(user_id):
+ return db.session.execute(select(User).where(User.id == user_id)).scalar()
+
+## url map converters
+
+@app.route("/")
+def homepage():
+ return render_template("home.html")
+
+@app.route("/tracker")
+def show_tracker():
+ q = select(Image)
+
+ if "date" in request.args:
+ dateym = request.args["date"]
+ try:
+ datey, datem = (int(x) for x in dateym.split('-'))
+ d1 = datetime.datetime(datey, datem, 1).isoformat("T")
+ d2 = datetime.datetime(datey + (datem == 12), datem + 1 if datem < 12 else 1, 1).isoformat("T")
+ q = q.where((Image.date < d2) & (Image.date >= d1))
+ except Exception as e:
+ # ignore the exception
+ pass
+
+ if "type" in request.args:
+ try:
+            typ = int(request.args['type'])
+ q = q.where(Image.type == typ)
+ except Exception as e:
+ pass
+
+ if 'hash' in request.args:
+ q = q.where(Image.hash.like(request.args['hash'] + '%'))
+
+ if 'tags' in request.args:
+ tags = request.args["tags"].split()
+ print(tags, file=sys.stderr)
+ q = q.join(ImageTag).where(ImageTag.name.in_(tags))
+
+
+ if 'sort' in request.args:
+ sortby = request.args['sort']
+ if sortby == 'hash':
+ q = q.order_by(Image.hash)
+ elif sortby == 'oldest':
+            q = q.order_by(Image.date.asc())
+ else:
+ q = q.order_by(Image.date.desc())
+ else:
+ q = q.order_by(Image.date.desc())
+
+ return render_tempIate(
+ "tracker.html", is_video_tracker=False,
+ l=paginated_query(q, 50, "page")
+ )
+
+@app.route('/collection/')
+def show_collection(id):
+ coll = db.session.execute(select(ImageCollection).where(ImageCollection.id == id)).scalar()
+
+ if not coll:
+ abort(404)
+
+ return render_template("collection.html", coll=coll, coll_items=paginated_query(coll.images, 50, "page"))
+
+@app.route('/tracker_video')
+def show_video_tracker():
+ q = select(Video)
+ if 'date' in request.args:
+ dateym = request.args['date']
+ try:
+ datey, datem = (int(x) for x in dateym.split('-'))
+            d1 = datetime.datetime(datey, datem, 1).isoformat("T")
+            d2 = datetime.datetime(datey + (datem == 12), datem + 1 if datem < 12 else 1, 1).isoformat("T")
+ q = q.where((Video.date < d2) & (Video.date >= d1))
+ except Exception as e:
+ # ignore the exception
+ pass
+ return render_tempIate('tracker.html', l=paginated_query(q.order_by(Video.date.desc()), 50, "page"), is_video_tracker=True)
+
+
+@app.errorhandler(404)
+def error_404(e):
+ return render_template('404.html'), 404
+
+
+@app.errorhandler(500)
+def error_500(e):
+ g.no_user = True
+ return render_template('500.html'), 500
+
+from .website import blueprints
+for bp in blueprints:
+ app.register_blueprint(bp)
+
diff --git a/xefyl/__main__.py b/xefyl/__main__.py
new file mode 100644
index 0000000..6b68667
--- /dev/null
+++ b/xefyl/__main__.py
@@ -0,0 +1,5 @@
+
+from .cli import main
+
+main()
+
diff --git a/xefyl/cli.py b/xefyl/cli.py
new file mode 100644
index 0000000..ea27095
--- /dev/null
+++ b/xefyl/cli.py
@@ -0,0 +1,38 @@
+
+
+import sys
+import argparse
+
+from . import __version__ as version
+
+def main_crawl(args):
+ ...
+
+def make_parser():
+ parser = argparse.ArgumentParser(
+ prog='xefyl',
+ description='image tracker and search engine'
+ )
+ parser.add_argument('--version', '-V', action="version", version=version)
+
+ subparsers = parser.add_subparsers()
+
+ parser_c = subparsers.add_parser('crawl', help='crawl the Web', aliases=('c',))
+ parser_c.set_defaults(action=main_crawl)
+
+ ...
+
+ return parser
+
+
+def main():
+ parser = make_parser()
+ args = parser.parse_args()
+ if hasattr(args, 'action') and callable(args.action):
+ args.action(args)
+ else:
+ parser.print_help()
+
+
+if __name__ == '__main__':
+ main()
\ No newline at end of file
diff --git a/xefyl/hashing.py b/xefyl/hashing.py
new file mode 100644
index 0000000..f5f525c
--- /dev/null
+++ b/xefyl/hashing.py
@@ -0,0 +1,110 @@
+'''
+Helpers for image hashing.
+
+Image hashes are 128-bit numbers divided in two 64-bit sequences.
+The former is a differential hash, computed considering values of two adjacent pixels.
+The latter is a color hash, formed in turn comparing one channel with the two others, alternately.
+
+(C) 2021-2025 Sakuragasaki46.
+'''
+
+from PIL import Image, ImageOps
+import re
+import warnings
+from functools import reduce
+
+try:
+ from pillow_heif import register_heif_opener
+ register_heif_opener()
+except ImportError:
+ warnings.warn('pillow_heif not installed, .heic file format not supported')
+
+DHASH_EXP_RE = r'([a-f0-9]{16})(?:-([a-f0-9]{16}))?'
+
+
+# dhash object
+class Dhash(object):
+ def __init__(self, val):
+ mo = re.match(DHASH_EXP_RE, val)
+ if not mo:
+ raise ValueError('Not a valid dhash')
+ dpart, cpart = mo.group(1), mo.group(2)
+ self._d = dpart
+ self._c = cpart
+ def __hash__(self):
+ return hash((self._d, self._c))
+ def __str__(self):
+ if self._c:
+ return self._d + '-' + self._c
+ else:
+ return self._d
+ def __repr__(self):
+ return '{}({!r})'.format(self.__class__.__name__, str(self))
+ def __eq__(self, other):
+ return self._d == other._d and self._c == other._c
+ def same_d(self, other):
+ return self._d == other._d
+ def __lt__(self, other):
+ return self._d < other._d
+ @property
+ def diff(self):
+ return self._d
+ @property
+ def color(self):
+ return self._c
+ def bw(self):
+ return self.__class__(self._d)
+
+
+
+# image hash helpers and stuff
+
+def _pixel(q):
+ '''Workaround for BW images'''
+ if isinstance(q, int):
+ return q, q, q
+ return q
+
+def compress_bytes(arr):
+ '''
+ Convert an array of integers into a hex sequence.
+ Positive numbers get 1; everything else 0.
+ '''
+ return '{{:0{}x}}'.format(len(arr) // 4).format(reduce(lambda a, b: a*2+b,
+ [a > 0 for a in arr], 0))
+
+def _dhash(im):
+ '''
+ Differential hash of a 9x8 image.
+ '''
+ return compress_bytes([im.getpixel((a+1,b)) - im.getpixel((a,b)) for a in range(8) for b in range(8)])
+
+
+def _dhash_color(im):
+ '''
+ Differential hash of a 9x8 image, color by color.
+ '''
+ l = []
+ for i in range(64):
+ a, b = divmod(i, 8)
+ p1 = _pixel(im.getpixel((a,b)))
+ p2 = _pixel(im.getpixel((a+1,b)))
+ l.append(p1[i % 3] * 2 - p2[(i+1) % 3] - p2[(i+2) % 3])
+ return compress_bytes(l)
+
+def full_hash(im, donly=False):
+ '''
+ Main hash function.
+ Takes a image file, shrinks its content to a 9x8 square, then computes
+ a hash of its contents.
+ Both differential and color hashes are computed.
+ If donly is True, only differential one is.
+ The differential part is separated by the color part with a hyphen.
+ '''
+ im_mini = im.resize((9,8))
+ im_mbw = ImageOps.grayscale(im_mini)
+ h = _dhash(im_mbw)
+ if not donly:
+ h += '-'
+ h += _dhash_color(im_mini)
+ return Dhash(h)
diff --git a/xefyl/image_tracker.py b/xefyl/image_tracker.py
new file mode 100644
index 0000000..f0cbf40
--- /dev/null
+++ b/xefyl/image_tracker.py
@@ -0,0 +1,236 @@
import argparse
import datetime
import glob
import json
import os
import subprocess
import sys
import warnings

from PIL import Image as _Image
from sqlalchemy import create_engine, delete, insert, select, update
from sqlalchemy.exc import InternalError
from sqlalchemy.orm import Session

from . import APP_BASE_DIR
from .hashing import full_hash
from .models import IMG_EXTENSIONS, VIDEO_EXTENSIONS, Image, Video
+
+IMG_THUMB_PATH = os.path.join(APP_BASE_DIR, 'data/thumb/16x16')
+
def create_engine_from_env():
    """Build the SQLAlchemy engine from the DATABASE_URL environment variable.

    Raises RuntimeError with a clear message when the variable is unset,
    instead of letting create_engine(None) fail obscurely.
    """
    url = os.getenv('DATABASE_URL')
    if not url:
        raise RuntimeError('DATABASE_URL environment variable is not set')
    return create_engine(url)
+
class ImageTracker(object):
    """Maintains the image/video database: ingestion, path repair, thumbnails.

    Use as a context manager; an ORM Session is opened on __enter__ and
    committed (or rolled back on error) on __exit__.
    """

    def __init__(self, engine=None):
        # engine: optional pre-built SQLAlchemy engine; defaults to the one
        # derived from DATABASE_URL.
        self.engine = engine or create_engine_from_env()
        self.session = None

    def ensure_session(self):
        """Raise if called outside the context manager."""
        if self.session is None:
            raise RuntimeError("no session set; did you forget to run this inside a context manager?")

    def add_file(self, path, typ):
        """Register a single media file.

        Returns 1 if a new row was inserted, 0 if skipped (duplicate or
        unreadable).  Raises TypeError for unsupported extensions.
        """
        self.ensure_session()

        _, name = os.path.split(path)
        name, ext = os.path.splitext(name)
        ext = ext.lstrip(".")

        if ext in IMG_EXTENSIONS:
            return self._add_image(path, typ, name, ext)
        elif ext in VIDEO_EXTENSIONS:
            return self._add_video(path, typ, name, ext)
        else:
            raise TypeError(f"invalid extension: {ext!r}")

    def _add_image(self, path, typ, name, ext):
        """Insert an Image row for `path`; skip duplicates and unreadable files."""
        self.session.begin_nested()

        # Skip files already tracked (pathname is unique).
        if self.session.execute(select(Image).where(Image.pathname == path)).scalar() is not None:
            self.session.rollback()
            return 0

        try:
            f = _Image.open(path)
        except Exception as e:
            warnings.warn(f"could not open image {path!r}: {e.__class__.__name__}: {e}")
            self.session.rollback()
            return 0

        formt = IMG_EXTENSIONS[ext]
        date = datetime.datetime.fromtimestamp(os.path.getmtime(path))
        size = os.path.getsize(path)
        h = full_hash(f)

        try:
            self.session.execute(insert(Image).values(
                type = typ,
                format = formt,
                name = name,
                hash = str(h),
                date = date,
                size = size,
                pathname = path
            ))
            self.session.commit()
            return 1
        except Exception as e:
            warnings.warn(f"exception ignored while inserting: {e.__class__.__name__}: {e}")
            self.session.rollback()
            return 0

    def _add_video(self, path, typ, name, ext):
        """Insert a Video row for `path`, probing its duration with ffprobe."""
        self.session.begin_nested()

        try:
            # BUG FIX: subprocess.run returns a CompletedProcess, not the
            # command output; capture stdout and convert that to float.
            proc = subprocess.run([
                "ffprobe", "-v", "error", "-show_entries", "format=duration", "-of",
                "default=noprint_wrappers=1:nokey=1", path
            ], capture_output=True, text=True, check=True)
            duration = float(proc.stdout.strip())
        except Exception as e:
            warnings.warn(f"Invalid duration: {e}")
            self.session.rollback()
            return 0

        formt = VIDEO_EXTENSIONS[ext]
        date = datetime.datetime.fromtimestamp(os.path.getmtime(path))
        size = os.path.getsize(path)

        try:
            # BUG FIX: the original inserted videos into the Image table with
            # an undefined `hash` variable and silently dropped the probed
            # duration; videos belong in the Video table, which has a
            # duration column and no hash.
            self.session.execute(insert(Video).values(
                type = typ,
                format = formt,
                name = name,
                duration = duration,
                date = date,
                size = size,
                pathname = path
            ))
            self.session.commit()
            return 1
        except Exception as e:
            # BUG FIX: the original handler referenced `e` without binding
            # it (`except Exception:`), raising NameError inside the handler.
            warnings.warn(f"exception ignored while inserting: {e.__class__.__name__}: {e}")
            self.session.rollback()
            return 0

    def add_file_glob(self, path, typ):
        """Add every file matching the glob `path`; returns the insert count."""
        self.ensure_session()

        counter = 0

        for f in glob.glob(path):
            try:
                counter += self.add_file(f, typ)
            except InternalError:
                # Unrecoverable DB state: roll back and propagate.
                self.session.rollback()
                raise
            except Exception as e:
                warnings.warn(f"Exception ignored: {e}")
        return counter

    def repair_filepath(self, imgid, basedir=None):
        """Relocate a moved file by searching `basedir` for a same-named,
        same-hash file; updates the row and returns the (possibly new) path.
        """
        self.ensure_session()

        orig_fp = imgid.pathname
        orig_hash = imgid.hash_obj
        orig_name = os.path.split(orig_fp)[1]
        if not os.path.exists(orig_fp):
            if not basedir:
                basedir = os.path.dirname(orig_fp)
            for fp, dirs, files in os.walk(basedir):
                if orig_name in files:
                    new_fp = os.path.join(fp, orig_name)
                    # Only accept a candidate whose perceptual hash matches.
                    if full_hash(_Image.open(new_fp)) == orig_hash:
                        try:
                            imgid.pathname = new_fp
                            self.session.commit()
                        except Exception:
                            # Image entries colliding: another row already owns
                            # this pathname, so drop the stale duplicate.
                            warnings.warn('Pathname {!r} has already an entry, deleting instance'.format(new_fp))
                            self.session.rollback()
                            # BUG FIX: `delete(...).where(...).execute()` is not
                            # valid SQLAlchemy (and `delete` was not imported);
                            # execute the statement on the session instead.
                            self.session.execute(delete(Image).where(Image.id == imgid.id))
                            self.session.commit()
                        return new_fp
        return orig_fp

    def __enter__(self):
        """Open a fresh ORM session for the duration of the block."""
        self.session = Session(self.engine, future=True)
        return self

    def __exit__(self, *exc_info):
        """Commit on clean exit, roll back on exception; always drop the session."""
        if exc_info == (None, None, None) and self.session:
            self.session.commit()
        elif self.session:
            self.session.rollback()
        # BUG FIX: the session was previously left assigned after an
        # exception, letting later calls reuse a rolled-back session.
        self.session = None

    def update(self):
        """Ingest every configured source glob, then refresh thumbnails."""
        self.ensure_session()

        # Close the config file promptly instead of leaking the handle.
        with open(os.path.join(APP_BASE_DIR, "config/tracker.json")) as cfg:
            config = json.load(cfg)
        for path, typ in config["sources"]:
            print("now updating:", path, "...", file=sys.stderr, end=" ", flush=True)
            count = self.add_file_glob(path, typ)
            print(count, "files added", file=sys.stderr)

        print("Generating thumbnails...", file=sys.stderr, end=" ", flush=True)
        self.generate_16x16_thumbnails()
        print("done")

    def repair(self):
        """Rewrite pathname prefixes according to config/repairtable.json."""
        self.ensure_session()

        counter = 0
        print()

        with open(os.path.join(APP_BASE_DIR, 'config/repairtable.json')) as cfg:
            config = json.load(cfg)

        for i in self.session.execute(select(Image)).scalars():
            for oldbase, newbase in config['changed']:
                if i.pathname.startswith(oldbase):
                    self.session.begin_nested()
                    self.session.execute(
                        update(Image)
                        .where(Image.id == i.id)
                        .values(pathname = os.path.join(newbase, i.pathname[len(oldbase):].lstrip('/')))
                    )
                    self.session.commit()
                    # BUG FIX: increment before printing so the running
                    # total is never one behind.
                    counter += 1
                    print(f'\x1b[A{counter} file paths repaired')

        print(f'{counter} file paths repaired')

    def generate_16x16_thumbnails(self):
        """Write a 16x16 PNG thumbnail for every tracked image that lacks one."""
        self.ensure_session()
        for i in self.session.execute(select(Image).order_by(Image.hash)).scalars():
            img_path = os.path.join(IMG_THUMB_PATH, '{}.png'.format(i.hash))
            if os.path.exists(img_path):
                continue
            try:
                # Opening may fail too (moved/corrupt file); treat it the
                # same as a failed save and keep going.
                img = _Image.open(i.pathname)
                img_mini = img.resize((16, 16))
                img_mini.save(img_path)
            except Exception as e:
                warnings.warn(f'Exception ignored: {e.__class__.__name__}: {e}')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--update', '-u', action='store_true', help='update the tracker')
+ parser.add_argument('--repair', '-r', action='store_true', help='repair file paths')
+ parser.add_argument('--genthumb', '-t', action='store_true', help='generate thumbnails')
+ args = parser.parse_args()
+
+ if args.update:
+ with ImageTracker() as tracker:
+ tracker.update()
+ print('Tracker updated!')
+
+ if args.repair:
+ with ImageTracker() as tracker:
+ tracker.repair()
+ print('Tracker repaired!')
+
+ if args.genthumb:
+ with ImageTracker() as tracker:
+ tracker.generate_16x16_thumbnails()
+ print('Generated thumbnails!')
+
+ print('Visit the tracker at ')
+
+
diff --git a/xefyl/interutils.py b/xefyl/interutils.py
new file mode 100644
index 0000000..44be34b
--- /dev/null
+++ b/xefyl/interutils.py
@@ -0,0 +1,11 @@
+
+from PIL import Image as _Image
+import subprocess
+import os
+
def show_image(pathname):
    """Display an image interactively.

    Inside a kitty terminal the image is rendered inline via the icat
    kitten; anywhere else the default PIL viewer is used.
    """
    if os.getenv('TERM', '') != 'xterm-kitty':
        _Image.open(pathname).show()
        return
    subprocess.Popen(['kitty', '+kitten', 'icat', pathname]).wait()
diff --git a/xefyl/models.py b/xefyl/models.py
new file mode 100644
index 0000000..6c0a767
--- /dev/null
+++ b/xefyl/models.py
@@ -0,0 +1,265 @@
+from flask_sqlalchemy import SQLAlchemy
+from sqlalchemy import BigInteger, Boolean, CheckConstraint, Column, DateTime, Float, ForeignKey, Integer, SmallInteger, String, Table, UniqueConstraint, func, insert, select, delete, text
+from sqlalchemy.orm import declarative_base, relationship
+from .hashing import Dhash
+from .interutils import show_image
+
## constants

# Map file extensions to the numeric format codes stored in Image.format /
# Video.format.  Video format codes live in the 100+ range.
IMG_EXTENSIONS = {
    'jpeg': 1, 'jpg': 1, 'png': 2, 'webp': 3,
    'gif': 4, 'svg': 5, 'heic': 6
}

# Reverse mapping: format code -> canonical extension (covers videos too).
IMG_EXTENSIONS_REV = {
    1: 'jpg', 2: 'png', 3: 'webp', 4: 'gif', 5: 'svg', 6: 'heic',
    101: 'mp4'
}

VIDEO_EXTENSIONS = {
    'mp4': 101
}

# Image.type values: where the picture came from.
IMG_TYPE_CAMERA = 1
IMG_TYPE_DOWNLOAD = 2
IMG_TYPE_EDIT = 3
IMG_TYPE_SCREENSHOT = 4

# Named aliases for the image format codes above.
IMG_FORMAT_JPEG = 1
IMG_FORMAT_PNG = 2
IMG_FORMAT_WEBP = 3
IMG_FORMAT_GIF = 4
IMG_FORMAT_SVG = 5

## END constants

## NOTE HEIC capabilities require pillow_heif

# Shared declarative base for all models; `db` wraps it for Flask usage.
Base = declarative_base()

db = SQLAlchemy(model_class=Base)
+
class User(Base):
    """A site account; owns image collections."""
    __tablename__ = "user"

    id = Column(BigInteger, primary_key=True)
    # Lowercase handle restricted to [a-z0-9_-] by a DB check constraint
    # (the regex operator `~` is PostgreSQL-specific).
    username = Column(String(30), CheckConstraint("username = lower(username) and username ~ '^[a-z0-9_-]+$'", name="user_valid_username"), unique=True)
    display_name = Column(String(64), nullable=True)
    passhash = Column(String(256), nullable=True)
    # NOTE(review): joined_at is a String column although it looks like a
    # timestamp — presumably it should be DateTime; confirm before migrating.
    joined_at = Column(String(256), nullable=True)

    collections = relationship("ImageCollection", back_populates="owner")
+
+
class Image(Base):
    """A tracked image file on disk, addressed by perceptual hash."""

    __tablename__ = "image"

    id = Column(BigInteger, primary_key=True)
    type = Column(SmallInteger, index=True)   # IMG_TYPE_* source category
    format = Column(SmallInteger) # e.g. .jpg, .png (IMG_FORMAT_* code)
    name = Column(String(256), index=True)
    hash = Column(String(256), index=True)    # 'diff-color' Dhash string
    date = Column(DateTime, index=True)
    size = Column(BigInteger)                 # bytes
    # XXX Unique Indexes this length do not work with MySQL.
    # Use PostgreSQL or SQLite instead.
    pathname = Column(String(8192), unique=True)

    tags = relationship(
        "ImageTag",
        back_populates="image"
    )
    in_collections = relationship(
        "ImageCollection",
        secondary="imageincollection",
        back_populates="images"
    )

    @property
    def hash_obj(self):
        """The hash column as a Dhash value object."""
        return Dhash(self.hash)

    def url(self):
        """Canonical site-relative URL serving this image."""
        return f"/0/{self.hash_obj.diff}/{self.name}.{IMG_EXTENSIONS_REV[self.format]}"

    def filename(self):
        """Download filename, e.g. 'pic.jpg'."""
        return f'{self.name}.{IMG_EXTENSIONS_REV[self.format]}'

    def thumbnail_16x16_url(self):
        """URL of the pre-generated 16x16 thumbnail."""
        return f'/thumb/16x16/{self.hash}.png'

    @property
    def category_text(self):
        # deprecated
        return 'Uncategorized'

    def get_tags(self):
        """Names of all tags attached to this image."""
        return [t.name for t in self.tags]

    def set_tags(self, replace: bool, tags):
        """Attach `tags` to this image.

        Only tags not already present are inserted; when `replace` is true,
        tags absent from `tags` are deleted.
        """
        old_s = set(self.get_tags())
        new_s = set(tags)
        added_s = new_s - old_s
        remo_s = old_s - new_s
        # BUG FIX: the original called the undefined get_db(), inserted
        # every tag in new_s (violating the (image_id, name) unique
        # constraint for tags already present) and passed the relationship
        # object to insert().values(); use db.session, only the new tags,
        # and the image_id column.
        for t in added_s:
            db.session.execute(insert(ImageTag).values(
                image_id = self.id,
                name = t,
            ))
        if replace and remo_s:
            db.session.execute(delete(ImageTag).where(
                ImageTag.image_id == self.id, ImageTag.name.in_(remo_s)))
        db.session.commit()

    def show_i(self):
        """Display the image interactively (kitty icat or PIL viewer)."""
        show_image(self.pathname)
+
+
class ImageTag(Base):
    """A free-form tag attached to an image; unique per (image, name)."""

    __tablename__ = "imagetag"

    id = Column(BigInteger, primary_key=True)
    image_id = Column(BigInteger, ForeignKey("image.id"))
    name = Column(String(64))

    image = relationship("Image", back_populates="tags")

    __table_args__ = (
        UniqueConstraint("image_id", "name"),
    )
+
+
class ImageCollection(Base):
    """A named, optionally user-owned set of images."""
    __tablename__ = "imagecollection"

    id = Column(BigInteger, primary_key = True)
    name = Column(String(128), nullable=False)
    # NULL owner means an "unclaimed" collection.
    owner_id = Column(BigInteger, ForeignKey("user.id"), nullable=True)
    description = Column(String(4096), nullable=True)

    owner = relationship("User", back_populates="collections")
    images = relationship("Image", back_populates="in_collections", secondary="imageincollection")
+
+
# Association table linking images to collections (many-to-many), with a
# timestamp recording when each image was added.
ImageInCollection = Table(
    "imageincollection",
    Base.metadata,
    Column("image_id", BigInteger, ForeignKey("image.id"), primary_key=True),
    Column("collection_id", BigInteger, ForeignKey("imagecollection.id"), primary_key=True),
    Column("since", DateTime, server_default=func.current_timestamp(), index=True),
)
+
+
+## TODO maybe merge it with Image?
+class Video(Base):
+ __tablename__ = "video"
+
+ id = Column(BigInteger, primary_key = True)
+ type = Column(SmallInteger, index=True)
+ format = Column(SmallInteger) # e.g. .mp4
+ name = Column(String(256), index=True)
+ duration = Column(Float, index=True) # in seconds
+ date = Column(DateTime, index=True)
+ size = Column(BigInteger)
+ # XXX Unique Indexes this length do not work with MySQL.
+ # Use PostgreSQL or SQLite instead.
+ pathname = Column(String(8192), unique=True)
+
+ # helper methods
+ def url(self):
+ return '/video/{}.{}'.format(self.name, IMG_EXTENSIONS_REV[self.format] )
+ def filename(self):
+ return '{}.{}'.format(self.name, IMG_EXTENSIONS_REV[self.format])
+ def duration_s(self):
+ if self.duration > 3600.:
+ return "{0}:{1:02}:{2:02}".format(
+ int(self.duration // 3600),
+ int(self.duration // 60 % 60),
+ int(self.duration % 60)
+ )
+ else:
+ return "{0:02}:{1:02}".format(
+ int(self.duration // 60),
+ int(self.duration % 60)
+ )
+
+
class Page(Base):
    """A crawled web page known to the search engine."""
    __tablename__ = "page"

    id = Column(BigInteger, primary_key=True)
    url = Column(String(4096), unique=True, nullable=False)
    title = Column(String(1024), index=True, nullable=False)
    description = Column(String(1024), nullable=True)
    # renamed from pub_date
    created_at = Column(DateTime, server_default=func.current_timestamp(), index=True, nullable=False)
    # NOTE(review): semantics of is_robots are not evident from this file
    # (robots.txt related?) — confirm before relying on it.
    is_robots = Column(Boolean, server_default=text('false'))
+
class Href(Base):
    """An outbound URL seen by the crawler; may resolve to a Page via page_id."""
    __tablename__ = 'href'

    id = Column(BigInteger, primary_key=True)
    page_id = Column(BigInteger, ForeignKey('page.id'), nullable=True, unique=True)
    url = Column(String(4096), unique=True, nullable=False)

    @classmethod
    def find(cls, name: str):
        """Return the Href row for `name`, inserting it if missing."""
        # BUG FIX: the original filtered on the nonexistent Href.content
        # column (AttributeError at runtime); this table's column is `url`.
        # Also renamed the classmethod's first parameter self -> cls.
        hr = db.session.execute(
            select(Href).where(Href.url == name)
        ).scalar()
        if hr is None:
            hr = db.session.execute(insert(Href).values(
                url = name,
                page_id = None
            ).returning(Href)).scalar()
        return hr
+
class PageLink(Base):
    """A directed hyperlink between two crawled pages, with a rank weight."""
    __tablename__ = 'pagelink'
    id = Column(BigInteger,primary_key=True)
    from_page_id= Column(BigInteger, ForeignKey('page.id'), nullable=False)
    to_page_id = Column(BigInteger, ForeignKey('page.id'), nullable=False)
    rank = Column(Integer, server_default=text('1'), nullable=False)

    __table_args__ = (
        # Only one link row per ordered page pair.
        UniqueConstraint('from_page_id', 'to_page_id'),
    )
+
class Word(Base):
    """A normalized search token; may point at a canonical parent form."""
    __tablename__ = 'word'

    id = Column(BigInteger, primary_key=True)
    content = Column(String(256), unique=True, nullable=False)
    # Optional canonical/stem form of this word.
    parent_id = Column(BigInteger, ForeignKey('word.id'), nullable=True)

    @classmethod
    def find(cls, name: str):
        """Return the Word row for `name`, inserting it if missing."""
        # Renamed the classmethod's first parameter self -> cls (idiom);
        # callers are unaffected.
        wo = db.session.execute(
            select(Word).where(Word.content == name)
        ).scalar()
        if wo is None:
            wo = db.session.execute(insert(Word).values(
                content = name,
                parent_id = None
            ).returning(Word)).scalar()
        return wo
+
+class AnchorWord(Base):
+ __tablename__ = "anchorword"
+ id = Column(BigInteger,primary_key=True)
+ word_id = Column(BigInteger, ForeignKey('word.id'), index = True, nullable=False)
+ from_page_id = Column(BigInteger, ForeignKey('page.id'))
+ to_page_id = Column(BigInteger, ForeignKey('href.id'))
+ count = Column(Integer, server_default=text('1'))
+
+
+
...

## END MODELS

if __name__ == '__main__':
    # NOTE(review): CSI is imported but unused here — likely a leftover from
    # an interactive/debug session; confirm before removing.
    from . import CSI
diff --git a/xefyl/search_engine.py b/xefyl/search_engine.py
new file mode 100644
index 0000000..0d1f178
--- /dev/null
+++ b/xefyl/search_engine.py
@@ -0,0 +1,62 @@
+
+
+import requests
+import subprocess
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin, urlsplit
+import datetime
+import os
+import random
+import re
+import json
+
+from sqlalchemy import func, select
+from . import APP_BASE_DIR
+
# NOTE: read at import time; a missing config file aborts the import.
CONFIG = json.load(open(os.path.join(APP_BASE_DIR, "config", "searchengine.json")))

# Path of the external wordenizer binary (see wordenizer.c), overridable via env.
WORDENIZER_PATH = os.getenv('WORDENIZER_PATH', os.path.join(APP_BASE_DIR, 'wordenizer'))
+
+# import models definitions
+
+from .models import db, Page, Href, AnchorWord, Word
+
+
+
class Query(object):
    """A parsed search query: plain words, exact words and exclusions."""

    def __init__(self, words=(), exact_words=(), exclude=()):
        self.words = words
        self.exact_words = exact_words
        self.exclude = exclude

    @classmethod
    def from_string(cls, s):
        """Parse a user-typed query string.

        Words are lowercased; a leading '-' marks an exclusion; stopwords
        (from the search-engine config) are dropped from positive terms.
        NOTE(review): exact_words is never populated here.
        """
        swa = s.lower()
        sw = re.findall(r"-?\w+", swa)
        words, exclude = [], []
        for w in sw:
            if (w.startswith('-')):
                exclude.append(w.lstrip('-'))
            else:
                if w not in CONFIG['stopwords']:
                    words.append(w)
        return cls(words=words, exclude=exclude)

    def build(self, page=1):
        """Build a SELECT of pages matching every word (set intersection),
        ordered by total anchor-word count.

        NOTE(review): `exclude` is not applied yet (see commented code), the
        `page` argument is unused (no LIMIT/OFFSET), and `q` stays None for
        an empty word list — callers must check is_empty() first.
        """
        wqc = len(self.words)  # NOTE(review): computed but unused
        #if self.exclude:
        #    wq &= (Word.content != w)
        q = None
        for w in self.words:
            # Word -> AnchorWord -> Href -> Page join chain; note that
            # AnchorWord.to_page_id references href.id despite its name.
            q1 = (select(Page)
                .join(Href, Href.page_id == Page.id)
                .join(AnchorWord, Href.id == AnchorWord.to_page_id)
                .join(Word).where(Word.content == w).group_by(Page.id))
            if q is None:
                q = q1
            else:
                q = q.intersect(q1)
        q = q.order_by(func.sum(AnchorWord.count).desc())
        return q

    def is_empty(self):
        """True when the query has no positive terms."""
        return not self.words and not self.exact_words
+
+...
diff --git a/xefyl/static/style.css b/xefyl/static/style.css
new file mode 100644
index 0000000..72d79b0
--- /dev/null
+++ b/xefyl/static/style.css
@@ -0,0 +1,35 @@
/* Base typography and page frame */
body {font-family: 'Oxygen', 'Liberation Sans', sans-serif; color: #181818; background-color: #fafafa}
main {margin: auto; max-width: 1280px}
a:link {color: #00809a}
a:visited {color: #1840a5}

/* TRACKER STYLES */
/* Responsive card grid: as many 240px-min columns as fit the viewport */
ul.tracker-grid {
  display: grid;
  grid-template-columns: repeat(auto-fill,minmax(240px,1fr));
  gap: 1rem;
  list-style: none;
  padding: 0 1em;
}
/* Individual image cards */
ul.tracker-grid > * {
  background-color: #eaeaea;
  border-radius: 4px;
  min-height: 4em;
  text-align: center;
}
/* Metadata lines inside a card */
ul.tracker-grid > * > span {
  font-size: 0.875em;
  color: #355;
  display: block;
}

/* The perceptual hash shown under each thumbnail */
ul.tracker-grid > * > .tracker-image-hash {
  font-size: 80%;
  font-family: monospace;
}

.tracker-image-thumbnail {
  display: block;
  width: 100%;
  margin: auto;
}
diff --git a/xefyl/templates/404.html b/xefyl/templates/404.html
new file mode 100644
index 0000000..a976589
--- /dev/null
+++ b/xefyl/templates/404.html
@@ -0,0 +1,12 @@
+{% extends "base.html" %}
+{% from 'inc/title.html' import title_tag with context %}
+
+{% block title %}{{ title_tag('Not found', False) }}{% endblock %}
+
+{% block body %}
+Not Found
+
+The following URL: {{ request.path }} has not been found on this server.
+
+Back to homepage
+{% endblock %}
\ No newline at end of file
diff --git a/xefyl/templates/500.html b/xefyl/templates/500.html
new file mode 100644
index 0000000..db942d7
--- /dev/null
+++ b/xefyl/templates/500.html
@@ -0,0 +1,12 @@
+{% extends "base.html" %}
+{% from 'inc/title.html' import title_tag with context %}
+
+{% block title %}{{ title_tag('% _ %', False) }}{% endblock %}
+
+{% block body %}
+% _ %
+
+An error occurred while processing your request.
+
+Refresh
+{% endblock %}
\ No newline at end of file
diff --git a/xefyl/templates/base.html b/xefyl/templates/base.html
new file mode 100644
index 0000000..1aac603
--- /dev/null
+++ b/xefyl/templates/base.html
@@ -0,0 +1,14 @@
+
+
+
+ {% block title %}{{ app_name }} {% endblock %}
+
+
+
+
+
+
+ {% block body %}{% endblock %}
+
+
+
diff --git a/xefyl/templates/collection.html b/xefyl/templates/collection.html
new file mode 100644
index 0000000..97cb6d6
--- /dev/null
+++ b/xefyl/templates/collection.html
@@ -0,0 +1,26 @@
+{% extends "base.html" %}
+{% from 'inc/title.html' import title_tag with context %}
+{% from "inc/imagegrid.html" import imagegrid with context %}
+
+{#
+ REQUIRES:
+ - coll: ImageCollection
+ - coll_items: paginated Image collection
+#}
+
+{% block title %}{{ title_tag(coll.name + ' | Collections') }}{% endblock %}
+
+{% block body %}
+{{ coll.name }}
+
+
+ {% if coll.owner %}Collection by {{ coll.owner.username }}
+ {% else %}Unclaimed Collection
+ {% endif %}
+
+
+{% include "inc/newcollectionlink.html" %}
+
+{{ imagegrid(coll_items, "") }}
+
+{% endblock %}
\ No newline at end of file
diff --git a/xefyl/templates/collectionlist.html b/xefyl/templates/collectionlist.html
new file mode 100644
index 0000000..a9ab6b9
--- /dev/null
+++ b/xefyl/templates/collectionlist.html
@@ -0,0 +1,23 @@
+{% include "base.html" %}
+{% from 'inc/title.html' import title_tag with context %}
+
+{% block title %}{{ title_tag('Collections') }}{% endblock %}
+
+{% block body %}
+Collections
+
+{% include "inc/newcollectionlink.html" %}
+
+
+ {% for coll in l %}
+
+ {{ coll.name }}
+ {% if coll.owner %}
+ by
+ {% endif %}
+ - {{ coll.images.count() }} items
+
+ {% endfor %}
+
+
+{% endblock %}
\ No newline at end of file
diff --git a/xefyl/templates/home.html b/xefyl/templates/home.html
new file mode 100644
index 0000000..f2297c9
--- /dev/null
+++ b/xefyl/templates/home.html
@@ -0,0 +1,14 @@
+{% extends "base.html" %}
+{% from 'inc/title.html' import title_tag with context %}
+
+{% block title %}{{ title_tag(None) }}{% endblock %}
+
+{% block body %}
+Sakuragasaki46 Surf
+
+
+{% endblock %}
diff --git a/xefyl/templates/htmldir.html b/xefyl/templates/htmldir.html
new file mode 100644
index 0000000..9773871
--- /dev/null
+++ b/xefyl/templates/htmldir.html
@@ -0,0 +1,12 @@
+{% extends "base.html" %}
+{% from 'inc/title.html' import title_tag with context %}
+
+{% block title %}{{ title_tag('HTML | File Browser') }}{% endblock %}
+
+{% block body %}
+
+{% endblock %}
\ No newline at end of file
diff --git a/xefyl/templates/inc/imagegrid.html b/xefyl/templates/inc/imagegrid.html
new file mode 100644
index 0000000..a9d27af
--- /dev/null
+++ b/xefyl/templates/inc/imagegrid.html
@@ -0,0 +1,59 @@
+
+{# This is the template for image grid used in Tracker and Collections. #}
+
+{#
+ Variables required:
+ - l (a Collection)
+ - and_url (a query string segment)
+#}
+
+{% macro imagegrid(l, and_url) %}
+
+{% if l.total > 0 %}
+
+Showing page {{ l.page }} of {{ l.pages }} .
+
+
+
+{% if l.has_next or l.has_prev %}
+
+ {% if l.has_prev %}
+ ← Previous page
+ {% endif %}{% if l.has_next %}
+ Next page →
+ {% endif %}
+
+{% endif %}
+
+{{ l.total }} pictures total.
+{#Not seeing the picture you want? Update the tracker.
+{% if 'update' in request.args %}{% endif %}#}
+
+{% else %}
+
+Nothing to show.
+
+{% endif %}
+
+{% endmacro %}
+
diff --git a/xefyl/templates/inc/newcollectionlink.html b/xefyl/templates/inc/newcollectionlink.html
new file mode 100644
index 0000000..5861299
--- /dev/null
+++ b/xefyl/templates/inc/newcollectionlink.html
@@ -0,0 +1,5 @@
+{% if current_user.is_authenticated %}
++ Create new collection
+{% else %}
+Log in to start creating collections.
+{% endif %}
\ No newline at end of file
diff --git a/xefyl/templates/inc/title.html b/xefyl/templates/inc/title.html
new file mode 100644
index 0000000..6b9d7b2
--- /dev/null
+++ b/xefyl/templates/inc/title.html
@@ -0,0 +1,16 @@
+
+{% macro title_tag(name, robots=True) %}
+
+{%- if name -%}
+{{ name }} | {{ app_name }}
+{%- else -%}
+{{ app_name }}
+{%- endif -%}
+
+{% if robots %}
+
+{% else %}
+
+{% endif %}
+
+{% endmacro %}
\ No newline at end of file
diff --git a/xefyl/templates/tracker.html b/xefyl/templates/tracker.html
new file mode 100644
index 0000000..67e0e61
--- /dev/null
+++ b/xefyl/templates/tracker.html
@@ -0,0 +1,75 @@
+{% extends "base.html" %}
+{% from 'inc/title.html' import title_tag with context %}
+
+{% block title %}{{ title_tag('Tracker Status') }}{% endblock %}
+
+{% block body %}
+
+{% set and_url = "" %}
+{% if 'date' in request.args %}
+{% set and_url = and_url + "&date=" + (request.args['date'] | urlencode) %}
+{% endif %}
+{% if 'sort' in request.args %}
+{% set and_url = and_url + "&sort=" + (request.args['sort'] | urlencode) %}
+{% endif %}
+{% if 'hash' in request.args %}
+{% set and_url = and_url + "&hash=" + (request.args['hash'] | urlencode) %}
+{% endif %}
+{% if "type" in request.args %}
+{% set and_url = and_url + "&type=" + (request.args["type"] | urlencode) %}
+{% endif %}
+{% if "tags" in request.args %}
+{% set and_url = and_url + "&tags=" + (request.args["tags"] | urlencode) %}
+{% endif %}
+
+
+ Show by:
+ Date:
+ {% for y in range(datetime.datetime.now().year * 12 + datetime.datetime.now().month - 1, 2017 * 12, -1) %}
+ {{ y // 12 }}.{{ "%02d" | format(y % 12 + 1) }} ·
+ {% endfor %}
+
+ Tags:
+
+
+ Sort by:
+ {% for m in ["hash", "oldest"] %}
+ {{ m }} ·
+ {% endfor %}
+
+
+
+{% include "inc/newcollectionlink.html" %}
+
+{% from "inc/imagegrid.html" import imagegrid %}
+{{ imagegrid(l, and_url=and_url) }}
+
+{% endblock %}
+
+
+{% block scripts %}
+
+{% endblock %}
diff --git a/xefyl/utils.py b/xefyl/utils.py
new file mode 100644
index 0000000..61cc148
--- /dev/null
+++ b/xefyl/utils.py
@@ -0,0 +1,65 @@
+
+
+import locale
+import logging
+import os
+from flask import abort, send_from_directory
+from werkzeug.routing import BaseConverter
+
+_log = logging.getLogger(__name__)
+
+from .hashing import Dhash
+
def is_nonascii(s):
    """True when `s` contains any character outside the ASCII range."""
    return not s.isascii()
+
+
def has_surrogates(s):
    """True when `s` cannot be encoded as UTF-8, i.e. it contains lone
    surrogate code points (typical of surrogateescape-decoded filenames)."""
    try:
        s.encode("utf-8")
    except UnicodeEncodeError:
        return True
    else:
        return False
+
def uni_send_from_directory(root, filename, **kwargs):
    """send_from_directory wrapper that copes with non-ASCII filenames.

    Flask’s “send_file” feature has a bug in the etag code, disallowing
    strings with surrogates, so etags are suppressed for any non-ASCII
    filename.  The optional `image_ext` kwarg ('webp'/'heic') forces the
    correct mimetype for formats the default guesser may miss.
    """
    # BUG FIX: the original wrapped a no-op assignment
    # (`actual_filename = filename`) in try/except — dead code whose
    # except/log branch was unreachable; only the etag suppression matters.
    if is_nonascii(filename):
        kwargs['etag'] = False

    if not os.path.exists(os.path.join(root, filename)):
        _log.warning(f"file {filename!r} does not exist")

    image_ext = kwargs.pop('image_ext', None)
    if image_ext == 'webp':
        kwargs['mimetype'] = 'image/webp'
    elif image_ext == 'heic':
        kwargs['mimetype'] = 'image/heic'

    return send_from_directory(root, filename, **kwargs)
+
class DhashConverter(BaseConverter):
    """URL converter for the 16-hex-digit differential part of a Dhash."""
    regex = r'[a-f0-9]{16}'

    def to_python(self, value):
        # Wrap the raw hex string; only the differential part travels in URLs.
        return Dhash(value)
    def to_url(self, value):
        # Expects a Dhash instance; emit its differential part.
        return value.diff
+
class ImgExtConverter(BaseConverter):
    """URL converter matching supported image extensions (see IMG_EXTENSIONS)."""
    regex = r'(?:jpe?g|gif|png|webp|heic)'
+
class VideoExtConverter(BaseConverter):
    """URL converter matching supported video extensions (see VIDEO_EXTENSIONS)."""
    regex = r'(?:mp4)'
+
diff --git a/xefyl/website/__init__.py b/xefyl/website/__init__.py
new file mode 100644
index 0000000..5e52ba2
--- /dev/null
+++ b/xefyl/website/__init__.py
@@ -0,0 +1,13 @@
+
+
# Registry of all website blueprints; the app factory iterates this list.
blueprints = []

# Each submodule exposes a Blueprint named `bp`; the name is rebound by
# every import, so each one is appended immediately after its import.
from .images import bp
blueprints.append(bp)

from .accounts import bp
blueprints.append(bp)

from .htmlfiles import bp
blueprints.append(bp)
+
diff --git a/xefyl/website/accounts.py b/xefyl/website/accounts.py
new file mode 100644
index 0000000..1b0d767
--- /dev/null
+++ b/xefyl/website/accounts.py
@@ -0,0 +1,14 @@
+
+
+from flask import Blueprint, abort
+
+bp = Blueprint("accounts", __name__)
+
+@bp.route("/login/")
+def login():
+ abort(501)
+
+@bp.route("/logout/")
+def logout():
+ abort(501)
+
diff --git a/xefyl/website/htmlfiles.py b/xefyl/website/htmlfiles.py
new file mode 100644
index 0000000..14cbf73
--- /dev/null
+++ b/xefyl/website/htmlfiles.py
@@ -0,0 +1,29 @@
+from flask import Blueprint, abort, redirect, send_from_directory, render_template
+import os, sys
+import codecs
+import html
+from werkzeug.exceptions import NotFound
+from ..utils import has_surrogates, uni_send_from_directory
+
+bp = Blueprint("htmlfiles", __name__)
+
@bp.route('/HTML/')
def listdir_html():
    """List the .html files in the downloads directory as (href, label) pairs.

    NOTE(review): the directory is a hard-coded user home path — it should
    come from configuration.
    """
    l = []
    for f in os.listdir('/home/sakux/Downloads/HTML/'):
        if f.endswith('.html'):
            # XXX encoding fixes
            #try:
            #    f = f.encode('latin-1', "surrogateescape").decode('utf-8')
            #except Exception:
            #    l.append(('', '(encoding error)'))
            #    continue
            l.append((f, f))
    l.sort()
    return render_template("htmldir.html", l=l)
+
@bp.route('/HTML/')
def files_html(fp):
    """Serve one HTML file from the downloads directory.

    NOTE(review): the route string declares no URL parameter although the
    view takes `fp` — it looks mangled; presumably '/HTML/<path:fp>'.
    """
    # workaround for Unicode
    return uni_send_from_directory('/home/sakux/Downloads/HTML', fp)
+
diff --git a/xefyl/website/images.py b/xefyl/website/images.py
new file mode 100644
index 0000000..0f100e2
--- /dev/null
+++ b/xefyl/website/images.py
@@ -0,0 +1,51 @@
+
+
+
+from flask import Blueprint, abort, redirect
+import warnings
+import os
+
+from sqlalchemy import select
+
+from ..image_tracker import IMG_THUMB_PATH
+from ..models import IMG_EXTENSIONS, Image, Video, db
+from ..utils import uni_send_from_directory
+
+bp = Blueprint('images', __name__)
+
+
@bp.route('/0//.')
def show_image(h, name, ext):
    """Serve an image addressed by (hash prefix, name, extension).

    NOTE(review): the route string declares no URL parameters although the
    view takes three — it looks mangled; presumably
    '/0/<dhash:h>/<name>.<imgext:ext>'.
    """
    im: Image | None = db.session.execute(db.select(Image).where(
        (Image.hash.ilike(f"{h.diff}%")),
        (Image.name == name),
        (Image.format == (IMG_EXTENSIONS[ext] if isinstance(ext, str) else ext))
    )).scalar()
    # BUG FIX: dropped a leftover debug print that ran on every request.
    if im and im.pathname:
        return uni_send_from_directory(*os.path.split(im.pathname), image_ext=ext)
    warnings.warn(f'Not found: {h=} / {name=} / {ext=} ({IMG_EXTENSIONS[ext]})', UserWarning)
    abort(404)
+
@bp.route('/*/.')
def lookup_image(name, ext):
    """Redirect a (name, extension) lookup to the canonical hashed image URL;
    404 when no matching image (with a pathname) exists."""
    fmt = IMG_EXTENSIONS[ext] if isinstance(ext, str) else ext
    im: Image | None = db.session.execute(
        db.select(Image).where((Image.name == name), (Image.format == fmt))
    ).scalar()
    if im is not None and im.pathname:
        return redirect(im.url())
    abort(404)
+
@bp.route('/video/.')
def send_video(name):
    """Serve a tracked video by name, or a plain-text 404.

    NOTE(review): the route string declares no URL parameter for `name` —
    it looks mangled; presumably '/video/<name>.<videoext:ext>'.
    """
    vi: Video | None = db.session.execute(select(Video).where(Video.name == name)).scalar()
    if vi:
        return uni_send_from_directory(*os.path.split(vi.pathname))
    return 'Not Found ', 404
+
@bp.route('/thumb/16x16/')
def thumb_16x16(filename):
    """Serve a pre-generated 16x16 PNG thumbnail by filename.

    NOTE(review): the route string declares no URL parameter — it looks
    mangled; presumably '/thumb/16x16/<filename>'.
    """
    return uni_send_from_directory(IMG_THUMB_PATH, filename)
+
diff --git a/xefyl/wordenizer.py b/xefyl/wordenizer.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/xefyl/wordenizer.py
@@ -0,0 +1 @@
+