From b97f882bfee584e2c65f249fba6473eeda63e444 Mon Sep 17 00:00:00 2001
From: akanealw
Date: Fri, 15 Mar 2024 05:55:39 -0500
Subject: [PATCH] separated archivebox and added sonic search

---
Review notes (this area is ignored by `git am`):
  * sonic is now attached to the reverse-proxy network. archivebox joins only
    that network, so without this the hostname "sonic" is not resolvable.
  * sonic gets the same restart policy as archivebox.
  * Line layout and indentation were reconstructed from a whitespace-collapsed
    copy; the context lines of the mixed/compose.yml hunk assume 2-space YAML
    indentation and should be checked against the target file.
  * The "index" blob ids are those of the original commit.

 archivebox/.env        | 15 ++++++++++
 archivebox/compose.yml | 61 +++++++++++++++++++++++++++++++++++++++
 archivebox/sonic.cfg   | 65 ++++++++++++++++++++++++++++++++++++++++++
 mixed/compose.yml      | 60 +++++++++++++++++++-------------------
 4 files changed, 171 insertions(+), 30 deletions(-)
 create mode 100755 archivebox/.env
 create mode 100644 archivebox/compose.yml
 create mode 100644 archivebox/sonic.cfg

diff --git a/archivebox/.env b/archivebox/.env
new file mode 100755
index 0000000..8614486
--- /dev/null
+++ b/archivebox/.env
@@ -0,0 +1,15 @@
+#GLOBAL SETTINGS
+COMPOSE_HTTP_TIMEOUT=120
+COMPOSE_IGNORE_ORPHANS=1
+DOCKER_CONFIGS=.
+DOCKERGID=999
+DOCKERHOSTNAME=DockerServer1
+DOCKERLOGGING_MAXFILE=10
+DOCKERLOGGING_MAXSIZE=200k
+PGID=1000
+PUID=1000
+UMASK=000
+TZ=America/Chicago
+BACKUP_DIR=/mnt/truenas-backups
+MEDIA_DIR=/mnt/truenas-media
+STORAGE_DIR=/mnt/truenas-storage
diff --git a/archivebox/compose.yml b/archivebox/compose.yml
new file mode 100644
index 0000000..d73f74d
--- /dev/null
+++ b/archivebox/compose.yml
@@ -0,0 +1,61 @@
+version: '3'
+
+services:
+  archivebox:
+    container_name: archivebox
+    image: archivebox/archivebox:dev
+    command: server --quick-init 0.0.0.0:8000
+    environment:
+      - ALLOWED_HOSTS=*
+      - MEDIA_MAX_SIZE=750m
+      - PUBLIC_ADD_VIEW=True
+      - PUBLIC_INDEX=True
+      - PUBLIC_SNAPSHOTS=False
+      - SAVE_TITLE=True
+      - SAVE_FAVICON=True
+      - SAVE_WGET=False
+      - SAVE_WARC=False
+      - SAVE_PDF=True
+      - SAVE_SCREENSHOT=True
+      - SAVE_DOM=False
+      - SAVE_SINGLEFILE=True
+      - SAVE_READABILITY=True
+      - SAVE_MERCURY=False
+      - SAVE_GIT=False
+      - SAVE_MEDIA=False
+      - SAVE_ARCHIVE_DOT_ORG=False
+      - SEARCH_BACKEND_ENGINE=sonic
+      - SEARCH_BACKEND_HOST_NAME=sonic
+      - SEARCH_BACKEND_PASSWORD=eeXa7chux9sheiviep8thaw3Yel1te
+    networks:
+      - reverse-proxy
+    ports:
+      - 8000:8000
+    restart: always
+    volumes:
+      - ${DOCKER_CONFIGS}/data:/data
+
+  ### Example: To run the Sonic full-text search backend, first download the config file to sonic.cfg
+  # $ curl -O https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/etc/sonic.cfg
+  # After starting, backfill any existing Snapshots into the full-text index:
+  # $ docker-compose run archivebox update --index-only
+
+  sonic:
+    image: valeriansaliou/sonic:latest
+    expose:
+      - 1491
+    environment:
+      - SEARCH_BACKEND_PASSWORD=eeXa7chux9sheiviep8thaw3Yel1te
+    # Must share a network with archivebox: that service joins only reverse-proxy,
+    # so the hostname "sonic" (SEARCH_BACKEND_HOST_NAME) is resolvable only there.
+    networks:
+      - reverse-proxy
+    restart: always
+    volumes:
+      - ${DOCKER_CONFIGS}/sonic.cfg:/etc/sonic.cfg:ro
+      - ${DOCKER_CONFIGS}/data/sonic:/var/lib/sonic/store
+
+networks:
+  reverse-proxy:
+    name: reverse-proxy
+    external: true
diff --git a/archivebox/sonic.cfg b/archivebox/sonic.cfg
new file mode 100644
index 0000000..6c1d15c
--- /dev/null
+++ b/archivebox/sonic.cfg
@@ -0,0 +1,65 @@
+# Sonic
+# Fast, lightweight and schema-less search backend
+# Configuration file
+# Example: https://github.com/valeriansaliou/sonic/blob/master/config.cfg
+
+[server]
+
+log_level = "warn"
+
+
+[channel]
+
+inet = "0.0.0.0:1491"
+tcp_timeout = 300
+
+auth_password = "${env.SEARCH_BACKEND_PASSWORD}"
+
+[channel.search]
+
+query_limit_default = 65535
+query_limit_maximum = 65535
+query_alternates_try = 10
+
+suggest_limit_default = 5
+suggest_limit_maximum = 20
+
+
+[store]
+
+[store.kv]
+
+path = "/var/lib/sonic/store/kv/"
+
+retain_word_objects = 100000
+
+[store.kv.pool]
+
+inactive_after = 1800
+
+[store.kv.database]
+
+flush_after = 900
+
+compress = true
+parallelism = 2
+max_files = 100
+max_compactions = 1
+max_flushes = 1
+write_buffer = 16384
+write_ahead_log = true
+
+[store.fst]
+
+path = "/var/lib/sonic/store/fst/"
+
+[store.fst.pool]
+
+inactive_after = 300
+
+[store.fst.graph]
+
+consolidate_after = 180
+
+max_size = 2048
+max_words = 250000
diff --git a/mixed/compose.yml b/mixed/compose.yml
index aec5c4a..33208ee 100644
--- a/mixed/compose.yml
+++ b/mixed/compose.yml
@@ -21,36 +21,36 @@ services:
       CONTINUE_ON_ERROR: true
     restart: always
 
-  archivebox:
-    container_name: archivebox
-    image: archivebox/archivebox:dev
-    command: server --quick-init 0.0.0.0:8000
-    environment:
-      - ALLOWED_HOSTS=*
-      - MEDIA_MAX_SIZE=750m
-      - PUBLIC_ADD_VIEW=True
-      - PUBLIC_INDEX=True
-      - PUBLIC_SNAPSHOTS=False
-      - SAVE_TITLE=True
-      - SAVE_FAVICON=True
-      - SAVE_WGET=False
-      - SAVE_WARC=False
-      - SAVE_PDF=True
-      - SAVE_SCREENSHOT=True
-      - SAVE_DOM=False
-      - SAVE_SINGLEFILE=True
-      - SAVE_READABILITY=True
-      - SAVE_MERCURY=False
-      - SAVE_GIT=False
-      - SAVE_MEDIA=False
-      - SAVE_ARCHIVE_DOT_ORG=False
-    networks:
-      - reverse-proxy
-    ports:
-      - 8000:8000
-    restart: always
-    volumes:
-      - ${DOCKER_CONFIGS}/archivebox/data:/data
+  # archivebox:
+  #   container_name: archivebox
+  #   image: archivebox/archivebox:dev
+  #   command: server --quick-init 0.0.0.0:8000
+  #   environment:
+  #     - ALLOWED_HOSTS=*
+  #     - MEDIA_MAX_SIZE=750m
+  #     - PUBLIC_ADD_VIEW=True
+  #     - PUBLIC_INDEX=True
+  #     - PUBLIC_SNAPSHOTS=False
+  #     - SAVE_TITLE=True
+  #     - SAVE_FAVICON=True
+  #     - SAVE_WGET=False
+  #     - SAVE_WARC=False
+  #     - SAVE_PDF=True
+  #     - SAVE_SCREENSHOT=True
+  #     - SAVE_DOM=False
+  #     - SAVE_SINGLEFILE=True
+  #     - SAVE_READABILITY=True
+  #     - SAVE_MERCURY=False
+  #     - SAVE_GIT=False
+  #     - SAVE_MEDIA=False
+  #     - SAVE_ARCHIVE_DOT_ORG=False
+  #   networks:
+  #     - reverse-proxy
+  #   ports:
+  #     - 8000:8000
+  #   restart: always
+  #   volumes:
+  #     - ${DOCKER_CONFIGS}/archivebox/data:/data
 
   collabora:
     container_name: collabora