diff --git a/.gitignore b/.gitignore index d133e9a..222ac3c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ # Ignora tutti i backup -backups/ \ No newline at end of file +backups/ +__pycache__/ \ No newline at end of file diff --git a/README.md b/README.md index d319979..4c63cc0 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,258 @@ # sld-filebackups-py -This is a script that take backups of the folders declared in a json file, and it also running a backup_rotate. \ No newline at end of file +A lightweight, zero-dependency Python backup utility that archives files and folders defined in a JSON list, with automatic rotation of old backups. Designed to run as a daily cron job on Linux servers. + +--- + +## Table of Contents + +- [Features](#features) +- [Project Structure](#project-structure) +- [How It Works](#how-it-works) +- [Installation](#installation) +- [Configuration](#configuration) + - [config.json](#configjson) + - [dir_backups.json](#dir_backupsjson) + - [Environment (init.py)](#environment-initpy) +- [Usage](#usage) +- [Backup Storage Layout](#backup-storage-layout) +- [Backup Rotation](#backup-rotation) +- [Logging](#logging) +- [Running as a Cron Job](#running-as-a-cron-job) +- [Requirements](#requirements) +- [License](#license) + +--- + +## Features + +- **Selective backup** — define which paths to back up in a JSON file, each with its own enable/disable flag; no need to touch the code to add or remove entries +- **Folder backups** — directories are archived as `.tar.gz` (only the folder name is preserved as the archive root, no absolute path leaking) +- **File backups** — single files are compressed as `.gz` +- **Skip-if-exists logic** — if a backup for today already exists, it is skipped automatically, making the script safe to call multiple times per day +- **Auto-rotation** — after each backup run, old archives beyond a configurable retention count are automatically deleted per subfolder +- **Dry-run mode** — preview exactly what 
rotation would delete, without removing anything +- **Structured logging** — always outputs to console (useful for reading cron output); optionally writes to a persistent log file +- **Multi-environment support** — switch between `local`, `local2`, and `prod` path configurations in a single file +- **Graceful error handling** — malformed JSON entries, missing paths, empty folders, and permission errors are caught and logged without crashing the whole run + +--- + +## Project Structure +``` +backups_script/ +├── script.py # Entry point and CLI argument parser +├── functions.py # Core logic: backup, rotation, checks +├── constants.py # Shared state: paths, loaded config, timestamps +├── logger.py # Logging setup (console + optional file handler) +├── init.py # Environment selector (local / prod) +├── config.json # Runtime configuration +├── dir_backups.json # Declarative list of paths to back up +└── LICENSE # GNU GPL v3 +``` + +### Module Responsibilities + +| File | Role | +|---|---| +| `init.py` | Defines `ROOT_DIR_APP` and `ROOT_DIR_BACKUPS` based on the selected environment. Imported first by everything else. | +| `constants.py` | Builds all derived paths (backup folder, config paths), loads `config.json` and `dir_backups.json` into memory, captures today's date and current time. | +| `logger.py` | Reads `config.json` directly and configures the root Python logger with a `StreamHandler` (always on) and an optional `FileHandler`. | +| `functions.py` | Contains all business logic: `default_backup_dir()`, `check_existing_folders()`, `backups_now()`, `autorotate_backups()`, `show_enabled()`. | +| `script.py` | Bootstraps logging, then parses CLI arguments and calls the appropriate function(s). With no flags, runs a full backup + rotation. | + +--- + +## How It Works + +1. `script.py` calls `setup_logger()`, which reads `config.json` and sets up logging. +2. `default_backup_dir()` ensures the root backup folder and the host-named subfolder exist. +3. 
`check_existing_folders()` reads `dir_backups.json`, filters for enabled entries (`flag == 1`), verifies each path exists, and classifies it as `"folder"` or `"file"`. Empty or unreadable directories are excluded. +4. `backups_now()` iterates the verified paths: + - For **folders**: creates a `_YYYY-MM-DD.tar.gz` archive using Python's `tarfile` module. + - For **files**: creates a `_YYYY-MM-DD.gz` compressed copy using `gzip` + `shutil.copyfileobj`. + - If the target archive already exists today, the entry is skipped. +5. `autorotate_backups()` scans each immediate subfolder of the host backup directory, sorts `.gz` files by modification time (newest first), and deletes any beyond the `keep_backups` threshold. + +--- + +## Installation + +No packages to install. The script uses Python's standard library only. +```bash +git clone https://gitea.sld-server.org/sld-admin/sld-filebackups-py.git +cd sld-filebackups-py +``` + +Then set your environment and paths in `init.py` and `dir_backups.json`. + +--- + +## Configuration + +### `config.json` +```json +{ + "keep_backups": 7, + "logs": false, + "logs_path": "/home/backups/logs" +} +``` + +| Key | Type | Default | Description | +|---|---|---|---| +| `keep_backups` | integer | `7` | How many recent backup archives to retain per subfolder. Older ones are deleted by the rotation step. | +| `logs` | boolean | `false` | If `true`, a `backup.log` file is written to `logs_path` in addition to console output. | +| `logs_path` | string | `~/backups/logs` | Directory where `backup.log` will be created. Created automatically if it does not exist. | + +> **Note:** Even when `logs` is `false`, all output is still printed to stdout/stderr, which means cron will capture it via mail or redirection as usual. + +--- + +### `dir_backups.json` + +This is the declarative list of everything to back up. 
Each entry is a JSON array of exactly three values: +```json +[ + [ "/absolute/path/to/folder", 1, "BackupName" ], + [ "/absolute/path/to/file", 1, "ConfigBackup" ], + [ "/path/that/is/disabled", 0, "OldEntry" ] +] +``` + +| Position | Field | Description | +|---|---|---| +| 0 | `path` | Absolute path to the file or folder to back up. | +| 1 | `enabled` | `1` = include in backup runs. `0` = skip entirely (the entry is parsed but never processed). | +| 2 | `name` | A short identifier used as the subfolder name inside the backup destination, and as the prefix of the archive filename. Must be unique across entries. | + +**Tips:** +- To temporarily disable an entry without deleting it, set the flag to `0`. +- The `name` field becomes a directory under `//`, so avoid spaces and special characters. +- Folders are only backed up if they are non-empty and readable. + +--- + +### Environment (`init.py`) +```python +env = "local" # Switch between: "local", "local2", "prod" +``` + +| Environment | `ROOT_DIR_APP` | `ROOT_DIR_BACKUPS` | +|---|---|---| +| `local` | `/home/sld-admin/Scrivania/backups_script/` | `/backups/Daily_File_Backups/` | +| `local2` | `/home/simo-positive/Desktop/backups_script/` | `/backups/Daily_File_Backups/` | +| `prod` | `/opt/sld-backups/` | `/home/backups/backups_root/Daily_File_Backups/` | + +If an unknown value is set, the script exits immediately with an error. 
+ +--- + +## Usage +```bash +# Full backup + auto-rotation (default, no flags needed) +python3 script.py + +# Show which paths are enabled and which are disabled +python3 script.py --show + +# Check whether declared paths exist on disk and print a status report +python3 script.py --check + +# Run backup with verbose debug output +python3 script.py --debug + +# Run only the rotation step (no new backups created) +python3 script.py --rotate + +# Preview what rotation would delete, without actually deleting anything +python3 script.py --rotate --dry +``` + +### CLI Reference + +| Flag | Long form | Description | +|---|---|---| +| `-s` | `--show` | Print enabled and disabled paths from `dir_backups.json`. | +| `-d` | `--debug` | Run backup with `debug="on"`, which enables verbose path-checking output. | +| `-c` | `--check` | Run `check_existing_folders()` and print a detailed status for each declared path. | +| `-r` | `--rotate` | Run `autorotate_backups()` only. Can be combined with `--dry`. | +| | `--dry` | Dry-run mode for `--rotate`: logs candidates for deletion but deletes nothing. | + +--- + +## Backup Storage Layout + +Backups are written under: +``` +/ +└── / + ├── Documents/ + │ ├── Documents_2026-03-10.tar.gz + │ ├── Documents_2026-03-11.tar.gz + │ └── Documents_2026-03-12.tar.gz + └── ConfigBackup/ + ├── ConfigBackup_2026-03-10.gz + └── ConfigBackup_2026-03-11.gz +``` + +- Each entry in `dir_backups.json` gets its own subfolder named after its `name` field. +- Archives are named `_YYYY-MM-DD.tar.gz` (folders) or `_YYYY-MM-DD.gz` (files). +- The host's hostname is used as a top-level grouping folder, which makes it easy to collect backups from multiple machines into the same root. + +--- + +## Backup Rotation + +The rotation step (`autorotate_backups`) runs automatically after every backup, or can be triggered manually with `--rotate`. + +**Logic:** +1. Scans each immediate subfolder of `//`. +2. Finds all `*.gz` files (this covers both `.gz` and `.tar.gz`). 
+3. Sorts them by modification time, newest first. +4. Keeps the first `keep_backups` (default: 7) and deletes the rest. + +**Dry-run** (`--rotate --dry`) logs exactly which files would be deleted, with no filesystem changes. Useful for verifying the retention setting before applying it. + +--- + +## Logging + +All functions use Python's standard `logging` module via a named logger (`__name__`). The root logger is configured by `logger.py` at startup. + +- **Console output** is always active (via `StreamHandler`), regardless of the `logs` setting. +- **File output** is added when `"logs": true` is set in `config.json`. The log file is `/backup.log` and is appended to on each run. +- Log format: `YYYY-MM-DD HH:MM:SS [LEVEL] message` + +--- + +## Running as a Cron Job + +To run a full backup every day at 2:00 AM: +```bash +crontab -e +``` +``` +0 2 * * * /usr/bin/python3 /opt/sld-backups/script.py >> /home/backups/logs/cron.log 2>&1 +``` + +Since the script always writes to stdout, cron output redirection captures the full run log even if file logging is disabled in `config.json`. + +--- + +## Requirements + +- Python **3.6+** +- **No third-party packages** — uses only the standard library: + - `tarfile`, `gzip`, `shutil` — archiving and compression + - `logging` — structured output + - `argparse` — CLI argument parsing + - `pathlib` — path handling + - `socket` — hostname detection + - `json` — configuration loading + +--- + +## License + +GNU General Public License v3.0 — see [LICENSE](LICENSE) for full terms. \ No newline at end of file diff --git a/README_IT.md b/README_IT.md new file mode 100644 index 0000000..d2c29ef --- /dev/null +++ b/README_IT.md @@ -0,0 +1,258 @@ +# sld-filebackups-py + +Utility di backup in Python, leggera e senza dipendenze esterne, che archivia file e cartelle dichiarati in un file JSON con rotazione automatica dei backup più vecchi. Pensata per girare come cron job giornaliero su server Linux. 
+ +--- + +## Indice + +- [Funzionalità](#funzionalità) +- [Struttura del progetto](#struttura-del-progetto) +- [Come funziona](#come-funziona) +- [Installazione](#installazione) +- [Configurazione](#configurazione) + - [config.json](#configjson) + - [dir_backups.json](#dir_backupsjson) + - [Ambiente (init.py)](#ambiente-initpy) +- [Utilizzo](#utilizzo) +- [Struttura dei backup](#struttura-dei-backup) +- [Rotazione automatica](#rotazione-automatica) +- [Logging](#logging) +- [Esecuzione come cron job](#esecuzione-come-cron-job) +- [Requisiti](#requisiti) +- [Licenza](#licenza) + +--- + +## Funzionalità + +- **Backup selettivo** — definisci i percorsi da salvare in un file JSON, con un flag di abilitazione per ogni voce; non è necessario toccare il codice per aggiungere o rimuovere entry +- **Backup di cartelle** — le directory vengono archiviate come `.tar.gz` (solo il nome della cartella viene preservato come radice dell'archivio, nessun percorso assoluto esposto) +- **Backup di file singoli** — i file vengono compressi come `.gz` +- **Skip automatico se già esiste** — se un backup per la data odierna è già presente, viene saltato; il script può essere lanciato più volte al giorno senza duplicati +- **Rotazione automatica** — dopo ogni esecuzione, gli archivi più vecchi oltre una soglia configurabile vengono eliminati per ogni sottocartella +- **Modalità dry-run** — anteprima precisa di cosa verrebbe eliminato dalla rotazione, senza cancellare nulla +- **Logging strutturato** — output sempre presente su console (utile per leggere l'output di cron); opzionalmente scrive su file di log persistente +- **Supporto multi-ambiente** — switch tra configurazioni `local`, `local2` e `prod` in un unico file +- **Gestione degli errori robusta** — entry JSON malformate, percorsi mancanti, cartelle vuote ed errori di permessi vengono catturati e loggati senza interrompere l'intera esecuzione + +--- + +## Struttura del progetto +``` +backups_script/ +├── script.py # Punto di 
ingresso e parser degli argomenti CLI +├── functions.py # Logica principale: backup, rotazione, controlli +├── constants.py # Stato condiviso: percorsi, config caricata, timestamp +├── logger.py # Setup del logging (console + file handler opzionale) +├── init.py # Selettore di ambiente (local / prod) +├── config.json # Configurazione runtime +├── dir_backups.json # Lista dichiarativa dei percorsi da salvare +└── LICENSE # GNU GPL v3 +``` + +### Responsabilità dei moduli + +| File | Ruolo | +|---|---| +| `init.py` | Definisce `ROOT_DIR_APP` e `ROOT_DIR_BACKUPS` in base all'ambiente selezionato. Importato per primo da tutto il resto. | +| `constants.py` | Costruisce tutti i percorsi derivati (cartella backup, percorsi config), carica `config.json` e `dir_backups.json` in memoria, cattura la data odierna e l'ora corrente. | +| `logger.py` | Legge `config.json` direttamente e configura il logger root di Python con uno `StreamHandler` (sempre attivo) e un `FileHandler` opzionale. | +| `functions.py` | Contiene tutta la logica di business: `default_backup_dir()`, `check_existing_folders()`, `backups_now()`, `autorotate_backups()`, `show_enabled()`. | +| `script.py` | Inizializza il logging, poi analizza gli argomenti CLI e chiama la funzione appropriata. Senza flag, esegue backup completo + rotazione. | + +--- + +## Come funziona + +1. `script.py` chiama `setup_logger()`, che legge `config.json` e configura il logging. +2. `default_backup_dir()` verifica che la cartella di backup root e la sottocartella con il nome host esistano, creandole se necessario. +3. `check_existing_folders()` legge `dir_backups.json`, filtra le entry abilitate (`flag == 1`), verifica che ogni percorso esista su disco e lo classifica come `"folder"` o `"file"`. Le directory vuote o non leggibili vengono escluse. +4. `backups_now()` itera i percorsi verificati: + - Per le **cartelle**: crea un archivio `_YYYY-MM-DD.tar.gz` tramite il modulo `tarfile`. 
+ - Per i **file singoli**: crea una copia compressa `_YYYY-MM-DD.gz` tramite `gzip` + `shutil.copyfileobj`. + - Se l'archivio di oggi esiste già, l'entry viene saltata. +5. `autorotate_backups()` scansiona ogni sottocartella diretta della directory di backup dell'host, ordina i file `.gz` per data di modifica (più recenti prima) ed elimina quelli oltre la soglia `keep_backups`. + +--- + +## Installazione + +Nessun pacchetto da installare. Il script usa solo la libreria standard di Python. +```bash +git clone https://gitea.sld-server.org/sld-admin/sld-filebackups-py.git +cd sld-filebackups-py +``` + +Poi imposta il tuo ambiente e i percorsi in `init.py` e `dir_backups.json`. + +--- + +## Configurazione + +### `config.json` +```json +{ + "keep_backups": 7, + "logs": false, + "logs_path": "/home/backups/logs" +} +``` + +| Chiave | Tipo | Default | Descrizione | +|---|---|---|---| +| `keep_backups` | intero | `7` | Quanti archivi recenti conservare per sottocartella. I più vecchi vengono eliminati dalla rotazione. | +| `logs` | booleano | `false` | Se `true`, viene scritto un file `backup.log` in `logs_path` oltre all'output su console. | +| `logs_path` | stringa | `~/backups/logs` | Cartella dove verrà creato `backup.log`. Viene creata automaticamente se non esiste. | + +> **Nota:** Anche quando `logs` è `false`, tutto l'output viene comunque stampato su stdout/stderr, quindi cron lo cattura normalmente tramite mail o redirezione. + +--- + +### `dir_backups.json` + +È la lista dichiarativa di tutto ciò che deve essere salvato. Ogni entry è un array JSON di esattamente tre valori: +```json +[ + [ "/percorso/assoluto/cartella", 1, "NomeBackup" ], + [ "/percorso/assoluto/file", 1, "BackupConfig" ], + [ "/percorso/disabilitato", 0, "VecchiaEntry" ] +] +``` + +| Posizione | Campo | Descrizione | +|---|---|---| +| 0 | `percorso` | Percorso assoluto del file o della cartella da salvare. | +| 1 | `abilitato` | `1` = includi nelle esecuzioni di backup. 
`0` = salta completamente (la entry viene letta ma mai elaborata). | + | 2 | `nome` | Identificativo breve usato come nome della sottocartella nella destinazione del backup e come prefisso del nome dell'archivio. Deve essere unico tra le entry. | + +**Suggerimenti:** +- Per disabilitare temporaneamente una entry senza eliminarla, imposta il flag a `0`. +- Il campo `nome` diventa una directory dentro `//`, quindi evita spazi e caratteri speciali. +- Le cartelle vengono salvate solo se non sono vuote e sono leggibili. + +--- + +### Ambiente (`init.py`) +```python +env = "local" # Valori disponibili: "local", "local2", "prod" +``` + +| Ambiente | `ROOT_DIR_APP` | `ROOT_DIR_BACKUPS` | +|---|---|---| +| `local` | `/home/sld-admin/Scrivania/backups_script/` | `/backups/Daily_File_Backups/` | +| `local2` | `/home/simo-positive/Desktop/backups_script/` | `/backups/Daily_File_Backups/` | +| `prod` | `/opt/sld-backups/` | `/home/backups/backups_root/Daily_File_Backups/` | + +Se viene impostato un valore sconosciuto, lo script termina immediatamente con un errore. + +--- + +## Utilizzo +```bash +# Backup completo + rotazione automatica (comportamento di default, nessun flag richiesto) +python3 script.py + +# Mostra quali percorsi sono abilitati e quali disabilitati +python3 script.py --show + +# Verifica se i percorsi dichiarati esistono su disco e stampa un report +python3 script.py --check + +# Esegui il backup con output di debug verboso +python3 script.py --debug + +# Esegui solo la rotazione (nessun nuovo backup creato) +python3 script.py --rotate + +# Anteprima di cosa verrebbe eliminato dalla rotazione, senza cancellare nulla +python3 script.py --rotate --dry +``` + +### Riferimento flag CLI + +| Flag | Forma lunga | Descrizione | +|---|---|---| +| `-s` | `--show` | Stampa i percorsi abilitati e disabilitati da `dir_backups.json`. | +| `-d` | `--debug` | Esegue il backup con `debug="on"`, abilitando output verboso sul controllo dei percorsi. 
| +| `-c` | `--check` | Esegue `check_existing_folders()` e stampa lo stato dettagliato per ogni percorso dichiarato. | +| `-r` | `--rotate` | Esegue solo `autorotate_backups()`. Può essere combinato con `--dry`. | +| | `--dry` | Modalità dry-run per `--rotate`: logga i candidati all'eliminazione ma non cancella nulla. | + +--- + +## Struttura dei backup + +I backup vengono scritti sotto: +``` +/ +└── / + ├── Documenti/ + │ ├── Documenti_2026-03-10.tar.gz + │ ├── Documenti_2026-03-11.tar.gz + │ └── Documenti_2026-03-12.tar.gz + └── BackupConfig/ + ├── BackupConfig_2026-03-10.gz + └── BackupConfig_2026-03-11.gz +``` + +- Ogni entry in `dir_backups.json` ottiene la propria sottocartella con il nome del campo `nome`. +- Gli archivi seguono il pattern `_YYYY-MM-DD.tar.gz` (cartelle) o `_YYYY-MM-DD.gz` (file). +- Il nome host della macchina viene usato come cartella di primo livello, rendendo semplice raccogliere backup di più macchine sotto la stessa root. + +--- + +## Rotazione automatica + +La rotazione (`autorotate_backups`) viene eseguita automaticamente dopo ogni backup, oppure può essere avviata manualmente con `--rotate`. + +**Logica:** +1. Scansiona ogni sottocartella diretta di `//`. +2. Trova tutti i file `*.gz` (copre sia `.gz` che `.tar.gz`). +3. Li ordina per data di modifica, dal più recente al più vecchio. +4. Conserva i primi `keep_backups` (default: 7) ed elimina i restanti. + +**Dry-run** (`--rotate --dry`) logga esattamente quali file verrebbero eliminati, senza nessuna modifica al filesystem. Utile per verificare l'impostazione di retention prima di applicarla. + +--- + +## Logging + +Tutte le funzioni usano il modulo standard `logging` di Python tramite un logger con nome (`__name__`). Il logger root viene configurato da `logger.py` all'avvio. + +- **Output su console** sempre attivo (via `StreamHandler`), indipendentemente dall'impostazione `logs`. +- **Output su file** aggiunto quando `"logs": true` è impostato in `config.json`. 
Il file di log è `/backup.log` e viene scritto in append ad ogni esecuzione. +- Formato log: `YYYY-MM-DD HH:MM:SS [LIVELLO] messaggio` + +--- + +## Esecuzione come cron job + +Per eseguire un backup completo ogni giorno alle 2:00: +```bash +crontab -e +``` +``` +0 2 * * * /usr/bin/python3 /opt/sld-backups/script.py >> /home/backups/logs/cron.log 2>&1 +``` + +Poiché lo script scrive sempre su stdout, la redirezione dell'output di cron cattura il log completo dell'esecuzione anche se il log su file è disabilitato in `config.json`. + +--- + +## Requisiti + +- Python **3.6+** +- **Nessun pacchetto di terze parti** — usa solo la libreria standard: + - `tarfile`, `gzip`, `shutil` — archiviazione e compressione + - `logging` — output strutturato + - `argparse` — parsing degli argomenti CLI + - `pathlib` — gestione dei percorsi + - `socket` — rilevamento del nome host + - `json` — caricamento della configurazione + +--- + +## Licenza + +GNU General Public License v3.0 — vedi [LICENSE](LICENSE) per i termini completi. 
\ No newline at end of file diff --git a/__pycache__/constants.cpython-313.pyc b/__pycache__/constants.cpython-313.pyc index bc3bbfe..a0c6824 100644 Binary files a/__pycache__/constants.cpython-313.pyc and b/__pycache__/constants.cpython-313.pyc differ diff --git a/__pycache__/functions.cpython-313.pyc b/__pycache__/functions.cpython-313.pyc index 7079255..fffaa5d 100644 Binary files a/__pycache__/functions.cpython-313.pyc and b/__pycache__/functions.cpython-313.pyc differ diff --git a/constants.py b/constants.py index 301b04f..ef97beb 100644 --- a/constants.py +++ b/constants.py @@ -1,9 +1,10 @@ -# constants.py +# constants.py (modificato) from init import * import socket import json from datetime import date, datetime import os +from pathlib import Path LISTFILE = os.path.join(ROOT_DIR_APP, "dir_backups.json") CONF_FILE = os.path.join(ROOT_DIR_APP, "config.json") @@ -13,11 +14,17 @@ DATETODAY = date.today() TIMENOW = datetime.now().strftime("%H:%M:%S") TIMEDATA = '[ ' + str(DATETODAY) + ' - ' + str(TIMENOW) + ' ]' -with open(LISTFILE, "r") as listfile: - JSON_LIST = json.load(listfile) +# safer load +try: + with open(LISTFILE, "r") as listfile: + JSON_LIST = json.load(listfile) +except Exception: + JSON_LIST = [] -with open(CONF_FILE, "r") as conf_file: - JSON_CONF = json.load(conf_file) - -print(TIMEDATA) +try: + with open(CONF_FILE, "r") as conf_file: + JSON_CONF = json.load(conf_file) +except Exception: + JSON_CONF = {} +# removed print(TIMEDATA) at module import time \ No newline at end of file diff --git a/dir_backups.json b/dir_backups.json index d4503ae..8b72802 100644 --- a/dir_backups.json +++ b/dir_backups.json @@ -1,4 +1,5 @@ [ [ "/folder/to/backups", 1, "BackupName" ], - [ "/file/to/backups/disabled/by/the/0/flag/next/to/this", 0, "BackupName2" ] + [ "/file/to/backups/disabled/by/the/0/flag/next/to/this", 0, "BackupName2" ], + [ "/home/sld-admin/Documents", 1, "Documents" ] ] diff --git a/functions.py b/functions.py index 58f482e..26bee59 100644 --- 
a/functions.py +++ b/functions.py @@ -1,167 +1,257 @@ -from constants import * +# functions.py from pathlib import Path -import os, gzip, tarfile, shutil - -## Create the backup default folders -def default_backup_dir(): - os.makedirs(HOST_BACKUP_FOLDER, exist_ok=True) - -from pathlib import Path -from constants import * +import logging import os +import gzip +import tarfile +import shutil +from typing import List, Tuple -def autorotate_backups(dry_run: bool = False): - """ - Scansiona tutte le sottocartelle immediate di HOST_BACKUP_FOLDER. - Per ogni sottocartella prende i file *.gz (inclusi .tar.gz), li ordina - per mtime (più nuovi prima), mantiene i primi `keep_backups` e rimuove - gli altri (a meno che dry_run==True). - Restituisce (candidates_found, actually_deleted). - """ +# Import Costants: (ROOT_DIR, JSON_LIST, JSON_CONF, HOST_BACKUP_FOLDER, DATETODAY, ...) +from constants import * +_LOG = logging.getLogger(__name__) + +def default_backup_dir() -> None: + """ + Ensure the host backup folder exists. + """ + try: + Path(HOST_BACKUP_FOLDER).mkdir(parents=True, exist_ok=True) + _LOG.info("Backup base directory ensured: %s", HOST_BACKUP_FOLDER) + except Exception: + _LOG.exception("Failed to create HOST_BACKUP_FOLDER: %s", HOST_BACKUP_FOLDER) + + +## Backup files rotation +def autorotate_backups(dry_run: bool = False) -> Tuple[int, int]: + """ + Rotate backup files in each immediate subfolder of HOST_BACKUP_FOLDER. + + Behavior: + - For each immediate subfolder, find files matching *.gz (this includes .tar.gz), + sort them by modification time (newest first), keep the first `keep_backups` + and delete the older ones. + - If dry_run is True, only log what would be deleted. 
+ + Returns: + (candidates_found, actually_deleted) + """ base = Path(HOST_BACKUP_FOLDER) if not base.exists(): - print("ERROR: HOST_BACKUP_FOLDER does not exist:", base) + _LOG.error("HOST_BACKUP_FOLDER does not exist: %s", base) return 0, 0 - keep = int(JSON_CONF.get("keep_backups", 7)) + try: + keep = int(JSON_CONF.get("keep_backups", 7)) + except Exception: + keep = 7 + _LOG.warning("Invalid keep_backups value in config, falling back to %d", keep) total_candidates = 0 total_deleted = 0 - # ottengo tutte le directory immediate dentro HOST_BACKUP_FOLDER + # immediate subdirectories targets = sorted([p for p in base.iterdir() if p.is_dir()]) if not targets: - print("No subfolders found in HOST_BACKUP_FOLDER:", base) + _LOG.info("No subfolders found in HOST_BACKUP_FOLDER: %s", base) return 0, 0 for folder in targets: - # prendi solo file (evita di includere directory per errore) - backups = sorted( - (f for f in folder.glob("*.gz") if f.is_file()), - key=lambda f: f.stat().st_mtime, - reverse=True - ) + try: + backups = sorted( + (f for f in folder.glob("*.gz") if f.is_file()), + key=lambda f: f.stat().st_mtime, + reverse=True + ) + except Exception: + _LOG.exception("Failed to list backups in folder: %s", folder) + continue old_backups = backups[keep:] - print("\nFolder:", folder) - print("Total backups:", len(backups)) - print("Keep:", keep) - print("Old to remove:", len(old_backups)) + _LOG.info("Folder: %s", folder) + _LOG.info(" Total backups found: %d", len(backups)) + _LOG.info(" Keep: %d", keep) + _LOG.info(" Old backups to remove: %d", len(old_backups)) for b in old_backups: - print(" Old backup:", b) + _LOG.info(" Candidate for removal: %s", b) - # elimina se non dry_run if not dry_run and old_backups: for b in old_backups: try: b.unlink() total_deleted += 1 - print(" -> deleted") - except Exception as e: - print(f" -> failed to delete {b}: {e}") + _LOG.info(" -> deleted: %s", b) + except Exception: + _LOG.exception(" -> failed to delete: %s", b) 
total_candidates += len(old_backups) - print("\nSummary:") - print(f" Candidates found: {total_candidates}") - print(f" Actually deleted: {total_deleted} (dry_run={dry_run})") + _LOG.info("Rotation summary: candidates_found=%d, actually_deleted=%d (dry_run=%s)", + total_candidates, total_deleted, dry_run) return total_candidates, total_deleted - - -## Show what backups path are enabled or disabled -def show_enabled(): - print() - print("### ENABLED PATHS ###") - for path, flag, name in JSON_LIST: - if flag > 0: - print(f"- {path}") - print ("") - print("### DISABLED PATHS ###") - for path, flag, name in JSON_LIST: - if flag == 0: - print(f"- {path}") - -## Checking which of the enabled path are available for a backup -def check_existing_folders(debug="off"): - checked_paths = [] - correct_folder = [] - correct_file = [] - notexists = [] - empty = [] - - - - for path, flag, namepath in JSON_LIST: - if flag != 1: +## Show what is enabled in the file json +def show_enabled() -> None: + """ + Log enabled and disabled paths defined in JSON_LIST. + """ + _LOG.info("### ENABLED PATHS ###") + for entry in JSON_LIST: + try: + path, flag, name = entry + except Exception: + _LOG.warning("Malformed entry in dir_backups.json: %s", entry) continue - pathnow = Path(path) + if flag and int(flag) > 0: + _LOG.info("- %s (name: %s)", path, name) + print("") + _LOG.info("### DISABLED PATHS ###") + for entry in JSON_LIST: + try: + path, flag, name = entry + except Exception: + continue + if int(flag) == 0: + _LOG.info("- %s (name: %s)", path, name) + + +## Check if the declared folder exists +def check_existing_folders(debug: str = "off") -> List[Tuple[Path, str, str]]: + """ + Check which enabled paths exist and classify them as 'folder' or 'file'. + + Returns a list of tuples: (Path(path), name, "folder"|"file") + + If a path is a directory, it is considered valid only if it contains at least one entry. 
+ """ + checked_paths: List[Tuple[Path, str, str]] = [] + correct_folder: List[str] = [] + correct_file: List[str] = [] + notexists: List[str] = [] + empty: List[str] = [] + + for entry in JSON_LIST: + try: + path_str, flag, namepath = entry + except Exception: + _LOG.warning("Skipping malformed entry: %s", entry) + continue + + try: + if int(flag) != 1: + continue + except Exception: + _LOG.warning("Invalid flag for entry %s, skipping", entry) + continue + + pathnow = Path(path_str) + if pathnow.exists(): - if pathnow.is_dir() and any(pathnow.iterdir()): - checked_paths.append([pathnow, namepath, "folder"]) - correct_folder.append(f"- Folder exists: {pathnow}") - elif pathnow.is_file(): - checked_paths.append([pathnow, namepath, "file"]) - correct_file.append(f"- File exists: {pathnow}") - else: - empty.append(f"- Empty folder or special file: {pathnow}") + try: + if pathnow.is_dir(): + try: + # consider non-empty directory only + if any(pathnow.iterdir()): + checked_paths.append((pathnow, namepath, "folder")) + correct_folder.append(f"- Folder exists: {pathnow}") + else: + empty.append(f"- Empty folder: {pathnow}") + except PermissionError: + _LOG.warning("Permission denied reading directory: %s", pathnow) + empty.append(f"- Unreadable/empty folder: {pathnow}") + elif pathnow.is_file(): + checked_paths.append((pathnow, namepath, "file")) + correct_file.append(f"- File exists: {pathnow}") + else: + empty.append(f"- Special file / unknown type: {pathnow}") + except Exception: + _LOG.exception("Error while checking path: %s", pathnow) else: notexists.append(f"- Path does not exist: {pathnow}") - if debug=="on": - print("###### CHECKING EXISTING FOLDERS/FILES ######") - print() - print(f"# FOLDERS CHECK OK - [ {len(correct_folder)} ] #") + if debug == "on": + _LOG.debug("###### CHECKING EXISTING FOLDERS/FILES ######") + _LOG.debug("# FOLDERS CHECK OK - [ %d ]", len(correct_folder)) for folder in correct_folder: - print(folder) - print("") - - print(f"# FILES CHECK OK 
- [ {len(correct_file)} ] #") + _LOG.debug(folder) + _LOG.debug("# FILES CHECK OK - [ %d ]", len(correct_file)) for file in correct_file: - print(file) - print("") - - print(f"# FOLDERS EMPTY - [ {len(empty)} ] #") + _LOG.debug(file) + _LOG.debug("# FOLDERS EMPTY - [ %d ]", len(empty)) for emptyfold in empty: - print(emptyfold) - print("") - - print(f"# FILES / FOLDERS NOT EXISTS - [ {len(notexists)} ] #") + _LOG.debug(emptyfold) + _LOG.debug("# FILES / FOLDERS NOT EXISTS - [ %d ]", len(notexists)) for not_exists in notexists: - print(not_exists) - print("") + _LOG.debug(not_exists) return checked_paths -## Function available for the backup -def backups_now(debug="off"): - listnow = check_existing_folders() + +## Backups action +def backups_now(debug: str = "off") -> None: + """ + Perform backups for each valid path discovered by check_existing_folders. + + - Directories are archived as tar.gz + - Single files are compressed as .gz + + If debug == "on", additional logging is emitted. + """ + listnow = check_existing_folders(debug=debug) base_backup = Path(HOST_BACKUP_FOLDER) - base_backup.mkdir(parents=True, exist_ok=True) + try: + base_backup.mkdir(parents=True, exist_ok=True) + except Exception: + _LOG.exception("Failed to ensure base backup directory: %s", base_backup) + return + + date_str = str(DATETODAY) # DATETODAY is provided by constants.py (date object) for path, name, backtype in listnow: pathbackup = base_backup / name - pathbackup.mkdir(parents=True, exist_ok=True) + try: + pathbackup.mkdir(parents=True, exist_ok=True) + except Exception: + _LOG.exception("Failed to create backup subfolder: %s", pathbackup) + continue if backtype == "folder": - tar_path = pathbackup / f"{name}_{DATETODAY}.tar.gz" - if not tar_path.exists(): - if debug=="on": - print(f"Backing up folder: {path}") + tar_filename = f"{name}_{date_str}.tar.gz" + tar_path = pathbackup / tar_filename + if tar_path.exists(): + _LOG.info("Folder backup already exists, skipping: %s", tar_path) 
+ continue + + _LOG.info("Backing up folder: %s -> %s", path, tar_path) + try: + # create a tar.gz archive; arcname preserves only the folder name with tarfile.open(tar_path, "w:gz") as tar: tar.add(path, arcname=path.name) + _LOG.info("Successfully created archive: %s", tar_path) + except Exception: + _LOG.exception("Failed to create tar.gz for folder: %s", path) elif backtype == "file": - gz_path = pathbackup / f"{name}_{DATETODAY}.gz" - if not gz_path.exists(): - if debug=="on": - print(f"Backing up file: {path}") + gz_filename = f"{name}_{date_str}.gz" + gz_path = pathbackup / gz_filename + if gz_path.exists(): + _LOG.info("File backup already exists, skipping: %s", gz_path) + continue + + _LOG.info("Backing up file: %s -> %s", path, gz_path) + try: + # open source file and compress into gzip file with open(path, "rb") as f_in, gzip.open(gz_path, "wb") as f_out: - shutil.copyfileobj(f_in, f_out) \ No newline at end of file + shutil.copyfileobj(f_in, f_out) + _LOG.info("Successfully created gzip: %s", gz_path) + except Exception: + _LOG.exception("Failed to create gzip for file: %s", path) + else: + _LOG.warning("Unknown backtype '%s' for path: %s", backtype, path) \ No newline at end of file diff --git a/logger.py b/logger.py new file mode 100644 index 0000000..0c47170 --- /dev/null +++ b/logger.py @@ -0,0 +1,50 @@ +# logger.py +import logging +import json +from pathlib import Path +import os + +def _load_config(): + cfg_path = Path(__file__).parent / "config.json" + if cfg_path.exists(): + try: + return json.loads(cfg_path.read_text()) + except Exception: + return {} + return {} + +def setup_logger(): + cfg = _load_config() + logs_enabled = bool(cfg.get("logs", False)) + logs_path = cfg.get("logs_path", None) + + root_logger = logging.getLogger() + # reset handlers (utile se viene richiamato più volte) + root_logger.handlers = [] + + formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s") + + # always add a stream handler so messages appear on 
console (useful for cron output) + sh = logging.StreamHandler() + sh.setFormatter(formatter) + sh.setLevel(logging.INFO) + root_logger.addHandler(sh) + + if logs_enabled: + if logs_path is None: + # default fallback + logs_path = str(Path.home() / "backups" / "logs") + log_dir = Path(logs_path) + log_dir.mkdir(parents=True, exist_ok=True) + fh = logging.FileHandler(log_dir / "backup.log") + fh.setFormatter(formatter) + fh.setLevel(logging.INFO) + root_logger.addHandler(fh) + + root_logger.setLevel(logging.INFO) + + # informative note when file logging is disabled + if not logs_enabled: + root_logger.info("File logging disabled (config.json: logs=false)") + else: + root_logger.info(f"File logging enabled, log file: {logs_path}/backup.log") \ No newline at end of file diff --git a/script.py b/script.py index 09866ca..3b6542e 100755 --- a/script.py +++ b/script.py @@ -1,5 +1,12 @@ #!/bin/python3 import argparse + +# configure logging first (logger reads config.json located next to this file) +from logger import setup_logger +setup_logger() + +import logging +# ora importiamo le funzioni (che useranno logging invece di print) from functions import * default_backup_dir() @@ -18,9 +25,7 @@ elif args.debug: backups_now(debug="on") elif args.check: checked = check_existing_folders(debug="on") - #print(checked) elif args.rotate: - # passa il flag dry al chiamante; default è delete se non specifichi --dry autorotate_backups(dry_run=args.dry) else: backups_now()