diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e53e566
--- /dev/null
+++ b/README.md
@@ -0,0 +1,74 @@
+# teledrive_restore
+
+A command-line utility to restore files from a flat TeleDrive export dump back into their original folder hierarchy, using the metadata exported from TeleDrive as a `files.json` file.
+
+## Requirements
+
+- Python 3.6 or newer (no third-party packages required)
+
+## Background
+
+TeleDrive stores all uploaded files in Telegram as flat message attachments. When you export your data, you get:
+
+1. A `files.json` metadata file describing every file and folder, including parent-child relationships.
+2. A flat directory of downloaded files, all sitting in the same folder.
+
+This script reads the metadata and reconstructs the full directory tree, placing every file in its correct folder.
+
+## Usage
+
+```
+python3 teledrive_restore.py <operation> <files_json> <source_dir> <dest_dir>
+```
+
+### Arguments
+
+| Argument | Description |
+|---|---|
+| `operation` | `mv` to **move** files, or `cp` to **copy** files |
+| `files_json` | Full path to the TeleDrive JSON export (any filename is accepted) |
+| `source_dir` | Path to the flat directory containing all your downloaded TeleDrive files |
+| `dest_dir` | Path to the root destination directory where the folder tree will be created |
+
+### Help
+
+```
+python3 teledrive_restore.py --help
+```
+
+## Examples
+
+**Copy** all files into a restored tree (original flat dump is kept intact):
+
+```bash
+python3 teledrive_restore.py cp /home/user/files.json /mnt/teledrive_dump /mnt/restored
+```
+
+**Move** all files into a restored tree:
+
+```bash
+python3 teledrive_restore.py mv /home/user/files.json /mnt/teledrive_dump /mnt/restored
+```
+
+## Behaviour
+
+- The destination directory is created automatically if it does not exist.
+- Nested folders of arbitrary depth are supported.
+- Files at the root level (no parent folder) are placed directly in `dest_dir`.
+- If a file **cannot be found** in `source_dir`, a `WARNING` is printed and the script continues with the next file.
+- If a file **already exists** at the destination, a `WARNING` is printed and the file is skipped.
+- A summary of moved/copied, missing, and skipped files is printed at the end.
+
+## Output example
+
+```
+Loaded 6800 entries from /home/user/files.json
+Files to process: 5440
+WARNING: File not found in source, skipping: some_missing_file.pdf
+...
+
+--- Summary ---
+Copied: 5438
+Missing (not found in source): 1
+Skipped (already at destination): 1
+```
diff --git a/teledrive_restore.py b/teledrive_restore.py
new file mode 100755
index 0000000..e7c433a
--- /dev/null
+++ b/teledrive_restore.py
@@ -0,0 +1,213 @@
+#!/usr/bin/env python3
+"""
+teledrive_restore.py - Restore TeleDrive files to their original folder structure.
+
+Usage:
+    python3 teledrive_restore.py <operation> <files_json> <source_dir> <dest_dir>
+    python3 teledrive_restore.py --help
+"""
+
+import sys
+import json
+import os
+import shutil
+
+HELP_TEXT = """
+teledrive_restore.py - Restore TeleDrive files to their original folder structure
+
+USAGE:
+    python3 teledrive_restore.py <operation> <files_json> <source_dir> <dest_dir>
+
+ARGUMENTS:
+    operation     Action to perform on each file:
+                    mv - Move files into the restored folder structure
+                    cp - Copy files into the restored folder structure
+
+    files_json    Full path to the TeleDrive export file (typically files.json).
+                  The file may have any name as long as it contains valid TeleDrive JSON.
+
+    source_dir    Path to the directory that contains the flat dump of all TeleDrive
+                  files. The script will look for each file by its original name inside
+                  this directory.
+
+    dest_dir      Path to the destination root directory where the folder tree will be
+                  created and files placed. The directory will be created if it does not
+                  exist.
+
+BEHAVIOUR:
+    - The script reads the TeleDrive metadata and reconstructs the full folder hierarchy
+      (including arbitrarily nested sub-folders) under dest_dir.
+    - Each file is looked up in source_dir by its original filename (the "name" field in
+      the JSON). If the file is not found a WARNING is printed and the script continues.
+    - If a destination file already exists it is skipped and a WARNING is printed.
+    - Files at the root level (parent_id = null) are placed directly in dest_dir.
+    - The script prints a summary of moved/copied, skipped, and missing files when done.
+
+EXAMPLES:
+    Copy all files, preserving originals:
+        python3 teledrive_restore.py cp /home/user/files.json /mnt/flat_dump /mnt/restored
+
+    Move all files into the restored tree:
+        python3 teledrive_restore.py mv /home/user/files.json /mnt/flat_dump /mnt/restored
+"""
+
+
+def build_path_map(entries):
+    """Return a dict mapping entry id -> full relative path string.
+
+    Each entry is a dict with "id" and "name" keys and an optional "parent_id".
+    An entry whose parent_id is None, absent, or not the id of any entry is
+    treated as a root-level entry, so its path is just its own name.
+
+    Raises ValueError if the parent_id links form a cycle, because such an
+    entry has no well-defined path.
+    """
+    id_map = {e["id"]: e for e in entries}
+    path_cache = {}
+
+    for start_id in id_map:
+        # Walk towards the root with a loop rather than recursion so deeply
+        # nested trees cannot hit the recursion limit; `chain` collects the
+        # entries (child first) whose paths are not known yet.
+        chain = []
+        visited = set()
+        parent_path = None
+        current_id = start_id
+        while True:
+            if current_id in path_cache:
+                parent_path = path_cache[current_id]
+                break
+            if current_id in visited:
+                raise ValueError(f"Cyclic parent_id chain at entry id {current_id!r}")
+            visited.add(current_id)
+            chain.append(current_id)
+            parent_id = id_map[current_id].get("parent_id")
+            if parent_id is None or parent_id not in id_map:
+                break
+            current_id = parent_id
+
+        # Resolve from the topmost unresolved ancestor back down to start_id.
+        for entry_id in reversed(chain):
+            name = id_map[entry_id]["name"]
+            if parent_path is None:
+                parent_path = name
+            else:
+                parent_path = os.path.join(parent_path, name)
+            path_cache[entry_id] = parent_path
+
+    return path_cache
+
+
+def _die(message):
+    """Print an ERROR message to stderr and exit with status 1."""
+    print(f"ERROR: {message}", file=sys.stderr)
+    sys.exit(1)
+
+
+def _load_entries(json_path):
+    """Read the TeleDrive export and return its list of entry dicts.
+
+    Raises ValueError if the file cannot be read or parsed, or if it is not a
+    list of objects that each have an "id" and a string "name".
+    """
+    try:
+        with open(json_path, "r", encoding="utf-8") as f:
+            entries = json.load(f)
+    except json.JSONDecodeError as e:
+        raise ValueError(f"Failed to parse JSON file: {e}") from e
+    except (OSError, UnicodeDecodeError) as e:
+        raise ValueError(f"Failed to read JSON file: {e}") from e
+
+    if not isinstance(entries, list):
+        raise ValueError("JSON file must contain a list of entries.")
+    for index, entry in enumerate(entries):
+        if not isinstance(entry, dict) or "id" not in entry:
+            raise ValueError(f'Entry {index} is not an object with an "id" field.')
+        if not isinstance(entry.get("name"), str):
+            raise ValueError(f'Entry {index} has no string "name" field.')
+    return entries
+
+
+def main():
+    """Validate the command line, restore every file, and print a summary."""
+    if len(sys.argv) == 2 and sys.argv[1] in ("--help", "-h"):
+        print(HELP_TEXT)
+        sys.exit(0)
+
+    if len(sys.argv) != 5:
+        print("ERROR: Wrong number of arguments.", file=sys.stderr)
+        print("Run with --help for usage information.", file=sys.stderr)
+        sys.exit(1)
+
+    operation, json_path, source_dir, dest_dir = sys.argv[1:5]
+
+    if operation not in ("mv", "cp"):
+        _die(f"First argument must be 'mv' or 'cp', got '{operation}'.")
+
+    if not os.path.isfile(json_path):
+        _die(f"JSON file not found: {json_path}")
+
+    if not os.path.isdir(source_dir):
+        _die(f"Source directory not found: {source_dir}")
+
+    try:
+        entries = _load_entries(json_path)
+    except ValueError as e:
+        _die(e)
+
+    print(f"Loaded {len(entries)} entries from {json_path}")
+
+    try:
+        path_map = build_path_map(entries)
+    except ValueError as e:
+        _die(e)
+
+    # Create the root up front so it exists even when no file gets restored.
+    try:
+        os.makedirs(dest_dir, exist_ok=True)
+    except OSError as e:
+        _die(f"Cannot create destination directory: {e}")
+
+    files = [e for e in entries if e.get("type") != "folder"]
+    print(f"Files to process: {len(files)}")
+
+    stats = {"done": 0, "missing": 0, "skipped": 0}
+
+    for entry in files:
+        filename = entry["name"]
+        # The dump is flat, so the source file is located by name alone.
+        src = os.path.join(source_dir, filename)
+
+        if not os.path.isfile(src):
+            print(f"WARNING: File not found in source, skipping: {filename}")
+            stats["missing"] += 1
+            continue
+
+        rel_path = path_map[entry["id"]]
+        dest_file = os.path.join(dest_dir, rel_path)
+        dest_folder = os.path.dirname(dest_file)
+
+        os.makedirs(dest_folder, exist_ok=True)
+
+        if os.path.exists(dest_file):
+            print(f"WARNING: Destination already exists, skipping: {dest_file}")
+            stats["skipped"] += 1
+            continue
+
+        if operation == "cp":
+            shutil.copy2(src, dest_file)
+        else:
+            shutil.move(src, dest_file)
+
+        stats["done"] += 1
+
+    action_word = "Moved" if operation == "mv" else "Copied"
+    print()
+    print("--- Summary ---")
+    print(f"{action_word}: {stats['done']}")
+    print(f"Missing (not found in source): {stats['missing']}")
+    print(f"Skipped (already at destination): {stats['skipped']}")
+
+
+if __name__ == "__main__":
+    main()