# blog/index_gen.py

import json
import re
import subprocess
from urllib.parse import unquote, urlparse

def get_local_path_from_url(url):
    """
    Map a blog post URL to the matching relative local file path.

    The URL path is split on "/" and everything after the first "main"
    segment is taken as the file path, which is then prefixed with "blog/".
    Percent-escapes are decoded so the result names the file as it is
    spelled on disk rather than in its URL-encoded form.

    Example:
        https://git.xargana.tr/glitchy/blog/src/branch/main/blog_init.md
        -> blog/blog_init.md

    Returns:
        The "blog/"-prefixed path, or None when the URL has no "main"
        segment or nothing follows that segment.
    """
    # e.g. /glitchy/blog/src/branch/main/blog_init.md
    segments = urlparse(url).path.strip("/").split("/")

    try:
        branch_idx = segments.index("main")
    except ValueError:
        return None

    # Decode after locating "main" so an escaped "/" cannot shift the split.
    relpath = unquote("/".join(segments[branch_idx + 1:]))
    if not relpath:
        # The URL points at the branch itself, not at a file.
        return None
    return f"blog/{relpath}"

def get_commit_info(filepath):
    """
    Return the subject and committer date of the last commit touching a file.

    Runs ``git log -1 --pretty=%s%n%cI -- <filepath>`` in the current working
    directory; the format prints the commit subject on the first line and the
    committer date (strict ISO 8601) on the second.

    Returns:
        A ``(message, date)`` tuple of strings. Falls back to
        ``("No commit info", "unknown")`` when git exits with an error, the
        git executable cannot be started, or git reports no commit for the
        path (empty output).
    """
    fallback = ("No commit info", "unknown")
    try:
        result = subprocess.run(
            ["git", "log", "-1", "--pretty=%s%n%cI", "--", filepath],
            stdout=subprocess.PIPE,
            text=True,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing or non-executable git binary.
        return fallback

    # Split before stripping so an empty subject line keeps its position
    # and is not mistaken for the date.
    subject, newline, date = result.stdout.partition("\n")
    date = date.strip()
    if not newline or not date:
        # git succeeded but printed nothing: the path has no history.
        return fallback
    return subject.strip(), date

def parse_blog_links(md_text):
    """
    Collect every ``- [title](url)`` item found in a Markdown string.

    Returns a list of ``{"title": ..., "url": ...}`` dicts, one per match,
    in the order the matches appear in the text.
    """
    entries = []
    for match in re.finditer(r"- \[(.*?)\]\((.*?)\)", md_text):
        title, url = match.groups()
        entries.append({"title": title, "url": url})
    return entries

def main():
    """
    Build ``index.json`` from the blog links listed in ``README.md``.

    Each link found in the README becomes an entry with its title and URL,
    plus the message and date of the last commit for the file the URL maps
    to. Entries whose URL cannot be mapped to a local path get "unknown"
    for both commit fields.
    """
    with open("README.md", "r", encoding="utf-8") as readme:
        readme_text = readme.read()

    posts = parse_blog_links(readme_text)

    for post in posts:
        repo_path = get_local_path_from_url(post["url"])
        if not repo_path:
            post["last_commit"] = "unknown"
            post["commit_date"] = "unknown"
            continue
        post["last_commit"], post["commit_date"] = get_commit_info(repo_path)

    with open("index.json", "w", encoding="utf-8") as index_file:
        index_file.write(json.dumps(posts, indent=2))

# Generate the index only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()