# blog/index_gen.py
# 2025-05-22 16:44:34 +00:00

# 62 lines
# 1.7 KiB
# Python
import re
import json
import subprocess
from urllib.parse import urlparse
def get_local_path_from_url(url):
    """Map a blog URL to its local file path.

    Assumes the repository URL structure
    ``https://.../src/branch/main/<file>`` and maps it to ``blog/<file>``.
    Returns None when the URL path has no "main" segment.
    """
    segments = urlparse(url).path.strip("/").split("/")
    if "main" not in segments:
        return None
    tail = segments[segments.index("main") + 1:]
    return "blog/" + "/".join(tail)
def get_commit_info(filepath):
    """Return (subject, ISO committer date) of the last commit touching filepath.

    Runs ``git log -1`` with ``--pretty=%s%n%cI`` (subject, newline, strict
    ISO 8601 committer date). Falls back to placeholder strings when the
    file has no history, git fails, or git is not installed.
    """
    try:
        result = subprocess.run(
            ["git", "log", "-1", "--pretty=%s%n%cI", "--", filepath],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
            check=True,
        )
    # FileNotFoundError: git binary missing from PATH (previously uncaught
    # and crashed the script). CalledProcessError: not a repo / git failure.
    except (subprocess.CalledProcessError, FileNotFoundError):
        return "uncommitted or error", "unknown"
    lines = result.stdout.strip().split("\n")
    if len(lines) < 2:
        # git exited 0 but printed nothing: file untracked or no history yet
        return "uncommitted or no history", "unknown"
    return lines[0], lines[1]
def parse_blog_links(md_text):
    """Extract markdown list-item links ("- [title](url)") from md_text.

    Returns a list of {"title": ..., "url": ...} dicts, in document order.
    """
    entries = []
    for title, url in re.findall(r"- \[(.*?)\]\((.*?)\)", md_text):
        entries.append({"title": title, "url": url})
    return entries
def main():
    """Build index.json from the blog links listed in README.md.

    Each entry is annotated with the last git commit subject and date of
    the corresponding local file (or placeholders when unresolvable).
    """
    with open("README.md", "r", encoding="utf-8") as readme:
        entries = parse_blog_links(readme.read())

    for entry in entries:
        local_path = get_local_path_from_url(entry["url"])
        if not local_path:
            message, date = "invalid url", "unknown"
        else:
            message, date = get_commit_info(local_path)
        entry["last_commit"] = message
        entry["commit_date"] = date

    with open("index.json", "w", encoding="utf-8") as out:
        json.dump(entries, out, indent=2)
# Script entry point: run only when executed directly, not on import.
if __name__ == "__main__":
    main()