import re
import json
import subprocess
from urllib.parse import urlparse

# Matches markdown list-item links: "- [title](url)". Compiled once at module
# level so repeated parse_blog_links calls don't pay the lookup cost.
_LINK_PATTERN = re.compile(r"- \[(.*?)\]\((.*?)\)")


def get_local_path_from_url(url):
    """Convert a blog URL to a repo-relative file path.

    Assumes the URL path contains a ".../src/branch/main/<relpath>" segment
    (Gitea/Forgejo-style browse URLs) and returns everything after the first
    "main" component, e.g. "blog/post.md".

    NOTE(review): this keys on the first path component literally named
    "main"; a file or directory named "main" earlier in the path would
    truncate the result — confirm against actual repo layout.

    Returns:
        The relative path string, or None if "main" is not in the URL path.
    """
    parts = urlparse(url).path.strip("/").split("/")
    try:
        idx = parts.index("main")
    except ValueError:
        return None
    return "/".join(parts[idx + 1:])


def get_commit_info(filepath):
    """Return (subject, ISO-8601 committer date) of the last commit touching filepath.

    Runs `git log -1 --pretty=%s%n%cI -- <filepath>`, which emits the commit
    subject and committer date on two separate lines. Falls back to sentinel
    strings when the file has no history (git exits 0 with empty output) or
    when git itself fails (non-zero exit, caught via check=True).

    Returns:
        (commit_message, commit_date) — both strings; date is "unknown" on
        any failure path.
    """
    try:
        result = subprocess.run(
            ["git", "log", "-1", "--pretty=%s%n%cI", "--", filepath],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError:
        return "uncommitted or error", "unknown"
    lines = result.stdout.strip().split("\n")
    if len(lines) < 2:
        # Empty output: file exists but has no commit history yet.
        return "uncommitted or no history", "unknown"
    return lines[0], lines[1]


def parse_blog_links(md_text):
    """Extract markdown list-item links as [{"title": ..., "url": ...}, ...]."""
    return [
        {"title": title, "url": url}
        for title, url in _LINK_PATTERN.findall(md_text)
    ]


def main():
    """Read README.md, annotate each blog link with git metadata, write index.json."""
    with open("README.md", "r", encoding="utf-8") as f:
        md_content = f.read()

    blog_entries = parse_blog_links(md_content)
    for entry in blog_entries:
        local_path = get_local_path_from_url(entry["url"])
        if local_path:
            msg, date = get_commit_info(local_path)
        else:
            msg, date = "invalid url", "unknown"
        entry["last_commit"] = msg
        entry["commit_date"] = date

    with open("index.json", "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII blog titles readable in the
        # output instead of \uXXXX escapes.
        json.dump(blog_entries, f, indent=2, ensure_ascii=False)


if __name__ == "__main__":
    main()