"""Post-process generated HTML files.

Strips NUL characters from every ``*.html`` file under a directory and
rewrites the "最終更新日" (last updated) timestamp from UTC to JST.
"""

import glob
import os
import re
from datetime import datetime, timedelta, timezone

JST = timezone(timedelta(hours=9))

# Matches: 最終更新日: <time ... datetime="ISO">DISPLAY</time>
# Groups:  1 = label + opening tag up to the datetime value
#          2 = ISO-8601 timestamp
#          3 = rest of the opening tag (closing quote, other attributes, ">")
#          4 = human-readable text currently shown (may be empty)
#          5 = closing tag
# NOTE(review): the markup portion of this pattern was reconstructed from how
# the five groups are consumed below -- confirm it against the generated HTML.
_LAST_UPDATED_RE = re.compile(
    r'(最終更新日: <time\b[^>]*?\bdatetime=")'
    r'([^"]+)'
    r'("[^>]*>)'
    r'([^<]*)'
    r'(</time>)'
)


def _rewrite_match_as_jst(match):
    """Return the matched ``<time>`` element with its timestamp shown in JST."""
    prefix, iso_str, middle, _old_display, suffix = match.groups()

    # ``fromisoformat`` only accepts a trailing "Z" from Python 3.11 on, so
    # normalise it to an explicit offset first.
    parsed = datetime.fromisoformat(iso_str.replace("Z", "+00:00"))
    if parsed.tzinfo is None:
        # Without an offset ``astimezone`` would assume the machine's local
        # zone; the value is documented as UTC, so pin it explicitly.
        parsed = parsed.replace(tzinfo=timezone.utc)
    jst_dt = parsed.astimezone(JST)

    new_iso = jst_dt.strftime("%Y-%m-%dT%H:%M:%S.000+09:00")
    new_display = jst_dt.strftime("%Y/%m/%d %H:%M")
    return prefix + new_iso + middle + new_display + suffix


def fix_lastupdated_date(content: str) -> str:
    """Convert the UTC "last updated" timestamp in *content* to JST.

    Every ``最終更新日: <time datetime="...">...</time>`` element gets both its
    ``datetime`` attribute and its visible text rewritten in JST (+09:00).
    Content without such an element is returned unchanged. Running the
    function on already-converted content yields the same content again.

    Raises:
        ValueError: if a matched ``datetime`` attribute is not a timestamp
            that ``datetime.fromisoformat`` can parse.
    """
    return _LAST_UPDATED_RE.sub(_rewrite_match_as_jst, content)


def remove_null_characters_from_html(directory, ignore_files=(), ignore_directories=()):
    """Clean every ``*.html`` file found recursively under *directory*.

    For each file that is not ignored, NUL characters are removed and the
    "last updated" timestamp is converted to JST; the file is rewritten in
    place only when that changed its content.

    Args:
        directory: Root directory to search.
        ignore_files: File base names (e.g. ``"404.html"``) to leave untouched.
        ignore_directories: Directories to skip. An entry matches when it
            equals the file's full parent path as produced by ``glob``, or
            when it names any directory component between *directory* and
            the file (e.g. ``"assets"`` skips ``<directory>/assets/**``).
    """
    html_files = glob.glob(os.path.join(directory, "**/*.html"), recursive=True)
    for file_path in html_files:
        if os.path.basename(file_path) in ignore_files:
            continue

        parent = os.path.dirname(file_path)
        # Compare individual path components as well: the parent is a full
        # path such as "./dist/assets", which never equals a bare "assets".
        relative_parts = [
            part
            for part in os.path.relpath(parent, directory).split(os.sep)
            if part != os.curdir
        ]
        if parent in ignore_directories or any(
            part in ignore_directories for part in relative_parts
        ):
            continue

        with open(file_path, "r", encoding="utf-8") as file:
            original = file.read()

        cleaned = fix_lastupdated_date(original.replace("\0", ""))
        if cleaned == original:
            continue

        with open(file_path, "w", encoding="utf-8") as file:
            file.write(cleaned)


if __name__ == "__main__":
    target_directory = "./dist"
    ignore_files = ["404.html"]
    ignore_directories = ["assets", "img"]
    remove_null_characters_from_html(
        target_directory, ignore_files, ignore_directories
    )