diff options
Diffstat (limited to 'removeNullCharacters.py')
| -rw-r--r-- | removeNullCharacters.py | 58 |
1 files changed, 58 insertions, 0 deletions
"""Post-process generated HTML files.

For every ``*.html`` file below a directory this script

* strips NUL (``\\0``) characters, and
* rewrites the "last updated" ``<time>`` tag so that both its ``datetime``
  attribute and its visible text are expressed in JST (UTC+09:00).
"""

import glob
import os
import re
from datetime import datetime, timedelta, timezone

JST = timezone(timedelta(hours=9))

# Detects tags such as
#   <time datetime="2026-06-13T16:57:17.000Z">2026/06/13</time>
# when they are preceded by the "last updated" label. The label is runtime
# text emitted by the site generator and must stay byte-identical.
_LAST_UPDATED_RE = re.compile(
    r'(最終更新日: <time datetime=")([^"]+)(">)([^<]+)(</time>)'
)


def _to_jst_time_tag(match):
    """Return the matched ``<time>`` tag re-expressed in JST.

    The tag is returned untouched when its ``datetime`` attribute is not a
    valid ISO 8601 timestamp, so one malformed page cannot abort the run.
    """
    prefix, iso_str, middle, _old_display, suffix = match.groups()

    # ``datetime.fromisoformat`` only accepts a trailing "Z" from Python 3.11
    # on, so spell the UTC designator out as an explicit offset.
    if iso_str.endswith("Z"):
        iso_str = iso_str[:-1] + "+00:00"

    try:
        parsed = datetime.fromisoformat(iso_str)
    except ValueError:
        return match.group(0)

    # A timestamp without an offset is taken to be UTC (the generator emits
    # UTC); otherwise ``astimezone`` would silently use the machine's zone.
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)

    jst_dt = parsed.astimezone(JST)
    # Keeps the real milliseconds and yields e.g. 2026-06-14T01:57:17.000+09:00
    new_iso = jst_dt.isoformat(timespec="milliseconds")
    new_display = jst_dt.strftime("%Y/%m/%d %H:%M")
    return prefix + new_iso + middle + new_display + suffix


def fix_lastupdated_date(content):
    """Convert the UTC "last updated" timestamps in *content* to JST.

    Every occurrence of the "last updated" ``<time>`` tag is rewritten: the
    ``datetime`` attribute becomes an ISO 8601 string with a ``+09:00``
    offset and the visible text becomes ``YYYY/MM/DD HH:MM`` in JST.

    Args:
        content: HTML document text.

    Returns:
        The text with all recognised tags converted. Text without such a
        tag, or with an unparsable timestamp, is returned unchanged.
        Applying the function twice gives the same result as applying it
        once.
    """
    return _LAST_UPDATED_RE.sub(_to_jst_time_tag, content)


def _is_in_ignored_directory(file_path, root, ignore_directories):
    """Tell whether *file_path* lies inside one of *ignore_directories*.

    An entry matches when it equals the file's directory exactly as globbed,
    the name of any directory between *root* and the file, or a path
    relative to *root* that is an ancestor of the file.
    """
    parent = os.path.dirname(file_path)
    if parent in ignore_directories:
        return True

    rel_parent = os.path.relpath(parent, root)
    if rel_parent == os.curdir:
        # The file sits directly in *root*; there is no sub-directory to match.
        return False

    ignored = {os.path.normpath(entry) for entry in ignore_directories}
    parts = rel_parent.split(os.sep)
    if any(part in ignored for part in parts):
        return True
    return any(
        os.sep.join(parts[:depth]) in ignored
        for depth in range(1, len(parts) + 1)
    )


def remove_null_characters_from_html(
    directory, ignore_files=None, ignore_directories=None
):
    """Clean every ``*.html`` file found recursively below *directory*.

    Each file is read as UTF-8, stripped of NUL characters, passed through
    :func:`fix_lastupdated_date` and written back in place. Files whose
    content does not change are not rewritten.

    Args:
        directory: Root directory to scan.
        ignore_files: File names (base names) to skip at any depth.
        ignore_directories: Directories to skip, given as a directory name
            (e.g. ``"assets"``) or as a path relative to *directory*.
    """
    ignore_files = [] if ignore_files is None else ignore_files
    ignore_directories = [] if ignore_directories is None else ignore_directories

    html_files = glob.glob(os.path.join(directory, "**/*.html"), recursive=True)

    for file_path in html_files:
        if os.path.basename(file_path) in ignore_files:
            continue
        if _is_in_ignored_directory(file_path, directory, ignore_directories):
            continue

        with open(file_path, "r", encoding="utf-8") as file:
            original = file.read()

        cleaned = fix_lastupdated_date(original.replace("\0", ""))
        if cleaned == original:
            continue

        with open(file_path, "w", encoding="utf-8") as file:
            file.write(cleaned)


if __name__ == "__main__":
    target_directory = "./dist"
    ignore_files = ["404.html"]
    ignore_directories = ["assets", "img"]
    remove_null_characters_from_html(target_directory, ignore_files, ignore_directories)
