aboutsummaryrefslogtreecommitdiffhomepage
path: root/removeNullCharacters.py
diff options
context:
space:
mode:
Diffstat (limited to 'removeNullCharacters.py')
-rw-r--r--removeNullCharacters.py58
1 files changed, 58 insertions, 0 deletions
diff --git a/removeNullCharacters.py b/removeNullCharacters.py
new file mode 100644
index 0000000..758d03d
--- /dev/null
+++ b/removeNullCharacters.py
@@ -0,0 +1,58 @@
+import glob
+import os
+import re
+from datetime import datetime, timedelta, timezone
+
# Japan Standard Time (UTC+9), the zone "last updated" timestamps are shown in.
JST = timezone(timedelta(hours=9))

# Matches the "last updated" element, e.g.
#   最終更新日: <time datetime="2026-06-13T16:57:17.000Z">2026/06/13</time>
# Groups: (1) text up to the attribute value, (2) the ISO timestamp,
# (3) end of the opening tag, (4) the visible text, (5) the closing tag.
_LASTUPDATED_PATTERN = re.compile(
    r'(最終更新日: <time datetime=")([^"]+)(">)([^<]+)(</time>)'
)


def _to_jst_time_tag(match):
    """Rebuild one matched "last updated" <time> element with JST values."""
    prefix, iso_str, middle, _display_text, suffix = match.groups()

    # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11
    # on, so spell the UTC offset out explicitly.
    if iso_str.endswith("Z"):
        iso_str = iso_str[:-1] + "+00:00"

    try:
        parsed = datetime.fromisoformat(iso_str)
    except ValueError:
        # Not a timestamp we can parse: keep this element exactly as it was
        # instead of aborting the processing of the whole page.
        return match.group(0)

    # A timestamp without an offset is taken to be UTC. Without this,
    # astimezone() would interpret it in the machine's local timezone.
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)

    jst_dt = parsed.astimezone(JST)

    # e.g. "2026-06-14T01:57:17.000+09:00" (milliseconds are preserved).
    new_iso = jst_dt.isoformat(timespec="milliseconds")
    new_display = jst_dt.strftime("%Y/%m/%d %H:%M")

    return prefix + new_iso + middle + new_display + suffix


def fix_lastupdated_date(content):
    """Convert the "last updated" timestamps in an HTML string to JST.

    Every ``最終更新日: <time datetime="...">...</time>`` element found in
    *content* gets its ``datetime`` attribute rewritten as an ISO 8601
    timestamp with a ``+09:00`` offset, and its visible text replaced by
    the JST date and time formatted as ``YYYY/MM/DD HH:MM``.

    Args:
        content: HTML text to process.

    Returns:
        The HTML text with all matching elements converted. Text without a
        matching element, and elements whose ``datetime`` attribute cannot
        be parsed, are returned unchanged.
    """
    return _LASTUPDATED_PATTERN.sub(_to_jst_time_tag, content)
+
+
def remove_null_characters_from_html(directory, ignore_files=(), ignore_directories=()):
    """Strip NUL characters from the HTML files under *directory*, in place.

    Each ``*.html`` file found recursively under *directory* is read as
    UTF-8, has every ``"\\0"`` character removed, is passed through
    ``fix_lastupdated_date`` and is then written back to the same path.

    Args:
        directory: Root directory to search.
        ignore_files: File names (base names such as ``"404.html"``) to
            leave untouched.
        ignore_directories: Directories to leave untouched. An entry
            matches either as a directory name anywhere between
            *directory* and the file (``"assets"`` skips everything below
            any ``assets`` directory), or as the exact parent directory
            path of a file as produced by ``glob``.
    """
    html_files = glob.glob(os.path.join(directory, "**/*.html"), recursive=True)

    for file_path in html_files:
        if os.path.basename(file_path) in ignore_files:
            continue

        parent_dir = os.path.dirname(file_path)
        # Compare individual directory names below the root. Comparing the
        # whole parent path (e.g. "./dist/assets") with a bare name such as
        # "assets" would never match.
        rel_parent = os.path.relpath(parent_dir or os.curdir, directory or os.curdir)
        parent_names = [] if rel_parent == os.curdir else rel_parent.split(os.sep)
        if parent_dir in ignore_directories or any(
            name in ignore_directories for name in parent_names
        ):
            continue

        with open(file_path, "r", encoding="utf-8") as file:
            content = file.read()

        content = content.replace("\0", "")
        content = fix_lastupdated_date(content)

        with open(file_path, "w", encoding="utf-8") as file:
            file.write(content)
+
+
if __name__ == "__main__":
    # Script entry point: clean the HTML files under ./dist, leaving
    # 404.html and the "assets" / "img" directories out of the run.
    remove_null_characters_from_html(
        "./dist",
        ignore_files=["404.html"],
        ignore_directories=["assets", "img"],
    )