aboutsummaryrefslogtreecommitdiffhomepage
path: root/removeNullCharacters.py
blob: 758d03d26f9c7fd3079dd9c89b871f69c7193049 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import glob
import os
import re
from datetime import datetime, timedelta, timezone

# Japan Standard Time (UTC+9), the zone the "last updated" stamp is shown in.
JST = timezone(timedelta(hours=9))

# Matches the "last updated" marker, e.g.
#   最終更新日: <time datetime="2026-06-13T16:57:17.000Z">2026/06/13</time>
# Groups: 1 = text up to the opening quote, 2 = ISO timestamp,
#         3 = end of the opening tag, 4 = displayed text, 5 = closing tag.
_LASTUPDATED_PATTERN = re.compile(
    r'(最終更新日: <time datetime=")([^"]+)(">)([^<]+)(</time>)'
)


def _lastupdated_to_jst(match):
    """Rebuild one matched ``<time>`` element with its timestamp in JST."""
    prefix, iso_str, middle, _old_display, suffix = match.groups()

    # ``fromisoformat`` only accepts a trailing "Z" on newer Python versions,
    # so spell the UTC offset out explicitly.
    parsed = datetime.fromisoformat(iso_str.replace("Z", "+00:00"))
    if parsed.tzinfo is None:
        # A naive value would be interpreted in the machine's local zone by
        # ``astimezone``; the stamp is documented as UTC, so pin it to UTC.
        parsed = parsed.replace(tzinfo=timezone.utc)
    jst_dt = parsed.astimezone(JST)

    # e.g. "2026-06-14T01:57:17.000+09:00" (real milliseconds are preserved).
    new_iso = jst_dt.isoformat(timespec="milliseconds")
    new_display = jst_dt.strftime("%Y/%m/%d %H:%M")
    return prefix + new_iso + middle + new_display + suffix


def fix_lastupdated_date(content):
    """Convert the UTC "last updated" timestamps in *content* to JST.

    Every ``最終更新日: <time datetime="...">...</time>`` element is rewritten
    so that the ``datetime`` attribute carries a ``+09:00`` offset and the
    visible text becomes ``YYYY/MM/DD HH:MM`` in JST. Timestamps that are
    already expressed in JST come out unchanged, so the function can safely
    be applied to the same content more than once.

    Args:
        content: HTML text to process.

    Returns:
        The HTML text with all matching timestamps converted; *content*
        itself when there is no matching element.

    Raises:
        ValueError: If a matched ``datetime`` attribute is not a valid
            ISO 8601 timestamp.
    """
    return _LASTUPDATED_PATTERN.sub(_lastupdated_to_jst, content)


def remove_null_characters_from_html(directory, ignore_files=None, ignore_directories=None):
    """Strip NUL characters from the HTML files under *directory*.

    Every ``*.html`` file found recursively below *directory* is read as
    UTF-8, has its ``"\\0"`` characters removed, is passed through
    ``fix_lastupdated_date`` and is written back in place. Files whose
    content does not change are not rewritten.

    Args:
        directory: Root directory to search.
        ignore_files: File names (base names, e.g. ``"404.html"``) to skip.
        ignore_directories: Directories to skip. An entry matches when it
            equals the file's directory path as returned by ``glob``, the
            directory path relative to *directory*, or any single path
            component of that relative path (e.g. ``"assets"`` skips
            everything below ``<directory>/assets``).
    """
    ignore_files = () if ignore_files is None else ignore_files
    ignore_directories = () if ignore_directories is None else ignore_directories

    html_files = glob.glob(os.path.join(directory, "**/*.html"), recursive=True)

    for file_path in html_files:
        if os.path.basename(file_path) in ignore_files:
            continue

        dir_name = os.path.dirname(file_path)
        # The glob result carries the search root (e.g. "./dist/assets"), so
        # compare bare directory names against the path relative to the root.
        relative_dir = os.path.relpath(dir_name or os.curdir, directory or os.curdir)
        if (
            dir_name in ignore_directories
            or relative_dir in ignore_directories
            or any(part in ignore_directories for part in relative_dir.split(os.sep))
        ):
            continue

        with open(file_path, "r", encoding="utf-8") as file:
            original = file.read()

        content = original.replace("\0", "")
        content = fix_lastupdated_date(content)

        # Avoid touching files that need no change.
        if content == original:
            continue

        with open(file_path, "w", encoding="utf-8") as file:
            file.write(content)


if __name__ == "__main__":
    # Post-process the generated site: search ./dist, skipping the 404 page
    # and the "assets" / "img" directories.
    remove_null_characters_from_html(
        "./dist",
        ["404.html"],
        ["assets", "img"],
    )