diff options
| -rw-r--r-- | package.json | 2 | ||||
| -rw-r--r-- | public/llms.txt | 51 | ||||
| -rw-r--r-- | scripts/generate-llms-full.mjs | 144 |
3 files changed, 196 insertions, 1 deletions
diff --git a/package.json b/package.json index 5fa0846..ad8b7c0 100644 --- a/package.json +++ b/package.json @@ -5,7 +5,7 @@ "scripts": { "dev": "astro dev", "start": "astro dev", - "build": "astro build && py -c \"import shutil; shutil.copy('public/.htaccess', 'dist/.htaccess')\" && py removeNullCharacters.py", + "build": "astro build && py -c \"import shutil; shutil.copy('public/.htaccess', 'dist/.htaccess')\" && py removeNullCharacters.py && node scripts/generate-llms-full.mjs", "deploy": "sh deploy.sh", "preview": "astro preview", "postinstall": "patch-package", diff --git a/public/llms.txt b/public/llms.txt new file mode 100644 index 0000000..bfbe905 --- /dev/null +++ b/public/llms.txt @@ -0,0 +1,51 @@ +# 安竹洋平 公式サイト(小平市議会議員) +> 小平市議会議員 安竹洋平の公式サイトです。一般質問や議会での活動、いじめ、発達関連の情報をまとめています。 + +## 基本情報 +- サイトURL: https://yasutakeyohei.com/ +- 運営者: 安竹洋平(小平市議会議員、完全無所属) +- お問い合わせ: yohei@yasutakeyohei.com + +## メインページ +- ホーム: https://yasutakeyohei.com/ +- 実績概要: https://yasutakeyohei.com/jisseki/ +- 私の方針: https://yasutakeyohei.com/policy/ +- ご支援: https://yasutakeyohei.com/support/ +- コンタクト: https://yasutakeyohei.com/contact/ + +## 実績 +- 公文書管理の不正追及の軌跡: https://yasutakeyohei.com/koubunsyo-kanri/ +- いじめ重大事態への対応の軌跡: https://yasutakeyohei.com/ijime-judai-jitai/ +- 障害者福祉施設における虐待通報対応の軌跡: https://yasutakeyohei.com/fukushi-shisetsu-gyakutai/ +- 合気公園の軌跡: https://yasutakeyohei.com/aiki-kouen/ +- 情緒固定級の軌跡: https://yasutakeyohei.com/joutyo-koteikyu/ +- 過剰な制限緩和の軌跡: https://yasutakeyohei.com/kajo-seigen-kanwa/ +- 東京サレジオ学園北側開発問題の軌跡: https://yasutakeyohei.com/saresio-kaihatu/ +- ワクチン副反応救済制度の適正化の軌跡: https://yasutakeyohei.com/vaccine-kyuusai-tekiseika/ +- ディスレクシア(読み書き障害)対応の軌跡: https://yasutakeyohei.com/dislexia-taiou/ + +## 一般質問 +- 一覧: https://yasutakeyohei.com/ippan-situmon/ +- 令和7年度: https://yasutakeyohei.com/ippan-situmon/r7d/ +- 令和6年度: https://yasutakeyohei.com/ippan-situmon/r6d/ +- 令和5年度: https://yasutakeyohei.com/ippan-situmon/r5d/ +- 令和4年度: https://yasutakeyohei.com/ippan-situmon/r4d/ +- 
令和3年度: https://yasutakeyohei.com/ippan-situmon/r3d/ +- 令和2年度: https://yasutakeyohei.com/ippan-situmon/r2d/ +- 令和元年度: https://yasutakeyohei.com/ippan-situmon/r1d/ + +## ディスレクシアについて +- 概要: https://yasutakeyohei.com/about-dyslexia/ +- ディスレクシアとは: https://yasutakeyohei.com/about-dyslexia/what-is-dyslexia/ +- ディスレクシアの著名人: https://yasutakeyohei.com/about-dyslexia/celebrities/ +- 本人や保護者の声: https://yasutakeyohei.com/about-dyslexia/voices/ +- 潜在的人数と文科省調査: https://yasutakeyohei.com/about-dyslexia/potential-number/ +- 見過ごさないためのアセスメント: https://yasutakeyohei.com/about-dyslexia/assessment/ +- デイジー教科書と音声教材: https://yasutakeyohei.com/about-dyslexia/daisy-and-onsei/ +- 合理的配慮と関連法律: https://yasutakeyohei.com/about-dyslexia/reasonable-accommodation/ +- 小平市の状況: https://yasutakeyohei.com/about-dyslexia/kodaira/ +- 就学相談・通級指導: https://yasutakeyohei.com/about-dyslexia/school-consultation/ +- 情緒固定級とは: https://yasutakeyohei.com/about-dyslexia/emotional-support-class/ +- チャレンジスクール等: https://yasutakeyohei.com/about-dyslexia/high-school-options/ +- 長期欠席支援シート: https://yasutakeyohei.com/about-dyslexia/support-sheet/ +- 支援リソース・参考情報: https://yasutakeyohei.com/about-dyslexia/support-resources/ diff --git a/scripts/generate-llms-full.mjs b/scripts/generate-llms-full.mjs new file mode 100644 index 0000000..cc3fdd4 --- /dev/null +++ b/scripts/generate-llms-full.mjs @@ -0,0 +1,144 @@ +import { readFileSync, writeFileSync } from "node:fs"; +import { readdir, readFile } from "node:fs/promises"; +import { join, extname, relative } from "node:path"; + +const DOCS_DIR = join(import.meta.dirname, "..", "src", "content", "docs"); +const OUTPUT_FILE = join(import.meta.dirname, "..", "dist", "llms-full.txt"); +const SITE_URL = "https://yasutakeyohei.com"; + +/** + * Recursively collect all .mdx files, excluding files starting with "_" (partials). 
/**
 * Recursively collect all .mdx files below a directory.
 *
 * Any entry whose name starts with "_" is skipped before its type is checked,
 * so both "_partial.mdx" files and whole "_dir" directories are excluded.
 *
 * @param {string} dir - Directory to scan.
 * @returns {Promise<string[]>} Paths of the .mdx files found, each built by
 *   joining `dir` with the entry names below it.
 */
async function collectMdxFiles(dir) {
  const entries = await readdir(dir, { withFileTypes: true });
  const files = [];

  for (const entry of entries) {
    if (entry.name.startsWith("_")) continue;

    const fullPath = join(dir, entry.name);
    if (entry.isDirectory()) {
      files.push(...(await collectMdxFiles(fullPath)));
    } else if (extname(entry.name) === ".mdx") {
      files.push(fullPath);
    }
  }

  return files;
}

/**
 * Derive the page URL from the file path.
 *
 * The path is taken relative to the module-level DOCS_DIR, backslashes are
 * normalised to "/", and the ".mdx" extension is removed. "index" files map
 * to their directory URL (with a trailing slash); every other page maps to
 * its slug without a trailing slash.
 *
 * @param {string} filePath - Path of an .mdx file under DOCS_DIR.
 * @returns {string} URL built from the module-level SITE_URL.
 */
function pathToUrl(filePath) {
  const relPath = relative(DOCS_DIR, filePath).replace(/\\/g, "/");
  const slug = relPath.replace(/\.mdx$/, "");

  // Root index → /
  if (slug === "index") {
    return `${SITE_URL}/`;
  }

  // index.mdx → directory URL (keep the slash, drop "index")
  if (slug.endsWith("/index")) {
    return `${SITE_URL}/${slug.slice(0, -"index".length)}`;
  }

  return `${SITE_URL}/${slug}`;
}

/**
 * Strip frontmatter (content between --- delimiters at the start of the file).
 *
 * Both delimiters must sit on a line of their own (trailing spaces/tabs are
 * tolerated), so a frontmatter line that merely starts with "---" is not
 * mistaken for the end. Empty frontmatter ("---" directly followed by "---")
 * is recognised as well.
 *
 * @param {string} content - Raw file content.
 * @returns {string} The body after the closing delimiter with leading
 *   whitespace removed, or `content` unchanged when there is no frontmatter.
 */
function stripFrontmatter(content) {
  const match = content.match(
    /^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/,
  );
  return match ? content.slice(match[0].length).trimStart() : content;
}

/**
 * Remove ESM import/export statements from MDX content.
 *
 * Despite the name, JSX tags themselves are left in place; only the
 * statements that bring components into scope are removed.
 *
 * - import: must start a line and end with its quoted module specifier. The
 *   binding list may span several lines but can never run across a quote or
 *   semicolon, so a prose line that happens to start with "import " is kept.
 * - export (const/let/var/function/default/async): removed from the keyword up
 *   to the next blank line, so multi-line declarations do not leave their
 *   remaining lines behind.
 *   NOTE(review): this assumes an export is followed by a blank line before
 *   regular content, as MDX syntax expects — confirm against the docs tree.
 *
 * @param {string} content - MDX body (frontmatter already removed).
 * @returns {string} Content with the matched statements replaced by "".
 */
function stripJsxComponents(content) {
  const importStatement =
    /^import[ \t]+(?:[^'";]*?\bfrom[ \t]*)?['"][^'"\r\n]+['"][ \t]*;?[ \t]*$/gm;
  const exportStatement =
    /^export[ \t]+(?:const|let|var|function|default|async)\b[^\r\n]*(?:\r?\n[ \t]*\S[^\r\n]*)*/gm;

  return content.replace(importStatement, "").replace(exportStatement, "");
}
/**
 * Extract the title from the frontmatter of an MDX file.
 *
 * Only one pair of matching quotes wrapped around the value is removed, so
 * quotes and apostrophes inside the title are preserved. An empty `title:`
 * entry yields null rather than picking up the following frontmatter line.
 *
 * @param {string} content - Raw file content, including frontmatter.
 * @returns {string|null} The title, or null when there is no frontmatter, no
 *   title entry, or the title is empty.
 */
function extractTitle(content) {
  const frontmatter = content.match(/^---\r?\n([\s\S]*?)\r?\n---/)?.[1];
  if (frontmatter === undefined) return null;

  // [ \t]* instead of \s*: the value has to be on the same line as the key.
  const rawTitle = frontmatter.match(/^title:[ \t]*(.+)$/m)?.[1].trim();
  if (!rawTitle) return null;

  const quoted = rawTitle.match(/^(['"])(.*)\1$/);
  const title = quoted ? quoted[2] : rawTitle;
  return title || null;
}

/**
 * Build the llms-full.txt file: collect every .mdx page under DOCS_DIR, order
 * the pages, reduce each one to "title, URL, body", and write the
 * concatenation to OUTPUT_FILE.
 *
 * Pages whose body is shorter than 10 characters after stripping are logged
 * and left out.
 *
 * @returns {Promise<void>} Resolves once the file has been written; rejects on
 *   any read, write, or directory-scan error.
 */
async function main() {
  console.log("Collecting MDX files...");
  const files = await collectMdxFiles(DOCS_DIR);

  // Sort: top-level files first, then by path. The sort keys are computed
  // once per file instead of on every comparison.
  const ranked = files.map((file) => {
    const rel = relative(DOCS_DIR, file).replace(/\\/g, "/");
    return { file, rel, depth: rel.split("/").length - 1 };
  });
  ranked.sort((a, b) => a.depth - b.depth || a.rel.localeCompare(b.rel));

  console.log(`Found ${files.length} MDX files.`);

  const sections = [];

  for (const { file } of ranked) {
    const rawContent = readFileSync(file, "utf-8");
    const url = pathToUrl(file);
    // Pages without a usable title are headed by their URL.
    const title = extractTitle(rawContent) || url;
    const body = stripJsxComponents(stripFrontmatter(rawContent)).trim();

    // Skip empty or near-empty pages
    if (body.length < 10) {
      console.log(` Skipping (too short): ${relative(DOCS_DIR, file)}`);
      continue;
    }

    sections.push(`# ${title}\n> ${url}\n\n${body}\n\n---\n`);
  }

  // toISOString() is UTC, so the date stamp is the UTC calendar date.
  const generatedOn = new Date().toISOString().split("T")[0];
  const fullContent = [
    "# 安竹洋平 公式サイト - 全コンテンツ\n",
    `> このファイルは AI(LLM)による学習・参照用に自動生成されています。\n`,
    `> 生成元: ${SITE_URL}\n`,
    `> 更新日: ${generatedOn}\n\n`,
    ...sections,
  ].join("");

  writeFileSync(OUTPUT_FILE, fullContent, "utf-8");
  console.log(
    `Generated: ${OUTPUT_FILE} (${fullContent.length.toLocaleString()} chars)`,
  );
}

// Script entry point: report any failure and exit with a non-zero status.
main().catch((err) => {
  console.error("Error:", err);
  process.exit(1);
});
