aboutsummaryrefslogtreecommitdiffhomepage
path: root/scripts
diff options
context:
space:
mode:
authorYasutake Yohei <61961825+yasutakeyohei@users.noreply.github.com>2026-06-21 19:05:27 +0900
committerYasutake Yohei <61961825+yasutakeyohei@users.noreply.github.com>2026-06-21 19:05:27 +0900
commit32e2ab7749480d294b79e1e550daae07b778d1d1 (patch)
treeab3fe085cc8be7771b3b770d83a4b58d4aa58f9c /scripts
parentcb3175dfc4f69ee26815d1bcb0d4511b377dddf4 (diff)
AI向けに llms.txt と llms-full.txt を生成する仕組みを追加
AI(LLM)がサイトを参照・学習しやすくするため、以下を追加:
- public/llms.txt: 全ページをカテゴリ別に列挙した AI 向けサイトマップ
- scripts/generate-llms-full.mjs: 全 MDX ファイルの本文を収集・結合する生成スクリプト
- package.json の build スクリプトに生成処理を追加

ビルド時に dist/llms.txt と dist/llms-full.txt が自動生成される。
Diffstat (limited to 'scripts')
-rw-r--r--scripts/generate-llms-full.mjs144
1 files changed, 144 insertions, 0 deletions
diff --git a/scripts/generate-llms-full.mjs b/scripts/generate-llms-full.mjs
new file mode 100644
index 0000000..cc3fdd4
--- /dev/null
+++ b/scripts/generate-llms-full.mjs
@@ -0,0 +1,144 @@
import { readFileSync, writeFileSync } from "node:fs";
import { mkdir, readdir, readFile } from "node:fs/promises";
import { dirname, extname, join, relative } from "node:path";
+
+const DOCS_DIR = join(import.meta.dirname, "..", "src", "content", "docs");
+const OUTPUT_FILE = join(import.meta.dirname, "..", "dist", "llms-full.txt");
+const SITE_URL = "https://yasutakeyohei.com";
+
/**
 * Walk `dir` depth-first and gather the path of every `.mdx` file below it.
 *
 * Any entry whose name begins with "_" is ignored; for a directory that
 * means its whole subtree is skipped.
 *
 * @param {string} dir - Directory to scan.
 * @returns {Promise<string[]>} Paths formed by joining `dir` with each entry
 *   name, in the order the entries were visited.
 */
async function collectMdxFiles(dir) {
  const found = [];
  const dirents = await readdir(dir, { withFileTypes: true });

  for (const dirent of dirents) {
    const { name } = dirent;
    if (name.startsWith("_")) {
      continue;
    }

    const target = join(dir, name);

    if (dirent.isDirectory()) {
      // Append the nested results here so the traversal order is preserved.
      for (const nested of await collectMdxFiles(target)) {
        found.push(nested);
      }
      continue;
    }

    if (extname(name) === ".mdx") {
      found.push(target);
    }
  }

  return found;
}
+
/**
 * Build the public URL for a docs source file.
 *
 * The path is taken relative to DOCS_DIR, backslashes are turned into forward
 * slashes and the ".mdx" suffix is dropped. A trailing "/index" collapses to
 * "/", and the top-level "index" maps to the site root.
 *
 * @param {string} filePath - Path of an .mdx file under DOCS_DIR.
 * @returns {string} SITE_URL followed by "/" and the derived page path.
 */
function pathToUrl(filePath) {
  const slug = relative(DOCS_DIR, filePath)
    .replace(/\\/g, "/")
    .replace(/\.mdx$/, "");

  // The top-level index page is the site root itself.
  if (slug === "index") {
    return `${SITE_URL}/`;
  }

  // A nested index.mdx is addressed by its directory, trailing slash kept.
  const pagePath = slug.endsWith("/index")
    ? slug.replace(/\/index$/, "/")
    : slug;

  return `${SITE_URL}/${pagePath}`;
}
+
/**
 * Remove a leading frontmatter block and return the remaining body.
 *
 * A frontmatter block starts on the very first line of the file with a line
 * consisting only of "---" and ends at the next line consisting only of
 * "---" (trailing spaces or tabs are tolerated on both). An optional UTF-8
 * BOM before the opening delimiter is accepted, and an empty block
 * ("---" immediately followed by "---") is recognised as well.
 *
 * @param {string} content - Full text of the source file.
 * @returns {string} The text after the closing delimiter with leading
 *   whitespace removed, or `content` unchanged when the file does not begin
 *   with a frontmatter block.
 */
function stripFrontmatter(content) {
  // The lazy optional group (`??`) makes the regex try the empty-frontmatter
  // case first, so a later "---" rule in the body is never mistaken for the
  // closing delimiter. The closing "---" must be a whole line, so a
  // frontmatter line that merely starts with "---" does not end the block.
  const frontmatterPattern =
    /^\uFEFF?---[ \t]*\r?\n(?:[\s\S]*?\r?\n)??---[ \t]*(?:\r?\n|$)/;

  const match = frontmatterPattern.exec(content);
  if (match === null) {
    return content;
  }

  return content.slice(match[0].length).trimStart();
}
+
/**
 * Remove ESM `import` statements and `export` lines from MDX body text.
 *
 * Despite its name, this function does not touch JSX tags: component markup
 * such as `<Card>...</Card>` is left in the text as is.
 *
 * Imports are matched only when the text between `import` and the quoted
 * module specifier looks like an import clause (identifiers, `*`, braces,
 * commas, whitespace). A prose line that happens to begin with "import " is
 * therefore left alone instead of being deleted up to the next quoted string.
 * Multi-line import clauses are supported.
 *
 * For exports, only the line on which the `export` statement starts is
 * removed; continuation lines of a multi-line export stay in the text.
 *
 * @param {string} content - MDX body text (frontmatter already removed).
 * @returns {string} The text with matching import/export statements removed.
 */
function stripJsxComponents(content) {
  // `import x from "y"`, `import { a, b } from "y"`, `import * as x from "y"`
  // and side-effect imports (`import "y"`), each with an optional semicolon.
  const importStatement =
    /^import\s+(?:[\w$*{},\s]+?\s*from\s*)?['"][^'"\n]+['"]\s*;?\s*$/gm;

  // The first line of an `export const|let|var|function|default|async ...`.
  const exportLine =
    /^export\s+(?:const|let|var|function|default|async)\s+[\s\S]*?$/gm;

  return content.replace(importStatement, "").replace(exportLine, "");
}
+
/**
 * Read the `title` value from the frontmatter at the top of an MDX file.
 *
 * Only a single pair of wrapping quotes is removed from the value; quotes and
 * apostrophes inside the title are kept. Inside a double-quoted value `\"`
 * and `\\` are unescaped, and inside a single-quoted value `''` becomes `'`.
 *
 * @param {string} content - Full text of the source file.
 * @returns {string|null} The title, or `null` when the file has no
 *   frontmatter block, the block has no `title:` line, or the title is empty.
 */
function extractTitle(content) {
  // The closing "---" must be a whole line; a leading BOM is tolerated.
  const frontmatter =
    /^\uFEFF?---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n|$)/.exec(content);
  if (frontmatter === null) {
    return null;
  }

  // `[ \t]*` rather than `\s*`, so an empty value cannot run onto the next line.
  const titleLine = /^title:[ \t]*(.+)$/m.exec(frontmatter[1]);
  if (titleLine === null) {
    return null;
  }

  const raw = titleLine[1].trim();
  let title = raw;
  if (raw.length >= 2 && raw.startsWith('"') && raw.endsWith('"')) {
    title = raw.slice(1, -1).replace(/\\(["\\])/g, "$1");
  } else if (raw.length >= 2 && raw.startsWith("'") && raw.endsWith("'")) {
    title = raw.slice(1, -1).replaceAll("''", "'");
  }

  return title === "" ? null : title;
}
+
/**
 * Generate the combined text file from all MDX pages.
 *
 * Steps:
 *  1. Collect every .mdx file under DOCS_DIR.
 *  2. Sort them by directory depth (shallowest first), then by relative path.
 *  3. For each file, drop the frontmatter and import/export statements and
 *     emit a section made of the title, the page URL and the remaining body.
 *     Pages whose remaining body is shorter than 10 characters are skipped.
 *  4. Write a header followed by all sections to OUTPUT_FILE, creating the
 *     output directory first when it does not exist.
 *
 * @returns {Promise<void>} Resolves once the file has been written; rejects
 *   when reading a source file or writing the output fails.
 */
async function main() {
  console.log("Collecting MDX files...");
  const files = await collectMdxFiles(DOCS_DIR);

  // Sort: top-level files first, then by path
  const toRelative = (file) => relative(DOCS_DIR, file).replace(/\\/g, "/");
  const depthOf = (relPath) => (relPath.match(/\//g) || []).length;
  files.sort((a, b) => {
    const aRel = toRelative(a);
    const bRel = toRelative(b);
    const depthDiff = depthOf(aRel) - depthOf(bRel);
    return depthDiff !== 0 ? depthDiff : aRel.localeCompare(bRel);
  });

  console.log(`Found ${files.length} MDX files.`);

  const sections = [];

  // Files are read one at a time so the number of simultaneously open files
  // stays bounded regardless of how many pages there are.
  for (const file of files) {
    const rawContent = await readFile(file, "utf-8");
    const url = pathToUrl(file);
    // `||` on purpose: a missing or empty title falls back to the page URL.
    const title = extractTitle(rawContent) || url;
    const body = stripJsxComponents(stripFrontmatter(rawContent)).trim();

    // Skip empty or near-empty pages
    if (body.length < 10) {
      console.log(`  Skipping (too short): ${relative(DOCS_DIR, file)}`);
      continue;
    }

    sections.push(`# ${title}\n> ${url}\n\n${body}\n\n---\n`);
  }

  const fullContent = [
    "# 安竹洋平 公式サイト - 全コンテンツ\n",
    `> このファイルは AI(LLM)による学習・参照用に自動生成されています。\n`,
    `> 生成元: ${SITE_URL}\n`,
    `> 更新日: ${new Date().toISOString().split("T")[0]}\n\n`,
    ...sections,
  ].join("");

  // Ensure the output directory exists so the write cannot fail with ENOENT.
  await mkdir(dirname(OUTPUT_FILE), { recursive: true });
  writeFileSync(OUTPUT_FILE, fullContent, "utf-8");
  console.log(
    `Generated: ${OUTPUT_FILE} (${fullContent.length.toLocaleString()} chars)`,
  );
}
+
// Script entry point: run the generator; on any failure, report the error and
// exit with status 1.
try {
  await main();
} catch (err) {
  console.error("Error:", err);
  process.exit(1);
}