From 32e2ab7749480d294b79e1e550daae07b778d1d1 Mon Sep 17 00:00:00 2001
From: Yasutake Yohei <61961825+yasutakeyohei@users.noreply.github.com>
Date: Sun, 21 Jun 2026 19:05:27 +0900
Subject: AI向けに llms.txt と llms-full.txt を生成する仕組みを追加
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

AI（LLM）がサイトを参照・学習しやすくするため、以下を追加:

- public/llms.txt: 全ページをカテゴリ別に列挙した AI 向けサイトマップ
- scripts/generate-llms-full.mjs: 全 MDX ファイルの本文を収集・結合する生成スクリプト
- package.json の build スクリプトに生成処理を追加

ビルド時に dist/llms.txt と dist/llms-full.txt が自動生成される。
---
 scripts/generate-llms-full.mjs | 144 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)
 create mode 100644 scripts/generate-llms-full.mjs

(limited to 'scripts/generate-llms-full.mjs')

diff --git a/scripts/generate-llms-full.mjs b/scripts/generate-llms-full.mjs
new file mode 100644
index 0000000..cc3fdd4
--- /dev/null
+++ b/scripts/generate-llms-full.mjs
@@ -0,0 +1,144 @@
+import { readFileSync, writeFileSync } from "node:fs";
+import { readdir, readFile } from "node:fs/promises";
+import { join, extname, relative } from "node:path";
+
+const DOCS_DIR = join(import.meta.dirname, "..", "src", "content", "docs"); // directory scanned for .mdx pages
+const OUTPUT_FILE = join(import.meta.dirname, "..", "dist", "llms-full.txt"); // file written by main()
+const SITE_URL = "https://yasutakeyohei.com"; // prefix of every generated page URL
+
+/**
+ * Recursively collect all .mdx files, excluding files starting with "_" (partials).
+ */
+async function collectMdxFiles(dir) {
+  const entries = await readdir(dir, { withFileTypes: true });
+  const files = [];
+
+  for (const entry of entries) {
+    if (entry.name.startsWith("_")) continue;
+    const fullPath = join(dir, entry.name);
+    if (entry.isDirectory()) {
+      const subFiles = await collectMdxFiles(fullPath);
+      files.push(...subFiles);
+    } else if (extname(entry.name) === ".mdx") {
+      files.push(fullPath);
+    }
+  }
+
+  return files;
+}
+
+/**
+ * Derive the page URL from the file path.
+ * Converts Starlight docs convention to URL path.
+ */
+function pathToUrl(filePath) {
+  const relPath = relative(DOCS_DIR, filePath).replace(/\\/g, "/");
+  let urlPath = relPath.replace(/\.mdx$/, "");
+
+  // index.mdx → directory URL
+  if (urlPath.endsWith("/index")) {
+    urlPath = urlPath.replace(/\/index$/, "/");
+  }
+
+  // Root index → /
+  if (urlPath === "index") {
+    urlPath = "";
+  }
+
+  return `${SITE_URL}/${urlPath}`;
+}
+
+/**
+ * Strip frontmatter (content between --- delimiters).
+ * Returns the body content after the second "---" line.
+ */
+function stripFrontmatter(content) {
+  // Match frontmatter delimited by --- at the start of the file
+  const match = content.match(/^---\r?\n[\s\S]*?\r?\n---/);
+  if (match) {
+    return content.slice(match[0].length).trimStart();
+  }
+  return content;
+}
+
+/**
+ * Strip JSX components (self-closing and paired tags with uppercase names).
+ * This is a simple heuristic that removes Astro/MDX components.
+ */
+function stripJsxComponents(content) {
+  // Remove import/export statements
+  content = content.replace(
+    /^import\s+[\s\S]*?(?:from\s+['"][^'"]+['"]|['"][^'"]+['"])\s*;?\s*$/gm,
+    "",
+  );
+  content = content.replace(
+    /^export\s+(?:const|let|var|function|default|async)\s+[\s\S]*?$/gm,
+    "",
+  );
+
+  return content;
+}
+
+/**
+ * Extract the title from the frontmatter of an MDX file.
+ */
+function extractTitle(content) {
+  const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---/);
+  if (!match) return null;
+  const frontmatter = match[1];
+  const titleMatch = frontmatter.match(/^title:\s*(.+)$/m);
+  return titleMatch ? titleMatch[1].trim().replace(/['"]/g, "") : null;
+}
+
+async function main() {
+  console.log("Collecting MDX files...");
+  const files = await collectMdxFiles(DOCS_DIR);
+
+  // Sort: top-level files first, then by path
+  files.sort((a, b) => {
+    const aRel = relative(DOCS_DIR, a).replace(/\\/g, "/");
+    const bRel = relative(DOCS_DIR, b).replace(/\\/g, "/");
+    const aDepth = (aRel.match(/\//g) || []).length;
+    const bDepth = (bRel.match(/\//g) || []).length;
+    if (aDepth !== bDepth) return aDepth - bDepth;
+    return aRel.localeCompare(bRel);
+  });
+
+  console.log(`Found ${files.length} MDX files.`);
+
+  const sections = [];
+
+  for (const file of files) {
+    const rawContent = readFileSync(file, "utf-8");
+    const title = extractTitle(rawContent) || pathToUrl(file);
+    const url = pathToUrl(file);
+    let body = stripFrontmatter(rawContent);
+    body = stripJsxComponents(body);
+
+    // Skip empty or near-empty pages
+    if (body.trim().length < 10) {
+      console.log(`  Skipping (too short): ${relative(DOCS_DIR, file)}`);
+      continue;
+    }
+
+    sections.push(`# ${title}\n> ${url}\n\n${body.trim()}\n\n---\n`);
+  }
+
+  const fullContent = [
+    "# 安竹洋平 公式サイト - 全コンテンツ\n",
+    `> このファイルは AI（LLM）による学習・参照用に自動生成されています。\n`,
+    `> 生成元: ${SITE_URL}\n`,
+    `> 更新日: ${new Date().toISOString().split("T")[0]}\n\n`,
+    ...sections,
+  ].join("");
+
+  writeFileSync(OUTPUT_FILE, fullContent, "utf-8");
+  console.log(
+    `Generated: ${OUTPUT_FILE} (${fullContent.length.toLocaleString()} chars)`,
+  );
+}
+
+main().catch((err) => { // run the generator; any rejection from main() lands here
+  console.error("Error:", err);
+  process.exit(1); // non-zero exit status reports the failure to the invoking process
+});
-- 
cgit v1.3.1