/**
 * Builds `dist/llms-full.txt`: a single plain-text file that concatenates every
 * `.mdx` page found under `src/content/docs`. Each page becomes one section,
 * headed by its title and URL, with the frontmatter block and MDX
 * import/export lines removed.
 */
import { readFileSync, writeFileSync } from "node:fs";
import { readdir, readFile } from "node:fs/promises";
import { join, extname, relative } from "node:path";
// Root directory that is scanned for .mdx pages, resolved relative to this script's own directory.
const DOCS_DIR = join(import.meta.dirname, "..", "src", "content", "docs");
// Path of the generated text file, resolved relative to this script's own directory.
const OUTPUT_FILE = join(import.meta.dirname, "..", "dist", "llms-full.txt");
// Origin prepended to every page URL and printed in the generated file's header.
const SITE_URL = "https://yasutakeyohei.com";
/**
 * Walk `dir` depth-first and gather the path of every `.mdx` file beneath it.
 *
 * Any entry (file or directory) whose name begins with "_" is skipped, so
 * partials and whole "_"-prefixed folders never appear in the result.
 *
 * @param {string} dir - Directory to scan.
 * @returns {Promise<string[]>} Paths formed by joining `dir` with each entry
 *   name, in the order `readdir` reports the entries.
 */
async function collectMdxFiles(dir) {
  const found = [];
  const dirents = await readdir(dir, { withFileTypes: true });

  for (const dirent of dirents) {
    const { name } = dirent;
    const isPartial = name.startsWith("_");
    if (isPartial) {
      continue;
    }

    const target = join(dir, name);
    if (dirent.isDirectory()) {
      // Descend one directory at a time so nested results keep listing order.
      const nested = await collectMdxFiles(target);
      for (const nestedPath of nested) {
        found.push(nestedPath);
      }
      continue;
    }

    if (extname(name) === ".mdx") {
      found.push(target);
    }
  }

  return found;
}
/**
 * Map an .mdx file path to the public URL of the page it produces.
 *
 * The path is taken relative to DOCS_DIR, backslashes are normalised to "/",
 * and the ".mdx" extension is dropped. A trailing "/index" collapses to "/",
 * and the root "index" page maps to the bare site root.
 *
 * @param {string} filePath - Path of an .mdx file located under DOCS_DIR.
 * @returns {string} SITE_URL followed by "/" and the derived path.
 */
function pathToUrl(filePath) {
  const posixRelative = relative(DOCS_DIR, filePath).replace(/\\/g, "/");
  const withoutExtension = posixRelative.replace(/\.mdx$/, "");

  // Root index page → site root.
  if (withoutExtension === "index") {
    return `${SITE_URL}/`;
  }

  // Nested index.mdx → its directory URL (keeps the trailing slash).
  const slug = withoutExtension.endsWith("/index")
    ? withoutExtension.replace(/\/index$/, "/")
    : withoutExtension;

  return `${SITE_URL}/${slug}`;
}
/**
 * Remove a leading frontmatter block (the text between two "---" delimiters)
 * and return the remaining body with leading whitespace trimmed.
 *
 * Handles three cases:
 * - a regular block: "---", one or more lines, "---";
 * - an empty block ("---" immediately followed by "---"), which must not be
 *   extended to a later "---" thematic break inside the body;
 * - a UTF-8 byte-order mark in front of the opening delimiter.
 *
 * @param {string} content - Raw file content.
 * @returns {string} The body after the frontmatter, or `content` unchanged
 *   when the file does not start with a frontmatter block.
 */
function stripFrontmatter(content) {
  // The optional group is lazy (`??`) so that a closing "---" directly after
  // the opening line is preferred over scanning ahead for a later "\n---".
  const match = content.match(/^\uFEFF?---\r?\n(?:[\s\S]*?\r?\n)??---/);
  if (!match) {
    return content;
  }
  return content.slice(match[0].length).trimStart();
}
/**
 * Locate fenced code blocks (``` or ~~~) in Markdown text.
 *
 * @param {string} text - Markdown/MDX source.
 * @returns {Array<[number, number]>} `[start, end)` character offsets of each
 *   fenced block, including its fence lines. An unclosed fence runs to the end
 *   of the text.
 */
function findFencedCodeRanges(text) {
  const ranges = [];
  let open = null;
  let offset = 0;

  for (const line of text.split("\n")) {
    // Ignore a trailing "\r" so CRLF files are recognised as well.
    const bare = line.replace(/\r$/, "");
    const fence = /^[ \t]*(`{3,}|~{3,})(.*)$/.exec(bare);

    if (open === null) {
      // A backtick fence whose info string contains a backtick is inline code.
      const isOpening =
        fence !== null && !(fence[1][0] === "`" && fence[2].includes("`"));
      if (isOpening) {
        open = { char: fence[1][0], length: fence[1].length, start: offset };
      }
    } else if (
      fence !== null &&
      fence[1][0] === open.char &&
      fence[1].length >= open.length &&
      fence[2].trim() === ""
    ) {
      ranges.push([open.start, offset + line.length]);
      open = null;
    }

    offset += line.length + 1;
  }

  if (open !== null) {
    ranges.push([open.start, text.length]);
  }
  return ranges;
}

/**
 * Delete every match of `pattern` that does not overlap a fenced code block.
 *
 * @param {string} text - Markdown/MDX source.
 * @param {RegExp} pattern - Global pattern WITHOUT capture groups (the replace
 *   callback relies on its second argument being the match offset).
 * @returns {string} The text with the unprotected matches removed.
 */
function removeOutsideFences(text, pattern) {
  const fences = findFencedCodeRanges(text);
  if (fences.length === 0) {
    return text.replace(pattern, "");
  }
  return text.replace(pattern, (match, offset) => {
    const end = offset + match.length;
    const touchesFence = fences.some(
      ([start, stop]) => offset < stop && end > start,
    );
    return touchesFence ? match : "";
  });
}

/**
 * Strip MDX module syntax (`import` / `export` statements) from page content.
 *
 * Despite the name, JSX component tags themselves are NOT removed; only ESM
 * lines are. Statements inside fenced code blocks are left alone, because
 * there they are sample code shown to the reader rather than MDX module code.
 *
 * Known limitation: the export pattern stops at the end of the line it starts
 * on, so only the first line of a multi-line `export` is removed.
 *
 * @param {string} content - MDX body (frontmatter already removed).
 * @returns {string} The content without top-level import/export statements.
 */
function stripJsxComponents(content) {
  const withoutImports = removeOutsideFences(
    content,
    /^import\s+[\s\S]*?(?:from\s+['"][^'"]+['"]|['"][^'"]+['"])\s*;?\s*$/gm,
  );
  return removeOutsideFences(
    withoutImports,
    /^export\s+(?:const|let|var|function|default|async)\s+[\s\S]*?$/gm,
  );
}
/**
 * Remove the quotes that wrap a YAML scalar, leaving inner quotes intact.
 *
 * Double-quoted values have `\"` and `\\` unescaped; single-quoted values have
 * `''` collapsed to `'`. A trailing ` # comment` after a quoted value is
 * dropped. Unquoted values are returned trimmed and otherwise unchanged.
 *
 * @param {string} raw - Text found after "title:".
 * @returns {string} The unwrapped value.
 */
function unquoteYamlScalar(raw) {
  const value = raw.trim();

  const doubleQuoted = /^"((?:[^"\\]|\\.)*)"(?:\s+#.*)?$/.exec(value);
  if (doubleQuoted) {
    return doubleQuoted[1].replace(/\\(["\\])/g, "$1");
  }

  const singleQuoted = /^'((?:[^']|'')*)'(?:\s+#.*)?$/.exec(value);
  if (singleQuoted) {
    return singleQuoted[1].replaceAll("''", "'");
  }

  return value;
}

/**
 * Extract the `title` value from the frontmatter of an MDX file.
 *
 * Only the quotes wrapping the value are removed, so apostrophes and quotes
 * inside the title survive. A leading UTF-8 byte-order mark is tolerated, and
 * an empty frontmatter block is treated as having no title rather than being
 * extended to a later "---" line in the body.
 *
 * @param {string} content - Raw file content.
 * @returns {string|null} The title, or null when there is no frontmatter or
 *   no `title:` line inside it.
 */
function extractTitle(content) {
  // Lazy optional group: prefer a closing "---" directly after the opening one.
  const block = content.match(/^\uFEFF?---\r?\n(?:([\s\S]*?)\r?\n)??---/);
  if (!block) {
    return null;
  }

  const frontmatter = block[1] ?? "";
  const titleLine = frontmatter.match(/^title:\s*(.+)$/m);
  return titleLine ? unquoteYamlScalar(titleLine[1]) : null;
}
/**
 * Generate the combined text file.
 *
 * Steps:
 * 1. Collect every .mdx file under DOCS_DIR.
 * 2. Order them by nesting depth (top-level pages first), then by relative path.
 * 3. For each file, remove the frontmatter and MDX import/export lines and
 *    emit a section headed by the page title (or its URL when no title is
 *    found) and URL. Pages whose remaining body is shorter than 10 characters
 *    are skipped.
 * 4. Write a header followed by all sections to OUTPUT_FILE.
 *
 * @returns {Promise<void>} Resolves once the file has been written; rejects on
 *   any read/write error.
 */
async function main() {
  console.log("Collecting MDX files...");
  const files = await collectMdxFiles(DOCS_DIR);

  // Compute each sort key once instead of inside every comparison.
  const keyed = files.map((file) => {
    const rel = relative(DOCS_DIR, file).replace(/\\/g, "/");
    return { file, rel, depth: (rel.match(/\//g) || []).length };
  });
  // Sort: top-level files first, then by path
  keyed.sort((a, b) => a.depth - b.depth || a.rel.localeCompare(b.rel));
  console.log(`Found ${files.length} MDX files.`);

  const sections = [];
  for (const { file } of keyed) {
    // Read without blocking; files are read one at a time, so section order
    // and log order follow the sort order.
    const rawContent = await readFile(file, "utf-8");
    const url = pathToUrl(file);
    const title = extractTitle(rawContent) || url;
    const body = stripJsxComponents(stripFrontmatter(rawContent)).trim();

    // Skip empty or near-empty pages
    if (body.length < 10) {
      console.log(` Skipping (too short): ${relative(DOCS_DIR, file)}`);
      continue;
    }
    sections.push(`# ${title}\n> ${url}\n\n${body}\n\n---\n`);
  }

  const fullContent = [
    "# 安竹洋平 公式サイト - 全コンテンツ\n",
    `> このファイルは AI(LLM)による学習・参照用に自動生成されています。\n`,
    `> 生成元: ${SITE_URL}\n`,
    // Date portion (YYYY-MM-DD) of the current UTC timestamp.
    `> 更新日: ${new Date().toISOString().split("T")[0]}\n\n`,
    ...sections,
  ].join("");

  writeFileSync(OUTPUT_FILE, fullContent, "utf-8");
  console.log(
    `Generated: ${OUTPUT_FILE} (${fullContent.length.toLocaleString()} chars)`,
  );
}
// Entry point: run the generator; on any failure, log it and exit non-zero so
// the calling build step fails too.
main().catch((error) => {
  console.error("Error:", error);
  process.exit(1);
});