import { readFileSync, writeFileSync } from "node:fs";
import { readdir, readFile } from "node:fs/promises";
import { join, extname, relative } from "node:path";

// Directory that is scanned recursively for .mdx pages, resolved relative to
// the directory containing this script.
const DOCS_DIR = join(import.meta.dirname, "..", "src", "content", "docs");
// Path of the generated text bundle written by main().
const OUTPUT_FILE = join(import.meta.dirname, "..", "dist", "llms-full.txt");
// Site origin (no trailing slash) used to build page URLs and shown in the
// header of the generated file.
const SITE_URL = "https://yasutakeyohei.com";

/**
 * Walk a directory tree depth-first and gather the paths of all `.mdx` files.
 *
 * Any entry whose name begins with "_" is ignored. The check runs before the
 * directory test, so underscore-prefixed directories are skipped together
 * with everything beneath them, not just underscore-prefixed files.
 *
 * @param {string} dir - Directory to scan.
 * @returns {Promise<string[]>} Paths built by joining `dir` with each entry
 *   name, in the order the directory listing yields them.
 */
async function collectMdxFiles(dir) {
  const listing = await readdir(dir, { withFileTypes: true });
  const visible = listing.filter((dirent) => !dirent.name.startsWith("_"));

  const collected = [];
  for (const dirent of visible) {
    const target = join(dir, dirent.name);

    if (dirent.isDirectory()) {
      // Descend one directory at a time so results keep listing order.
      const nested = await collectMdxFiles(target);
      collected.push(...nested);
      continue;
    }

    if (extname(dirent.name) === ".mdx") {
      collected.push(target);
    }
  }

  return collected;
}

/**
 * Map an .mdx file path to the public URL of its page.
 *
 * The path is taken relative to DOCS_DIR, backslashes become "/", and the
 * `.mdx` suffix is dropped. An `index` file stands for its directory: a
 * nested `dir/index` yields `dir/`, and the root `index` yields the site root.
 *
 * @param {string} filePath - Path of an .mdx file under DOCS_DIR.
 * @returns {string} SITE_URL followed by "/" and the derived page path.
 */
function pathToUrl(filePath) {
  const slug = relative(DOCS_DIR, filePath)
    .replace(/\\/g, "/")
    .replace(/\.mdx$/, "");

  // Root index page: nothing follows the slash.
  if (slug === "index") {
    return `${SITE_URL}/`;
  }

  // Nested index page: keep the directory with its trailing slash.
  if (slug.endsWith("/index")) {
    const directory = slug.slice(0, -"index".length);
    return `${SITE_URL}/${directory}`;
  }

  return `${SITE_URL}/${slug}`;
}

/**
 * Drop a leading frontmatter block from file content.
 *
 * A frontmatter block is recognised only at the very start of the text: a
 * `---` line, any content, then a line beginning with `---`. Everything up to
 * and including that closing `---` is removed and leading whitespace of the
 * remainder is trimmed. Text without such a block is returned untouched.
 *
 * @param {string} content - Raw file content.
 * @returns {string} The content that follows the frontmatter.
 */
function stripFrontmatter(content) {
  const header = content.match(/^---\r?\n[\s\S]*?\r?\n---/);
  if (!header) {
    return content;
  }

  const [block] = header;
  const remainder = content.slice(block.length);
  return remainder.trimStart();
}

/**
 * Remove ES module `import` and `export` statements from MDX body text.
 *
 * Despite the name, JSX/Astro component tags are NOT touched here: only
 * module statements are dropped and all other text passes through unchanged.
 *
 * Limits of the regex heuristic:
 * - an `import` may span several lines; it is removed up to the quoted module
 *   specifier (plus an optional semicolon) that ends a line;
 * - an `export` is removed only up to the next end of line, so the remaining
 *   lines of a multi-line export are left in place.
 *
 * @param {string} content - MDX body text.
 * @returns {string} The text with every matching statement replaced by "".
 */
function stripJsxComponents(content) {
  const importStatement =
    /^import\s+[\s\S]*?(?:from\s+['"][^'"]+['"]|['"][^'"]+['"])\s*;?\s*$/gm;
  const exportStatement =
    /^export\s+(?:const|let|var|function|default|async)\s+[\s\S]*?$/gm;

  // Imports are removed first, then exports, on the already-filtered text.
  const withoutImports = content.replace(importStatement, "");
  return withoutImports.replace(exportStatement, "");
}

/**
 * Extract the page title from the frontmatter of an MDX file.
 *
 * Looks for a `title:` key at the start of a line inside the leading
 * `---` ... `---` block. The value must be on the same line as the key.
 * If the value is a quoted scalar, only the enclosing pair of quotes is
 * removed (and `\"` / `\\` or `''` escapes are resolved); quotes and
 * apostrophes inside the title are kept.
 *
 * @param {string} content - Raw file content, including frontmatter.
 * @returns {string|null} The title text, or null when there is no
 *   frontmatter or no non-empty `title:` line in it.
 */
function extractTitle(content) {
  const frontmatter = content.match(/^---\r?\n([\s\S]*?)\r?\n---/);
  if (!frontmatter) return null;

  // [ \t]* instead of \s*: \s would cross the newline after an empty
  // `title:` and capture the following frontmatter line as the title.
  const titleLine = frontmatter[1].match(/^title:[ \t]*(.+)$/m);
  if (!titleLine) return null;

  const raw = titleLine[1].trim();

  // Double-quoted scalar, optionally followed by a " # comment".
  const doubleQuoted = raw.match(/^"((?:[^"\\]|\\.)*)"(?:\s+#.*)?$/);
  if (doubleQuoted) {
    return doubleQuoted[1].replace(/\\(["\\])/g, "$1");
  }

  // Single-quoted scalar; a doubled '' stands for one literal apostrophe.
  const singleQuoted = raw.match(/^'((?:[^']|'')*)'(?:\s+#.*)?$/);
  if (singleQuoted) {
    return singleQuoted[1].replaceAll("''", "'");
  }

  // Plain scalar: keep as written, including any inner quote characters.
  return raw;
}

/**
 * Build the combined text bundle from all documentation pages.
 *
 * Collects the .mdx files under DOCS_DIR, orders them shallowest-first (ties
 * broken by relative path), reduces each page to its body text, and writes
 * every non-trivial page as one section of OUTPUT_FILE behind a short
 * generated header. Progress is reported on the console.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log("Collecting MDX files...");

  // Decorate each path with its "/"-separated relative form and nesting
  // depth once, sort on those, then unwrap back to plain paths.
  const ranked = (await collectMdxFiles(DOCS_DIR)).map((file) => {
    const rel = relative(DOCS_DIR, file).replace(/\\/g, "/");
    const depth = (rel.match(/\//g) || []).length;
    return { file, rel, depth };
  });
  ranked.sort(
    (left, right) =>
      left.depth - right.depth || left.rel.localeCompare(right.rel),
  );
  const files = ranked.map(({ file }) => file);

  console.log(`Found ${files.length} MDX files.`);

  const sections = [];
  for (const file of files) {
    const rawContent = readFileSync(file, "utf-8");
    const url = pathToUrl(file);
    // Pages without a usable title are headed by their URL instead.
    const heading = extractTitle(rawContent) || url;
    const text = stripJsxComponents(stripFrontmatter(rawContent)).trim();

    // Pages with fewer than 10 characters of body text are left out.
    if (text.length < 10) {
      console.log(`  Skipping (too short): ${relative(DOCS_DIR, file)}`);
      continue;
    }

    sections.push(`# ${heading}\n> ${url}\n\n${text}\n\n---\n`);
  }

  // Date portion (YYYY-MM-DD) of the current ISO timestamp.
  const today = new Date().toISOString().split("T")[0];
  const header = [
    "# 安竹洋平 公式サイト - 全コンテンツ\n",
    "> このファイルは AI（LLM）による学習・参照用に自動生成されています。\n",
    `> 生成元: ${SITE_URL}\n`,
    `> 更新日: ${today}\n\n`,
  ].join("");
  const fullContent = header + sections.join("");

  writeFileSync(OUTPUT_FILE, fullContent, "utf-8");

  const charCount = fullContent.length.toLocaleString();
  console.log(`Generated: ${OUTPUT_FILE} (${charCount} chars)`);
}

// Script entry point: run the generator; if the returned promise rejects,
// print the error and terminate the process with exit status 1.
main().catch((err) => {
  console.error("Error:", err);
  process.exit(1);
});