From 5e1f2669e6c55d4e73ad8637f66d5752bdb437c6 Mon Sep 17 00:00:00 2001 From: Marc Mintel Date: Sun, 8 Mar 2026 14:01:37 +0100 Subject: [PATCH] feat(kabelfachmann-mcp): add local Ollama support for KABELFACHMANN_LLM_PROVIDER --- .env | 6 + package.json | 5 +- packages/gitea-mcp/src/index.ts | 57 ++++++++-- packages/kabelfachmann-mcp/src/index.ts | 4 +- packages/kabelfachmann-mcp/src/ingest.ts | 139 +++++++++++++++-------- packages/kabelfachmann-mcp/src/llm.ts | 51 ++++++++- 6 files changed, 199 insertions(+), 63 deletions(-) diff --git a/.env b/.env index b0fb29d..1e221df 100644 --- a/.env +++ b/.env @@ -3,6 +3,7 @@ IMAGE_TAG=v1.8.19 PROJECT_NAME=at-mintel PROJECT_COLOR=#82ed20 GITEA_TOKEN=ccce002e30fe16a31a6c9d5a414740af2f72a582 +GITEA_HOST=https://git.infra.mintel.me OPENROUTER_API_KEY=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 ZYTE_API_KEY=1f0f74726f044f55aaafc7ead32cd489 REPLICATE_API_KEY=r8_W3grtpXMRfi0u3AM9VdkKbuWdZMmhwU2Tn0yt @@ -11,6 +12,11 @@ DATA_FOR_SEO_API_KEY=bWFyY0BtaW50ZWwubWU6MjQ0YjBjZmIzOGY3NTIzZA== DATA_FOR_SEO_LOGIN=marc@mintel.me DATA_FOR_SEO_PASSWORD=244b0cfb38f7523d +# Kabelfachmann LLM Configuration +KABELFACHMANN_LLM_PROVIDER=ollama +KABELFACHMANN_OLLAMA_MODEL=qwen3.5 +KABELFACHMANN_OLLAMA_HOST=http://127.0.0.1:11434 + # Authentication GATEKEEPER_PASSWORD=mintel AUTH_COOKIE_NAME=mintel_gatekeeper_session diff --git a/package.json b/package.json index cbc1f1e..517a951 100644 --- a/package.json +++ b/package.json @@ -6,11 +6,12 @@ "build": "pnpm -r build", "dev": "pnpm -r dev", "dev:gatekeeper": "bash -c 'trap \"COMPOSE_PROJECT_NAME=gatekeeper docker-compose -f docker-compose.gatekeeper.yml down\" EXIT INT TERM; docker network create infra 2>/dev/null || true && COMPOSE_PROJECT_NAME=gatekeeper docker-compose -f docker-compose.gatekeeper.yml down && COMPOSE_PROJECT_NAME=gatekeeper docker-compose -f docker-compose.gatekeeper.yml up --build --remove-orphans'", - "dev:mcps:up": "docker-compose -f docker-compose.mcps.yml up -d", + "dev:mcps:up": "docker-compose -f docker-compose.mcps.yml up -d --build --remove-orphans", "dev:mcps:down": "docker-compose -f docker-compose.mcps.yml down", "dev:mcps:watch": "pnpm -r --filter=\"./packages/*-mcp\" exec tsc -w", "dev:mcps": "npm run dev:mcps:up && npm run dev:mcps:watch", "start:mcps": "npm run dev:mcps:up", + "start:mcps:force": "docker-compose -f docker-compose.mcps.yml up -d --build --force-recreate --remove-orphans", "lint": "pnpm -r --filter='./packages/**' --filter='./apps/**' lint", "test": "pnpm -r test", "changeset": "changeset", @@ -74,4 +75,4 @@ "@sentry/nextjs": "10.38.0" } } -} +} \ No newline at end of file diff --git a/packages/gitea-mcp/src/index.ts b/packages/gitea-mcp/src/index.ts index e9a39d2..aab79ab 100644 --- a/packages/gitea-mcp/src/index.ts +++ b/packages/gitea-mcp/src/index.ts @@ -503,7 +503,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { }, ); - const runs = (runsResponse.data.workflow_runs || []) as any[]; + const runs = (runsResponse.data.workflow_runs || []).slice(0, limit) as any[]; const enhancedRuns = await Promise.all( runs.map(async (run: any) => { try { @@ -557,17 +557,50 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { `/repos/${owner}/${repo}/actions/runs/${run_id}/jobs`, ); const jobs = (jobsResponse.data.jobs || []) as any[]; - const logs = jobs.map((job: any) => ({ - job_id: job.id, - job_name: job.name, - status: job.status, - conclusion: job.conclusion, - steps: (job.steps || []).map((step: any) => ({ - name: step.name, - status: step.status, - conclusion: step.conclusion, - })), - })); + const logs = await Promise.all( + jobs.map(async (job: any) => { + let console_log = undefined; + + // Fetch log text if job failed to provide context + if (job.conclusion === "failure" || job.status === "failure") { + try { + const logResponse = await giteaClient.get( + `/repos/${owner}/${repo}/actions/jobs/${job.id}/logs`, + ); + if (typeof logResponse.data === "string") { + let fullLog = logResponse.data; + // Strip ANSI escape codes + fullLog = fullLog.replace(/\x1B\[[0-9;]*[a-zA-Z]/g, ""); + + // Safely strip non-printable ASCII control characters (0x00-0x1F, except 0x09 \t, 0x0A \n, 0x0D \r). + // DO NOT strip characters > 0x7F, as they are part of valid UTF-8 multibyte characters (like emojis 🧹). + fullLog = fullLog.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, ""); + + // Truncate to 10000 characters to prevent message size limits + if (fullLog.length > 10000) { + fullLog = "...[truncated]...\n" + fullLog.slice(-10000); + } + console_log = fullLog; + } + } catch (err: any) { + console_log = `Error fetching raw console log: ${err.message}`; + } + } + + return { + job_id: job.id, + job_name: job.name, + status: job.status, + conclusion: job.conclusion, + console_log, // appended to the response + steps: (job.steps || []).map((step: any) => ({ + name: step.name, + status: step.status, + conclusion: step.conclusion, + })), + }; + }), + ); return { content: [{ type: "text", text: JSON.stringify(logs, null, 2) }], diff --git a/packages/kabelfachmann-mcp/src/index.ts b/packages/kabelfachmann-mcp/src/index.ts index b5d1851..d1de808 100644 --- a/packages/kabelfachmann-mcp/src/index.ts +++ b/packages/kabelfachmann-mcp/src/index.ts @@ -3,7 +3,7 @@ import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js"; import express from "express"; import { z } from "zod"; import { QdrantMemoryService } from "./qdrant.js"; -import { askOpenRouter } from "./llm.js"; +import { askKabelfachmannLLM } from "./llm.js"; async function main() { const server = new McpServer({ @@ -57,7 +57,7 @@ Hier ist der Kontext aus dem Handbuch: ${contextText}`; try { - const answer = await askOpenRouter(systemPrompt, args.query); + const answer = await askKabelfachmannLLM(systemPrompt, args.query); return { content: [{ type: "text", text: answer }], }; diff --git a/packages/kabelfachmann-mcp/src/ingest.ts b/packages/kabelfachmann-mcp/src/ingest.ts index e70dc1e..e8b6053 100644 --- a/packages/kabelfachmann-mcp/src/ingest.ts +++ b/packages/kabelfachmann-mcp/src/ingest.ts @@ -1,71 +1,118 @@ import fs from "fs"; +import fsPromises from "fs/promises"; import path from "path"; import pdf from "pdf-parse"; import { QdrantMemoryService } from "./qdrant.js"; -async function start() { - const qdrant = new QdrantMemoryService( - process.env.QDRANT_URL || "http://localhost:6333", +async function findPdfs(dir: string): Promise { + const entries = await fsPromises.readdir(dir, { withFileTypes: true }); + const files = await Promise.all( + entries.map((entry) => { + const res = path.resolve(dir, entry.name); + return entry.isDirectory() ? findPdfs(res) : res; + }), ); + return Array.prototype + .concat(...files) + .filter((file: string) => file.toLowerCase().endsWith(".pdf")); +} + +async function start() { + const qdrantUrl = process.env.QDRANT_URL || "http://localhost:6333"; + console.error(`Initializing Qdrant at ${qdrantUrl}...`); + const qdrant = new QdrantMemoryService(qdrantUrl); await qdrant.initialize(); - const pdfPath = path.join(process.cwd(), "data", "pdf", "kabelhandbuch.pdf"); - console.error(`Reading PDF from ${pdfPath}...`); + const dataDir = + process.env.PDF_DATA_DIR || path.join(process.cwd(), "data", "pdf"); + console.error(`Scanning for PDFs in ${dataDir}...`); - let dataBuffer; + let pdfPaths: string[] = []; try { - dataBuffer = fs.readFileSync(pdfPath); + pdfPaths = await findPdfs(dataDir); } catch (e) { - console.error( - "PDF file not found. Ensure it exists at data/pdf/kabelhandbuch.pdf", - ); + console.error(`Failed to read directory ${dataDir}. Error:`, e); process.exit(1); } - const data = await pdf(dataBuffer); - const text = data.text; - - // chunk text - // A simple chunking strategy by paragraph or chunks of ~1000 characters - const paragraphs = text - .split(/\n\s*\n/) - .map((p) => p.trim()) - .filter((p) => p.length > 50); - - let currentChunk = ""; - const chunks: string[] = []; - const MAX_CHUNK_LENGTH = 1500; - - for (const p of paragraphs) { - if (currentChunk.length + p.length > MAX_CHUNK_LENGTH) { - chunks.push(currentChunk); - currentChunk = p; - } else { - currentChunk += (currentChunk.length ? "\n\n" : "") + p; - } - } - if (currentChunk.length > 0) { - chunks.push(currentChunk); + if (pdfPaths.length === 0) { + console.error(`No PDFs found in ${dataDir}`); + process.exit(0); } - console.error( - `Split PDF into ${chunks.length} chunks. Ingesting to Qdrant...`, - ); + console.error(`Found ${pdfPaths.length} PDFs. Starting ingestion...`); - let successCount = 0; - for (let i = 0; i < chunks.length; i++) { - const chunk = chunks[i]; - const success = await qdrant.storeMemory(`Handbuch Teil ${i + 1}`, chunk); - if (success) { - successCount++; + let totalSuccess = 0; + let totalChunks = 0; + + for (const pdfPath of pdfPaths) { + console.error(`\nProcessing: ${pdfPath}`); + const filename = path.basename(pdfPath); + + let dataBuffer; + try { + dataBuffer = fs.readFileSync(pdfPath); + } catch (e) { + console.error(`Failed to read ${pdfPath}. Skipping...`); + continue; } - if ((i + 1) % 10 === 0) { - console.error(`Ingested ${i + 1}/${chunks.length} chunks...`); + + try { + const data = await pdf(dataBuffer); + const text = data.text; + + // chunk text + // A simple chunking strategy by paragraph or chunks of ~1000 characters + const paragraphs = text + .split(/\n\s*\n/) + .map((p) => p.trim()) + .filter((p) => p.length > 50); + + let currentChunk = ""; + const chunks: string[] = []; + const MAX_CHUNK_LENGTH = 1500; + + for (const p of paragraphs) { + if (currentChunk.length + p.length > MAX_CHUNK_LENGTH) { + chunks.push(currentChunk); + currentChunk = p; + } else { + currentChunk += (currentChunk.length ? "\n\n" : "") + p; + } + } + if (currentChunk.length > 0) { + chunks.push(currentChunk); + } + + console.error( + `Split ${filename} into ${chunks.length} chunks. Ingesting to Qdrant...`, + ); + + let fileSuccessCount = 0; + for (let i = 0; i < chunks.length; i++) { + const chunk = chunks[i]; + const success = await qdrant.storeMemory( + `${filename} - Teil ${i + 1}`, + chunk, + ); + if (success) { + fileSuccessCount++; + totalSuccess++; + } + if ((i + 1) % 10 === 0) { + console.error(`Ingested ${i + 1}/${chunks.length} chunks for ${filename}...`); + } + } + totalChunks += chunks.length; + + console.error(`Finished ${filename}: stored ${fileSuccessCount}/${chunks.length} chunks.`); + } catch (e) { + console.error(`Error processing ${pdfPath}:`, e); } } console.error( - `Ingestion complete! Successfully stored ${successCount}/${chunks.length} chunks.`, + `\nIngestion complete! Successfully stored ${totalSuccess}/${totalChunks} chunks across ${pdfPaths.length} files.`, ); process.exit(0); } diff --git a/packages/kabelfachmann-mcp/src/llm.ts b/packages/kabelfachmann-mcp/src/llm.ts index cfb4932..cd98dbd 100644 --- a/packages/kabelfachmann-mcp/src/llm.ts +++ b/packages/kabelfachmann-mcp/src/llm.ts @@ -1,6 +1,55 @@ import fetch from "node-fetch"; -export async function askOpenRouter( +export async function askKabelfachmannLLM( + systemPrompt: string, + userPrompt: string, +): Promise { + const provider = process.env.KABELFACHMANN_LLM_PROVIDER || "openrouter"; + + if (provider === "ollama") { + return askOllama(systemPrompt, userPrompt); + } else { + return askOpenRouter(systemPrompt, userPrompt); + } +} + +async function askOllama( + systemPrompt: string, + userPrompt: string, +): Promise { + const host = process.env.KABELFACHMANN_OLLAMA_HOST || "http://127.0.0.1:11434"; + const model = process.env.KABELFACHMANN_OLLAMA_MODEL || "qwen2.5:32b"; + + const response = await fetch(`${host}/api/chat`, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: model, + messages: [ + { role: "system", content: systemPrompt }, + { role: "user", content: userPrompt }, + ], + stream: false, + }), + }); + + if (!response.ok) { + const text = await response.text(); + throw new Error( + `Ollama API error: ${response.status} ${response.statusText} - ${text}`, + ); + } + + const data = (await response.json()) as any; + if (!data.message || !data.message.content) { + throw new Error("Invalid response from Ollama API"); + } + return data.message.content; +} + +async function askOpenRouter( systemPrompt: string, userPrompt: string, ): Promise {