diff --git a/apps/image-service/Dockerfile b/apps/image-service/Dockerfile index 54c109d..fd77686 100644 --- a/apps/image-service/Dockerfile +++ b/apps/image-service/Dockerfile @@ -6,39 +6,21 @@ RUN npm install -g pnpm@10.30.1 FROM base AS build WORKDIR /app COPY . . -# Note: Canvas needs build tools on Debian -RUN apt-get update && apt-get install -y python3 make g++ libcairo2-dev libpango1.0-dev libjpeg-dev libgif-dev librsvg2-dev -# Delete the prebuilt binary and force a clean rebuild from source for the correct container architecture -ENV npm_config_arch=arm64 -ENV npm_config_target_arch=arm64 +# We only need standard pnpm install now, no C++ tools needed for basic Sharp RUN pnpm install --frozen-lockfile -RUN for dir in $(find /app/node_modules -type d -path "*/@tensorflow/tfjs-node"); do \ - cd $dir && \ - rm -rf lib/napi-v8/* && \ - npm_config_build_from_source=true npm_config_arch=arm64 npm_config_target_arch=arm64 npm run install; \ - done -# Generate models explicitly for Docker -RUN ls -la packages/image-processor/scripts || true -RUN pnpm dlx tsx packages/image-processor/scripts/download-models.ts RUN pnpm --filter @mintel/image-processor build RUN pnpm --filter image-service build -# Generated locally for caching FROM base WORKDIR /app COPY --from=build /app/node_modules ./node_modules COPY --from=build /app/apps/image-service/node_modules ./apps/image-service/node_modules COPY --from=build /app/packages/image-processor/node_modules ./packages/image-processor/node_modules -# Make sure directories exist to prevent COPY errors -RUN mkdir -p /app/packages/image-processor/models /app/apps/image-service/dist +RUN mkdir -p /app/apps/image-service/dist COPY --from=build /app/apps/image-service/dist ./apps/image-service/dist COPY --from=build /app/apps/image-service/package.json ./apps/image-service/package.json COPY --from=build /app/packages/image-processor/dist ./packages/image-processor/dist COPY --from=build /app/packages/image-processor/package.json 
./packages/image-processor/package.json -COPY --from=build /app/packages/image-processor/models ./packages/image-processor/models - -# Need runtime dependencies for canvas/sharp on Debian -RUN apt-get update && apt-get install -y libcairo2 libpango-1.0-0 libjpeg62-turbo libgif7 librsvg2-2 && rm -rf /var/lib/apt/lists/* EXPOSE 8080 WORKDIR /app/apps/image-service diff --git a/apps/image-service/src/index.ts b/apps/image-service/src/index.ts index f7d228c..4c7e69a 100644 --- a/apps/image-service/src/index.ts +++ b/apps/image-service/src/index.ts @@ -35,11 +35,9 @@ fastify.get("/process", async (request, reply) => { try { const response = await fetch(url); if (!response.ok) { - return reply - .status(response.status) - .send({ - error: `Failed to fetch source image: ${response.statusText}`, - }); + return reply.status(response.status).send({ + error: `Failed to fetch source image: ${response.statusText}`, + }); } const arrayBuffer = await response.arrayBuffer(); @@ -50,6 +48,7 @@ fastify.get("/process", async (request, reply) => { height, format, quality, + openRouterApiKey: process.env.OPENROUTER_API_KEY, }); reply.header("Content-Type", `image/${format}`); diff --git a/packages/image-processor/package.json b/packages/image-processor/package.json index cf78d3c..bff7eb0 100644 --- a/packages/image-processor/package.json +++ b/packages/image-processor/package.json @@ -18,9 +18,6 @@ "lint": "eslint src" }, "dependencies": { - "@tensorflow/tfjs-node": "^4.22.0", - "@vladmandic/face-api": "^1.7.13", - "canvas": "^2.11.2", "sharp": "^0.33.2" }, "devDependencies": { diff --git a/packages/image-processor/src/processor.ts b/packages/image-processor/src/processor.ts index 349d255..c0d6ab0 100644 --- a/packages/image-processor/src/processor.ts +++ b/packages/image-processor/src/processor.ts @@ -1,51 +1,102 @@ -import * as faceapi from "@vladmandic/face-api"; -// Provide Canvas fallback for face-api in Node.js -import { Canvas, Image, ImageData } from "canvas"; import sharp from 
"sharp"; -import * as path from "node:path"; -import { fileURLToPath } from "node:url"; - -// @ts-expect-error FaceAPI does not have type definitions for monkeyPatch -faceapi.env.monkeyPatch({ Canvas, Image, ImageData }); - -const __filename = fileURLToPath(import.meta.url); -const __dirname = path.dirname(__filename); - -// Path to the downloaded models -const MODELS_PATH = path.join(__dirname, "..", "models"); - -let isModelsLoaded = false; - -async function loadModels() { - if (isModelsLoaded) return; - await faceapi.nets.tinyFaceDetector.loadFromDisk(MODELS_PATH); - isModelsLoaded = true; -} export interface ProcessImageOptions { width: number; height: number; format?: "webp" | "jpeg" | "png" | "avif"; quality?: number; + openRouterApiKey?: string; +} + +interface FaceDetection { + x: number; + y: number; + width: number; + height: number; +} + +/** + * Detects faces using OpenRouter Vision API. + * Uses a small preview to save bandwidth and tokens. + */ +async function detectFacesWithCloud( + inputBuffer: Buffer, + apiKey: string, +): Promise { + try { + // Generate a small preview for vision API (max 512px) + const preview = await sharp(inputBuffer) + .resize(512, 512, { fit: "inside" }) + .jpeg({ quality: 60 }) + .toBuffer(); + + const base64Image = preview.toString("base64"); + + const response = await fetch( + "https://openrouter.ai/api/v1/chat/completions", + { + method: "POST", + headers: { + Authorization: `Bearer ${apiKey}`, + "Content-Type": "application/json", + "HTTP-Referer": "https://mintel.me", + "X-Title": "Mintel Image Service", + }, + body: JSON.stringify({ + model: "google/gemini-2.0-flash-001", // Fast, cheap, and supports vision + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: 'Detect all human faces in this image. Return ONLY a JSON array of bounding boxes like: [{"x": 0.1, "y": 0.2, "width": 0.05, "height": 0.05}]. Coordinates must be normalized (0 to 1). 
If no faces, return [].', + }, + { + type: "image_url", + image_url: { + url: `data:image/jpeg;base64,${base64Image}`, + }, + }, + ], + }, + ], + response_format: { type: "json_object" }, + }), + }, + ); + + if (!response.ok) { + throw new Error(`OpenRouter API error: ${response.statusText}`); + } + + const data = (await response.json()) as any; + const content = data.choices[0]?.message?.content; + + if (!content) return []; + + // The model might return directly or wrapped in a json field + const parsed = typeof content === "string" ? JSON.parse(content) : content; + const detections = (parsed.faces || parsed.detections || parsed) as any[]; + + if (!Array.isArray(detections)) return []; + + return detections.map((d) => ({ + x: d.x, + y: d.y, + width: d.width, + height: d.height, + })); + } catch (error) { + console.error("Cloud face detection failed:", error); + return []; + } } export async function processImageWithSmartCrop( inputBuffer: Buffer, options: ProcessImageOptions, ): Promise<Buffer> { - await loadModels(); - - // Load image via Canvas for face-api - const img = new Image(); - img.src = inputBuffer; - - // Detect faces - const detections = await faceapi.detectAllFaces( - // @ts-expect-error FaceAPI does not have type definitions for monkeyPatch - img, - new faceapi.TinyFaceDetectorOptions(), - ); - const sharpImage = sharp(inputBuffer); const metadata = await sharpImage.metadata(); @@ -53,35 +104,36 @@ export async function processImageWithSmartCrop( throw new Error("Could not read image metadata"); } + const detections = options.openRouterApiKey + ? 
await detectFacesWithCloud(inputBuffer, options.openRouterApiKey) + : []; + // If faces are found, calculate the bounding box containing all faces if (detections.length > 0) { + // Map normalized coordinates back to pixels + const pixelDetections = detections.map((d) => ({ + x: d.x * (metadata.width || 0), + y: d.y * (metadata.height || 0), + width: d.width * (metadata.width || 0), + height: d.height * (metadata.height || 0), + })); + let minX = metadata.width; let minY = metadata.height; let maxX = 0; let maxY = 0; - for (const det of detections) { - const { x, y, width, height } = det.box; - if (x < minX) minX = Math.max(0, x); - if (y < minY) minY = Math.max(0, y); - if (x + width > maxX) maxX = Math.min(metadata.width, x + width); - if (y + height > maxY) maxY = Math.min(metadata.height, y + height); + for (const det of pixelDetections) { + if (det.x < minX) minX = Math.max(0, det.x); + if (det.y < minY) minY = Math.max(0, det.y); + if (det.x + det.width > maxX) + maxX = Math.min(metadata.width, det.x + det.width); + if (det.y + det.height > maxY) + maxY = Math.min(metadata.height, det.y + det.height); } - const faceBoxWidth = maxX - minX; - const faceBoxHeight = maxY - minY; - - // Calculate center of the faces - const centerX = Math.floor(minX + faceBoxWidth / 2); - const centerY = Math.floor(minY + faceBoxHeight / 2); - - // Provide this as a focus point for sharp's extract or resize - // We can use sharp's resize with `position` focusing on crop options, - // or calculate an exact bounding box. However, extracting an exact bounding box - // and then resizing usually yields the best results when focusing on a specific coordinate. - - // A simpler approach is to crop a rectangle with the target aspect ratio - // centered on the faces, then resize. Let's calculate the crop box. 
+ const centerX = Math.floor(minX + (maxX - minX) / 2); + const centerY = Math.floor(minY + (maxY - minY) / 2); const targetRatio = options.width / options.height; const currentRatio = metadata.width / metadata.height; @@ -90,18 +142,14 @@ export async function processImageWithSmartCrop( let cropHeight = metadata.height; if (currentRatio > targetRatio) { - // Image is wider than target, calculate new width cropWidth = Math.floor(metadata.height * targetRatio); } else { - // Image is taller than target, calculate new height cropHeight = Math.floor(metadata.width / targetRatio); } - // Try to center the crop box around the faces let cropX = Math.floor(centerX - cropWidth / 2); let cropY = Math.floor(centerY - cropHeight / 2); - // Keep crop box within image bounds if (cropX < 0) cropX = 0; if (cropY < 0) cropY = 0; if (cropX + cropWidth > metadata.width) cropX = metadata.width - cropWidth; @@ -116,9 +164,7 @@ export async function processImageWithSmartCrop( }); } - // Finally, resize to the requested dimensions and format let finalImage = sharpImage.resize(options.width, options.height, { - // If faces weren't found, default to entropy/attention based cropping as fallback fit: "cover", position: detections.length > 0 ? "center" : "attention", });