add ai job queue

This commit is contained in:
Garret Patti
2026-04-13 12:29:09 -04:00
parent 8557c80c52
commit fea55594d0
18 changed files with 818 additions and 167 deletions

351
src/lib/ai-jobs.ts Normal file
View File

@@ -0,0 +1,351 @@
import crypto from 'crypto'
import { getDb } from './db'
import { getAiMaxRetries } from './app-settings'
import { tagSingleItem, generateItemDescription, extractItemText, translateItemText } from './ai-tagger'
export type AiJobType = 'tag' | 'describe' | 'extract' | 'translate'
export type AiJobStatus = 'queued' | 'running' | 'completed' | 'failed'
export interface AiJob {
id: string
itemKey: string
libraryId: string
jobType: AiJobType
status: AiJobStatus
error: string | null
attempt: number
maxRetries: number
createdAt: number
startedAt: number | null
completedAt: number | null
itemTitle: string | null
}
interface AiJobRow {
id: string
item_key: string
library_id: string
job_type: string
status: string
error: string | null
attempt: number
max_retries: number
created_at: number
started_at: number | null
completed_at: number | null
item_title: string | null
}
function rowToJob(row: AiJobRow): AiJob {
return {
id: row.id,
itemKey: row.item_key,
libraryId: row.library_id,
jobType: row.job_type as AiJobType,
status: row.status as AiJobStatus,
error: row.error,
attempt: row.attempt,
maxRetries: row.max_retries,
createdAt: row.created_at,
startedAt: row.started_at,
completedAt: row.completed_at,
itemTitle: row.item_title,
}
}
/**
* Look up the title of a media item for display purposes.
*/
function resolveItemTitle(itemKey: string): string | null {
const db = getDb()
const row = db
.prepare('SELECT title FROM media_items WHERE item_key = ?')
.get(itemKey) as { title: string | null } | undefined
return row?.title ?? null
}
// ─── Enqueue ─────────────────────────────────────────────────────────────────
/**
* Enqueue an AI job. Deduplicates: if a queued/running job with the same
* item_key + job_type already exists, returns its ID instead.
*/
export function enqueueJob(
itemKey: string,
jobType: AiJobType,
libraryId: string,
sourceLanguage?: string,
): string {
const db = getDb()
// Deduplication: check for existing queued/running job
const existing = db
.prepare(
`SELECT id FROM ai_jobs
WHERE item_key = ? AND job_type = ? AND status IN ('queued', 'running')`
)
.get(itemKey, jobType) as { id: string } | undefined
if (existing) return existing.id
const id = crypto.randomUUID()
const maxRetries = getAiMaxRetries()
const title = resolveItemTitle(itemKey)
// Store sourceLanguage in the error field temporarily for translate jobs
// (it's null at creation, so we repurpose it briefly — cleared when the job runs)
const metadata = jobType === 'translate' && sourceLanguage ? sourceLanguage : null
db.prepare(
`INSERT INTO ai_jobs (id, item_key, library_id, job_type, status, error, attempt, max_retries, created_at, item_title)
VALUES (?, ?, ?, ?, 'queued', ?, 0, ?, ?, ?)`
).run(id, itemKey, libraryId, jobType, metadata, maxRetries, Date.now(), title)
// Wake the processor
wakeProcessor()
return id
}
/**
* Enqueue jobs for all media items in a directory (for bulk operations).
* Returns the list of job IDs created.
*/
export function enqueueBulkJobs(
libraryId: string,
dirPath: string,
jobType: AiJobType,
itemTypeFilter?: string,
extFilter?: Set<string>,
): string[] {
const db = getDb()
const prefix = dirPath
? `${libraryId}:mixed_file:${encodeURIComponent(dirPath + '/')}`
: `${libraryId}:mixed_file:`
const items = db
.prepare('SELECT item_key, item_type, file_path FROM media_items WHERE item_key LIKE ? AND item_type = ?')
.all(`${prefix}%`, itemTypeFilter ?? 'mixed_file') as Array<{ item_key: string; item_type: string; file_path: string | null }>
const path = require('path')
const jobIds: string[] = []
for (const item of items) {
if (!item.file_path) continue
if (extFilter) {
const ext = path.extname(item.file_path).toLowerCase()
if (!extFilter.has(ext)) continue
}
const jobId = enqueueJob(item.item_key, jobType, libraryId)
jobIds.push(jobId)
}
return jobIds
}
// ─── Query ───────────────────────────────────────────────────────────────────
export function getJobQueue(): AiJob[] {
const db = getDb()
const rows = db
.prepare(
`SELECT * FROM ai_jobs
WHERE status IN ('running', 'queued')
ORDER BY
CASE status WHEN 'running' THEN 0 ELSE 1 END,
created_at ASC`
)
.all() as AiJobRow[]
return rows.map(rowToJob)
}
export function getJobHistory(limit = 50): AiJob[] {
const db = getDb()
const rows = db
.prepare(
`SELECT * FROM ai_jobs
WHERE status IN ('completed', 'failed')
ORDER BY completed_at DESC
LIMIT ?`
)
.all(limit) as AiJobRow[]
return rows.map(rowToJob)
}
export function getJob(jobId: string): AiJob | null {
const db = getDb()
const row = db
.prepare('SELECT * FROM ai_jobs WHERE id = ?')
.get(jobId) as AiJobRow | undefined
return row ? rowToJob(row) : null
}
// ─── Actions ─────────────────────────────────────────────────────────────────
export function retryJob(jobId: string): boolean {
const db = getDb()
const result = db
.prepare(
`UPDATE ai_jobs SET status = 'queued', error = NULL, attempt = 0, started_at = NULL, completed_at = NULL
WHERE id = ? AND status = 'failed'`
)
.run(jobId)
if (result.changes > 0) {
wakeProcessor()
return true
}
return false
}
export function cancelJob(jobId: string): boolean {
const db = getDb()
const result = db
.prepare("DELETE FROM ai_jobs WHERE id = ? AND status = 'queued'")
.run(jobId)
return result.changes > 0
}
export function cancelAllQueued(): number {
const db = getDb()
const result = db
.prepare("DELETE FROM ai_jobs WHERE status = 'queued'")
.run()
return result.changes
}
export function clearJobHistory(): number {
const db = getDb()
const result = db
.prepare("DELETE FROM ai_jobs WHERE status IN ('completed', 'failed')")
.run()
return result.changes
}
// ─── Processor ───────────────────────────────────────────────────────────────
let processorRunning = false
let processorWake: (() => void) | null = null
function wakeProcessor(): void {
if (processorWake) {
processorWake()
} else if (!processorRunning) {
runProcessor()
}
}
async function processNextJob(): Promise<boolean> {
const db = getDb()
const row = db
.prepare(
`SELECT * FROM ai_jobs
WHERE status = 'queued'
ORDER BY created_at ASC
LIMIT 1`
)
.get() as AiJobRow | undefined
if (!row) return false
const now = Date.now()
// Extract sourceLanguage for translate jobs (stored in error field at enqueue)
const sourceLanguage = row.job_type === 'translate' ? row.error : null
db.prepare(
"UPDATE ai_jobs SET status = 'running', started_at = ?, error = NULL WHERE id = ?"
).run(now, row.id)
try {
switch (row.job_type) {
case 'tag':
await tagSingleItem(row.item_key)
break
case 'describe':
await generateItemDescription(row.item_key)
break
case 'extract':
await extractItemText(row.item_key)
break
case 'translate':
await translateItemText(row.item_key, sourceLanguage || undefined)
break
}
db.prepare(
"UPDATE ai_jobs SET status = 'completed', completed_at = ? WHERE id = ?"
).run(Date.now(), row.id)
} catch (err) {
const errorMessage = err instanceof Error ? err.message : String(err)
const attempt = row.attempt + 1
if (attempt < row.max_retries) {
// Re-queue for retry
db.prepare(
"UPDATE ai_jobs SET status = 'queued', attempt = ?, error = ?, started_at = NULL WHERE id = ?"
).run(attempt, errorMessage, row.id)
} else {
// Final failure
db.prepare(
"UPDATE ai_jobs SET status = 'failed', attempt = ?, error = ?, completed_at = ? WHERE id = ?"
).run(attempt, errorMessage, Date.now(), row.id)
}
console.warn(
`[ai-jobs] Job ${row.id} (${row.job_type} for "${row.item_key}") failed (attempt ${attempt}/${row.max_retries}):`,
errorMessage
)
}
return true
}
async function runProcessor(): Promise<void> {
if (processorRunning) return
processorRunning = true
console.log('[ai-jobs] Processor started')
try {
while (true) {
const hadWork = await processNextJob()
if (!hadWork) {
// Wait for a wake signal or timeout after 60s (then check again for safety)
await new Promise<void>((resolve) => {
processorWake = resolve
setTimeout(() => {
processorWake = null
resolve()
}, 60_000)
})
processorWake = null
}
}
} catch (err) {
console.error('[ai-jobs] Processor crashed:', err)
} finally {
processorRunning = false
console.log('[ai-jobs] Processor stopped')
}
}
/**
* Initialize the job processor. Called on server startup.
* Resets any jobs stuck in 'running' state (from a previous crash) back to 'queued'.
*/
export function initJobProcessor(): void {
const db = getDb()
const result = db
.prepare("UPDATE ai_jobs SET status = 'queued', started_at = NULL WHERE status = 'running'")
.run()
if (result.changes > 0) {
console.log(`[ai-jobs] Reset ${result.changes} stuck running job(s) to queued`)
}
// Check if there are any queued jobs and start the processor
const pending = db
.prepare("SELECT COUNT(*) as count FROM ai_jobs WHERE status = 'queued'")
.get() as { count: number }
if (pending.count > 0) {
runProcessor()
}
}

View File

@@ -225,7 +225,7 @@ async function callVisionApi(
/**
* Run AI tagging for a single library. Called after the scanner finishes.
* Processes up to BATCH_LIMIT untagged items per invocation.
* Enqueues up to BATCH_LIMIT untagged items as jobs for the processor.
*/
export async function runAiTagging(library: Library, libraryRoot: string): Promise<void> {
const config = getEffectiveAiConfig(library.id)
@@ -234,14 +234,10 @@ export async function runAiTagging(library: Library, libraryRoot: string): Promi
const activeCategoryIds = new Set(getActiveCategoryIdsForLibrary(library.id))
const allTags = getTags()
const allCategories = getCategories()
const tags = allTags.filter((t) => activeCategoryIds.has(t.categoryId))
const categories = allCategories.filter((c) => activeCategoryIds.has(c.id))
if (tags.length === 0) return
const validTagIds = new Set(tags.map((t) => t.id))
const db = getDb()
const untaggedItems = db
.prepare(
@@ -254,18 +250,13 @@ export async function runAiTagging(library: Library, libraryRoot: string): Promi
if (untaggedItems.length === 0) return
console.log(`[ai-tagger] Processing ${untaggedItems.length} items in library "${library.name}"`)
// Import enqueueJob lazily to avoid circular dependency
const { enqueueJob } = await import('./ai-jobs')
let tagged = 0
let consecutiveFailures = 0
let enqueued = 0
const markTagged = db.prepare('UPDATE media_items SET ai_tagged_at = ? WHERE item_key = ?')
for (const item of untaggedItems) {
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
console.warn(`[ai-tagger] Aborting after ${MAX_CONSECUTIVE_FAILURES} consecutive failures`)
break
}
const resolvedMedia = resolveItemImage(libraryRoot, item)
if (!resolvedMedia) {
// No image or video available — mark as tagged so we don't retry every scan
@@ -273,48 +264,14 @@ export async function runAiTagging(library: Library, libraryRoot: string): Promi
continue
}
try {
let base64Images: string[]
if (resolvedMedia.mediaType === 'video') {
const framePaths = await getVideoFramePaths(resolvedMedia.path, library.id, VIDEO_FRAME_PERCENTAGES)
base64Images = framePaths.map((p) => fs.readFileSync(p, 'base64'))
} else {
const thumbnailPath = await getAiImagePath(resolvedMedia.path, library.id)
base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
}
const { tags: currentItemTags } = getResolvedTagsForItem(item.item_key)
const aiFields = getAiFields(item.item_key)
const systemPrompt = buildTagPrompt(tags, categories, {
currentTags: currentItemTags,
mediaContext: resolvedMedia.mediaType,
aiDescription: aiFields.aiDescription,
extractedText: aiFields.extractedTextTranslated ?? aiFields.extractedText,
customInstruction: config.promptTagger || undefined,
})
const suggestedIds = await callVisionApi(config.endpoint, taggingModel, base64Images, systemPrompt)
// Filter to valid tags only
const validIds = suggestedIds.filter((id) => validTagIds.has(id))
for (const tagId of validIds) {
addTagToItem(item.item_key, tagId)
}
markTagged.run(Date.now(), item.item_key)
tagged++
consecutiveFailures = 0
} catch (err) {
consecutiveFailures++
console.warn(
`[ai-tagger] Failed to tag "${item.item_key}":`,
err instanceof Error ? err.message : err
)
}
enqueueJob(item.item_key, 'tag', library.id)
// Mark as tagged immediately so subsequent scans don't re-enqueue
markTagged.run(Date.now(), item.item_key)
enqueued++
}
if (tagged > 0) {
console.log(`[ai-tagger] Tagged ${tagged}/${untaggedItems.length} items in library "${library.name}"`)
if (enqueued > 0) {
console.log(`[ai-tagger] Enqueued ${enqueued} tagging jobs for library "${library.name}"`)
}
}

View File

@@ -202,3 +202,15 @@ export function getEffectiveAiConfig(libraryId: string): AiConfig {
promptTranslate: overrides.promptTranslate || global.promptTranslate,
}
}
// ─── AI Max Retries ──────────────────────────────────────────────────────────
export function getAiMaxRetries(): number {
const raw = getSetting('ai_max_retries')
const parsed = parseInt(raw ?? '3', 10)
return Number.isFinite(parsed) && parsed >= 0 ? parsed : 3
}
export function setAiMaxRetries(n: number): void {
setSetting('ai_max_retries', String(Math.max(0, Math.floor(n))))
}

View File

@@ -105,6 +105,7 @@ function initDb(db: Database.Database): void {
migrateMediaItemsAiTagged(db)
migrateMediaItemsAiFields(db)
migrateLibraryAiSettings(db)
migrateAiJobs(db)
seedAppSettings(db)
}
@@ -117,6 +118,7 @@ function seedAppSettings(db: Database.Database): void {
ai_endpoint: '',
ai_model: '',
preferred_language: 'English',
ai_max_retries: '3',
}
const insert = db.prepare(
'INSERT OR IGNORE INTO app_settings (key, value) VALUES (?, ?)'
@@ -298,3 +300,25 @@ function migrateLibrariesType(db: Database.Database): void {
`)
}
}
function migrateAiJobs(db: Database.Database): void {
db.exec(`
CREATE TABLE IF NOT EXISTS ai_jobs (
id TEXT PRIMARY KEY,
item_key TEXT NOT NULL,
library_id TEXT NOT NULL,
job_type TEXT NOT NULL CHECK(job_type IN ('tag','describe','extract','translate')),
status TEXT NOT NULL DEFAULT 'queued' CHECK(status IN ('queued','running','completed','failed')),
error TEXT,
attempt INTEGER NOT NULL DEFAULT 0,
max_retries INTEGER NOT NULL DEFAULT 3,
created_at INTEGER NOT NULL,
started_at INTEGER,
completed_at INTEGER,
item_title TEXT
);
CREATE INDEX IF NOT EXISTS ai_jobs_status ON ai_jobs(status);
CREATE INDEX IF NOT EXISTS ai_jobs_created_at ON ai_jobs(created_at);
`)
}