Train models on your domain data to create specialized agents with higher accuracy, consistent style, and deep understanding of your business.
┌─────────────┐    ┌─────────────┐    ┌─────────────┐    ┌─────────────┐
│   Collect   │───▶│   Prepare   │───▶│    Train    │───▶│   Deploy    │
│    Data     │    │   Dataset   │    │    Model    │    │  & Monitor  │
└─────────────┘    └─────────────┘    └─────────────┘    └─────────────┘
       │                  │                  │                  │
       ▼                  ▼                  ▼                  ▼
Conversations      Format JSONL       Upload & Run       A/B Test
Support tickets    Validate data      Monitor loss       Compare metrics
Domain docs        Split train/val    Adjust params      Iterate
// lib/ai/training/data-collector.ts
/**
 * One chat-formatted training sample: an ordered list of role-tagged
 * messages plus optional provenance used for filtering.
 */
interface TrainingExample {
  /** Conversation turns in order; each turn carries a speaker role and its text. */
  messages: Array<{
    role: 'system' | 'user' | 'assistant'
    content: string
  }>
  /** Optional provenance; `quality` is what `filterHighQuality` inspects. */
  metadata?: {
    source: string
    quality: 'high' | 'medium' | 'low'
    category: string
    timestamp: Date
  }
}
/**
 * Gathers raw training examples from three sources, appended in this order:
 * rated conversations from the database, curated examples, and examples
 * generated from domain documentation.
 *
 * @returns Every collected example, unfiltered.
 */
export async function collectTrainingData(): Promise<TrainingExample[]> {
  const collected: TrainingExample[] = []

  // Source 1: resolved conversations rated 4 or higher from the last six months.
  // The query text is kept flush-left so the string sent to the database is unchanged.
  const rows = await db.query(`
SELECT * FROM conversations
WHERE satisfaction_rating >= 4
AND resolved = true
AND created_at > NOW() - INTERVAL '6 months'
`)

  for (const row of rows) {
    // Agent turns become 'assistant'; every other sender is treated as 'user'.
    const turns: TrainingExample['messages'] = row.messages.map(msg => ({
      role: msg.sender === 'agent' ? 'assistant' : 'user',
      content: msg.content,
    }))
    // Only a 5-star rating counts as 'high'; the query already excludes ratings below 4.
    const quality = row.satisfaction_rating === 5 ? 'high' : 'medium'

    collected.push({
      messages: [{ role: 'system', content: PROPERTY_SUPPORT_PROMPT }, ...turns],
      metadata: {
        source: 'conversations',
        quality,
        category: row.category,
        timestamp: row.created_at,
      },
    })
  }

  // Source 2: hand-curated examples.
  const curated = await loadCuratedExamples()
  collected.push(...curated)

  // Source 3: examples generated from domain documentation.
  const fromDocs = await generateFromDocs()
  collected.push(...fromDocs)

  return collected
}
/**
 * Quality filter: keeps an example only when all of the following hold.
 *  - the combined character length of its messages is within 100..10000 inclusive
 *  - it contains at least one system, one user and one assistant message
 *  - its metadata (if any) does not mark it as 'low' quality
 *
 * @param examples Candidate examples; the input array is not modified.
 * @returns A new array holding the examples that passed, in their original order.
 */
export function filterHighQuality(
  examples: TrainingExample[]
): TrainingExample[] {
  const kept: TrainingExample[] = []

  for (const example of examples) {
    // One pass over the messages gathers both the size and the roles present.
    let charCount = 0
    const rolesSeen = new Set<string>()
    for (const { role, content } of example.messages) {
      charCount += content.length
      rolesSeen.add(role)
    }

    const sizeOk = charCount >= 100 && charCount <= 10000
    const rolesOk =
      rolesSeen.has('system') && rolesSeen.has('user') && rolesSeen.has('assistant')
    // Examples without metadata are not rejected on quality grounds.
    const qualityOk = example.metadata?.quality !== 'low'

    if (sizeOk && rolesOk && qualityOk) {
      kept.push(example)
    }
  }

  return kept
}
// lib/ai/training/dataset-preparer.ts
import { createWriteStream } from 'fs'
/**
 * Shape of one line in the JSONL files written by `prepareDataset`:
 * only the chat messages, with no metadata.
 */
interface OpenAITrainingFormat {
  messages: Array<{
    role: 'system' | 'user' | 'assistant'
    content: string
  }>
}
/**
 * Shuffles the examples, splits them 90/10 into training and validation
 * sets, and writes each set as JSONL (`train.jsonl` and `val.jsonl`) under
 * `outputPath`.
 *
 * The returned promise settles only after both files have been fully
 * flushed, so callers can read or upload them immediately. A write error
 * rejects the promise instead of surfacing as an unhandled stream error.
 *
 * @param examples Examples to write. The array is NOT modified; shuffling
 *   happens on a copy.
 * @param outputPath Directory that receives `train.jsonl` and `val.jsonl`.
 * @returns Example counts plus the token and cost estimates computed by
 *   `calculateAvgTokens` and `estimateTrainingCost` over all examples.
 */
export async function prepareDataset(
  examples: TrainingExample[],
  outputPath: string
): Promise<DatasetStats> {
  // Fisher–Yates shuffle on a copy. A random sort comparator is both biased
  // and mutates the caller's array in place.
  const shuffled = [...examples]
  for (let i = shuffled.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1))
    const tmp = shuffled[i]
    shuffled[i] = shuffled[j]
    shuffled[j] = tmp
  }

  // Split 90/10 train/validation
  const splitIndex = Math.floor(shuffled.length * 0.9)
  const trainSet = shuffled.slice(0, splitIndex)
  const valSet = shuffled.slice(splitIndex)

  // Keep only the chat messages (metadata is dropped) and trim their content.
  const formatExample = (ex: TrainingExample): OpenAITrainingFormat => ({
    messages: ex.messages.map(m => ({
      role: m.role,
      content: m.content.trim(),
    })),
  })

  // Writes one JSONL file and resolves once the stream has flushed to disk.
  const writeJsonl = (path: string, rows: TrainingExample[]): Promise<void> =>
    new Promise<void>((resolve, reject) => {
      const stream = createWriteStream(path)
      stream.on('error', reject)
      stream.on('finish', () => resolve())
      for (const ex of rows) {
        stream.write(JSON.stringify(formatExample(ex)) + '\n')
      }
      stream.end()
    })

  await Promise.all([
    writeJsonl(`${outputPath}/train.jsonl`, trainSet),
    writeJsonl(`${outputPath}/val.jsonl`, valSet),
  ])

  // Calculate statistics
  return {
    totalExamples: examples.length,
    trainExamples: trainSet.length,
    valExamples: valSet.length,
    avgTokensPerExample: calculateAvgTokens(examples),
    estimatedCost: estimateTrainingCost(examples),
  }
}
/**
 * Validates a JSONL dataset file line by line.
 *
 * Errors (make the result invalid):
 *  - the line is not parseable JSON
 *  - the parsed value has no `messages` array
 *  - a message is not an object, or its content is a non-string value
 *  - no message has the role 'assistant'
 *
 * Warnings (do not affect validity):
 *  - a message has missing or whitespace-only content
 *
 * JSON parsing is isolated in its own try/catch so that structural problems
 * (for example a line containing `null`, or a `null` message entry) are
 * reported for what they are rather than as "Invalid JSON".
 *
 * NOTE(review): blank lines are reported as invalid JSON; whether
 * `readLines` can yield a trailing empty line is not visible here — confirm.
 *
 * @param filePath Path of the JSONL file to check.
 * @returns `valid` is true only when no errors were recorded.
 */
export async function validateDataset(
  filePath: string
): Promise<ValidationResult> {
  const errors: string[] = []
  const warnings: string[] = []
  let lineNumber = 0
  const lines = await readLines(filePath)

  for (const line of lines) {
    lineNumber++

    let example: unknown
    try {
      example = JSON.parse(line)
    } catch {
      errors.push(`Line ${lineNumber}: Invalid JSON`)
      continue
    }

    // Validate structure. JSON.parse can return null or a primitive, so
    // narrow before touching `.messages`.
    const messages: unknown =
      typeof example === 'object' && example !== null
        ? (example as { messages?: unknown }).messages
        : undefined
    if (!Array.isArray(messages)) {
      errors.push(`Line ${lineNumber}: Missing messages array`)
      continue
    }

    let hasAssistant = false
    for (const entry of messages as unknown[]) {
      if (typeof entry !== 'object' || entry === null) {
        errors.push(`Line ${lineNumber}: Message is not an object`)
        continue
      }
      const { role, content } = entry as { role?: unknown; content?: unknown }

      if (role === 'assistant') hasAssistant = true

      // Check for empty content
      if (!content || (typeof content === 'string' && content.trim() === '')) {
        warnings.push(`Line ${lineNumber}: Empty message content`)
      } else if (typeof content !== 'string') {
        errors.push(`Line ${lineNumber}: Message content is not a string`)
      }
    }

    // Validate roles
    if (!hasAssistant) {
      errors.push(`Line ${lineNumber}: Missing assistant message`)
    }
  }

  return { valid: errors.length === 0, errors, warnings }
}
// lib/ai/training/trainer.ts
import * as fs from 'fs'
import OpenAI from 'openai'
// Module-level client shared by startFineTuning and monitorTraining.
// NOTE(review): constructed with no options — presumably credentials come
// from the environment; confirm against the deployment configuration.
const openai = new OpenAI()
/**
 * Uploads the training and validation files, then creates a fine-tuning job
 * that references them.
 *
 * Hyperparameters fall back to their defaults (3 epochs, 'auto' batch size,
 * 'auto' learning-rate multiplier) only when the config leaves them
 * null or undefined. An explicitly supplied value is always passed through.
 *
 * @param trainingFile Path of the training JSONL file.
 * @param validationFile Path of the validation JSONL file.
 * @param config Base model, optional hyperparameters and model suffix.
 * @returns Id, status and model of the created job, with its creation time
 *   converted from epoch seconds to a Date.
 */
export async function startFineTuning(
  trainingFile: string,
  validationFile: string,
  config: FineTuneConfig
): Promise<FineTuneJob> {
  // 1. Upload training file
  const trainFileResponse = await openai.files.create({
    file: fs.createReadStream(trainingFile),
    purpose: 'fine-tune',
  })

  // 2. Upload validation file
  const valFileResponse = await openai.files.create({
    file: fs.createReadStream(validationFile),
    purpose: 'fine-tune',
  })

  // 3. Create fine-tuning job
  const job = await openai.fineTuning.jobs.create({
    training_file: trainFileResponse.id,
    validation_file: valFileResponse.id,
    model: config.baseModel, // e.g., 'gpt-4o-mini-2024-07-18'
    hyperparameters: {
      // `??` rather than `||` so an explicit falsy value is not silently
      // replaced by the default.
      n_epochs: config.epochs ?? 3,
      batch_size: config.batchSize ?? 'auto',
      learning_rate_multiplier: config.learningRate ?? 'auto',
    },
    suffix: config.modelSuffix, // e.g., 'property-support-v1'
  })

  return {
    id: job.id,
    status: job.status,
    model: job.model,
    // created_at is in epoch seconds; Date expects milliseconds.
    createdAt: new Date(job.created_at * 1000),
  }
}
/**
 * Monitor training progress: fetches the job and its metric events and
 * summarizes them.
 *
 * The event listing is paginated and returns the newest events first, so
 * reading only the first page yields a truncated, reversed loss series.
 * This walks every page and restores chronological order.
 * NOTE(review): this costs one request per page on every call; if this
 * function is polled frequently for long jobs, consider caching or passing
 * an `after` cursor.
 *
 * @param jobId Id of the fine-tuning job to inspect.
 * @returns Job status, resulting model id (if any), trained tokens, epoch
 *   count, per-event training/validation loss in chronological order, and
 *   the estimated finish time or null when the job does not report one.
 */
export async function monitorTraining(
  jobId: string
): Promise<TrainingStatus> {
  const job = await openai.fineTuning.jobs.retrieve(jobId)

  // Get training metrics from all pages, not just the first one.
  const metrics: { train_loss?: number; valid_loss?: number }[] = []
  for await (const event of openai.fineTuning.jobs.listEvents(jobId)) {
    if (event.type === 'metrics') {
      metrics.push((event.data ?? {}) as { train_loss?: number; valid_loss?: number })
    }
  }
  // Events arrive newest-first; flip so index 0 is the earliest step.
  metrics.reverse()

  return {
    status: job.status,
    fineTunedModel: job.fine_tuned_model,
    trainedTokens: job.trained_tokens,
    epochs: job.hyperparameters.n_epochs,
    metrics: {
      // Both series stay index-aligned; an event without a validation loss
      // contributes `undefined` to validationLoss.
      trainingLoss: metrics.map(m => m.train_loss),
      validationLoss: metrics.map(m => m.valid_loss),
    },
    // estimated_finish is in epoch seconds; Date expects milliseconds.
    estimatedCompletion: job.estimated_finish
      ? new Date(job.estimated_finish * 1000)
      : null,
  }
}
// lib/ai/training/evaluator.ts
import { generateText } from 'ai'
/** Aggregate result returned by `evaluateModel` for one model over a set of test cases. */
interface EvaluationResult {
// Mean of the per-test-case `accuracy` scores.
accuracy: number
// Mean of the per-test-case `consistency` scores.
consistency: number
// Mean of the per-test-case `relevance` scores.
relevance: number
// Mean wall-clock time per generation, in milliseconds (measured with Date.now()).
latency: number
// Total tokens used across all test cases multiplied by COST_PER_TOKEN.
cost: number
}
/**
 * Runs every test case against the given model concurrently, scores each
 * response against the expected output, and aggregates the results.
 *
 * @param modelId Model to evaluate.
 * @param testCases Cases supplying the system prompt, user message and
 *   expected output.
 * @returns Mean accuracy, consistency, relevance and latency (milliseconds),
 *   plus total token usage multiplied by COST_PER_TOKEN.
 */
export async function evaluateModel(
  modelId: string,
  testCases: TestCase[]
): Promise<EvaluationResult> {
  // Generates and scores a single case, timing only the generation call.
  const runCase = async (testCase: TestCase) => {
    const startedAt = Date.now()
    const response = await generateText({
      model: modelId,
      system: testCase.systemPrompt,
      prompt: testCase.userMessage,
    })
    const latency = Date.now() - startedAt

    const scores = await scoreResponse(response.text, testCase.expectedOutput)
    return { ...scores, latency, tokens: response.usage.totalTokens }
  }

  const results = await Promise.all(testCases.map((testCase) => runCase(testCase)))

  const accuracy = average(results.map((r) => r.accuracy))
  const consistency = average(results.map((r) => r.consistency))
  const relevance = average(results.map((r) => r.relevance))
  const latency = average(results.map((r) => r.latency))
  const cost = sum(results.map((r) => r.tokens)) * COST_PER_TOKEN

  return { accuracy, consistency, relevance, latency, cost }
}
/**
 * A/B test: evaluates the fine-tuned model and the base model on the same
 * test cases, concurrently, and reports the differences.
 *
 * Every `improvement` value is positive when the fine-tuned model is better:
 * higher accuracy, lower latency, lower cost. The recommendation is based on
 * accuracy alone.
 *
 * @param fineTunedModel Model id of the fine-tuned candidate.
 * @param baseModel Model id of the baseline.
 * @param testCases Cases run against both models.
 * @returns Both evaluations, their deltas and a deployment recommendation.
 */
export async function abTest(
  fineTunedModel: string,
  baseModel: string,
  testCases: TestCase[]
): Promise<ABTestResult> {
  const [tuned, baseline] = await Promise.all([
    evaluateModel(fineTunedModel, testCases),
    evaluateModel(baseModel, testCases),
  ])

  const improvement = {
    accuracy: tuned.accuracy - baseline.accuracy,
    latency: baseline.latency - tuned.latency,
    costSavings: baseline.cost - tuned.cost,
  }

  // A tie on accuracy keeps the base model.
  const recommendation =
    tuned.accuracy > baseline.accuracy
      ? 'Deploy fine-tuned model'
      : 'Keep base model with improved prompts'

  return {
    fineTuned: tuned,
    base: baseline,
    improvement,
    recommendation,
  }
}
Fine-tune for: Lease terminology, maintenance categories, local regulations, tenant communication style
{"messages": [
{"role": "system", "content": "You are a property management assistant..."},
{"role": "user", "content": "What's the process for bond refund?"},
{"role": "assistant", "content": "For bond refunds in NZ: 1) Complete final inspection..."}
]}
Fine-tune for: Product categories, return policies, shipping terminology, customer tone
{"messages": [
{"role": "system", "content": "You are an e-commerce support specialist..."},
{"role": "user", "content": "My order shows delivered but I haven't received it"},
{"role": "assistant", "content": "I understand how frustrating this is! Let me help..."}
]}