Official consolidation of the AI Chatbot (LLM Orchestrator) feature, based on the architecture artifacts and the BDD spec.
This is a premium feature: it requires extra API key setup and Supabase Vector (pgvector).
Architecture overview:

```text
┌──────────────────────────────────────────────────────────────────┐
│                         Frontend (React)                         │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────────┐      │
│ │  ChatInput  │ │ MessageList │ │   ConversationSidebar   │      │
│ └──────┬──────┘ └──────▲──────┘ └───────────┬─────────────┘      │
│        │               │                    │                    │
│        └───────────────┼────────────────────┘                    │
│                        │                                         │
└────────────────────────┼─────────────────────────────────────────┘
                         │  Server Actions
                         ▼
┌──────────────────────────────────────────────────────────────────┐
│                        Backend (Next.js)                         │
│ ┌──────────────────────────────────────────────────────────────┐ │
│ │                       Chat Orchestrator                      │ │
│ │ ┌─────────────┐ ┌─────────────┐ ┌────────────────────┐       │ │
│ │ │  Strategy   │ │  Provider   │ │    Conversation    │       │ │
│ │ │   Router    │ │   Factory   │ │     Repository     │       │ │
│ │ └──────┬──────┘ └──────┬──────┘ └─────────┬──────────┘       │ │
│ │        │               │                  │                  │ │
│ │        ▼               ▼                  ▼                  │ │
│ │ ┌──────────────────────────────────────────────────────────┐ │ │
│ │ │                      LLM Providers                       │ │ │
│ │ │  ┌────────┐  ┌────────┐  ┌────────┐  ┌────────┐          │ │ │
│ │ │  │ OpenAI │  │ Gemini │  │ Claude │  │ Ollama │          │ │ │
│ │ │  └────────┘  └────────┘  └────────┘  └────────┘          │ │ │
│ │ └──────────────────────────────────────────────────────────┘ │ │
│ └──────────────────────────────────────────────────────────────┘ │
└──────────────────────────────────────────────────────────────────┘
                         │
                         ▼
┌──────────────────────────────────────────────────────────────────┐
│                             Supabase                             │
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────────────┐       │
│ │ conversations│ │   messages   │ │ embeddings (pgvector)│       │
│ └──────────────┘ └──────────────┘ └──────────────────────┘       │
└──────────────────────────────────────────────────────────────────┘
```

File structure:

```text
src/app/
├── [locale]/(pages)/chat/
│   ├── page.tsx                     # Main chat page
│   ├── [conversationId]/page.tsx    # Specific conversation
│   └── layout.tsx                   # Layout with sidebar
├── components/chat/
│   ├── ChatInput.tsx                # Streaming input
│   ├── MessageList.tsx              # Message list
│   ├── MessageBubble.tsx            # Individual bubble
│   ├── ConversationList.tsx         # Conversation sidebar
│   └── ModelSelector.tsx            # Model selector
├── lib/ai/
│   ├── providers/
│   │   ├── openai.ts                # OpenAI adapter
│   │   ├── gemini.ts                # Gemini adapter
│   │   ├── claude.ts                # Claude adapter
│   │   └── ollama.ts                # Ollama adapter
│   ├── orchestrator.ts              # Chat orchestrator
│   ├── strategy-router.ts           # Smart routing
│   ├── provider-factory.ts          # Provider factory
│   └── circuit-breaker.ts           # Fallback + retry
├── server/chat/
│   ├── get-conversations.ts         # List conversations
│   ├── get-messages.ts              # Messages for a conversation
│   └── search-similar.ts            # RAG search
├── actions/chat/
│   ├── send-message.ts              # Send message
│   ├── create-conversation.ts       # New conversation
│   └── delete-conversation.ts       # Delete conversation
└── types/chat.ts                    # Chat types
```

The strategy router selects the ideal provider based on the request context:
```ts
interface StrategyContext {
  userPreference?: string
  taskType?: 'code' | 'creative' | 'analysis' | 'general'
  maxTokens?: number
  requiresStreaming?: boolean
}

function selectProvider(context: StrategyContext): LLMProvider {
  // 1. Honor user preference
  if (context.userPreference && isAvailable(context.userPreference)) {
    return getProvider(context.userPreference)
  }
  // 2. Select by task type
  switch (context.taskType) {
    case 'code':
      return getProvider('claude') // Best for code
    case 'creative':
      return getProvider('openai') // GPT-4 for creativity
    default:
      return getProvider('gemini') // Cost-effective
  }
}
```
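
Both selectProvider and the factory below return an LLMProvider, which the artifacts reference but do not define. A minimal sketch of the shape the adapters presumably implement; these names are assumptions, not the project's actual types:

```ts
// Hypothetical shapes; the real definitions would live in types/chat.ts.
interface ChatMessage {
  role: 'user' | 'assistant' | 'system'
  content: string
}

interface LLMProvider {
  // One-shot completion
  chat(messages: ChatMessage[]): Promise<string>
  // Token-by-token streaming completion
  stream(messages: ChatMessage[]): AsyncIterable<string>
}
```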

The provider factory creates provider instances with a standard configuration:

```ts
class ProviderFactory {
  static create(type: ProviderType): LLMProvider {
    switch (type) {
      case 'openai':
        return new OpenAIProvider({
          apiKey: process.env.OPENAI_API_KEY!,
          model: 'gpt-4-turbo-preview',
        })
      case 'gemini':
        return new GeminiProvider({
          apiKey: process.env.GOOGLE_AI_API_KEY!,
          model: 'gemini-pro',
        })
      case 'claude':
        return new ClaudeProvider({
          apiKey: process.env.ANTHROPIC_API_KEY!,
          model: 'claude-3-sonnet-20240229',
        })
      case 'ollama':
        return new OllamaProvider({
          baseUrl: process.env.OLLAMA_BASE_URL || 'http://localhost:11434',
          model: 'llama2',
        })
    }
  }
}
```
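
Router and factory combine along these lines (illustrative usage, assuming the LLMProvider sketch above):

```ts
// Route a code-oriented request; the router falls back to task-type
// defaults when the user has no stated preference.
const provider = selectProvider({ taskType: 'code', requiresStreaming: false })
const answer = await provider.chat([
  { role: 'user', content: 'Explain this stack trace' },
])
```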

The circuit breaker prevents cascading failures and implements provider fallback (the failure threshold and reset timeout below are illustrative defaults):

```ts
class CircuitBreaker {
  private failures = 0
  private lastFailure?: Date
  private state: 'closed' | 'open' | 'half-open' = 'closed'

  constructor(
    private failureThreshold = 5,
    private resetTimeoutMs = 30_000,
  ) {}

  async execute<T>(fn: () => Promise<T>, fallback: () => Promise<T>): Promise<T> {
    if (this.state === 'open') {
      if (this.shouldRetry()) {
        this.state = 'half-open' // probe the provider again
      } else {
        return fallback()
      }
    }
    try {
      const result = await fn()
      this.onSuccess()
      return result
    } catch (error) {
      this.onFailure()
      if (this.state === 'open') {
        return fallback()
      }
      throw error
    }
  }

  // Retry only after the reset timeout has elapsed since the last failure
  private shouldRetry(): boolean {
    return !!this.lastFailure && Date.now() - this.lastFailure.getTime() > this.resetTimeoutMs
  }

  // Any success closes the circuit and clears the failure count
  private onSuccess() {
    this.failures = 0
    this.state = 'closed'
  }

  // Consecutive failures beyond the threshold open the circuit
  private onFailure() {
    this.failures += 1
    this.lastFailure = new Date()
    if (this.failures >= this.failureThreshold) {
      this.state = 'open'
    }
  }
}
```
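
A sketch of how the orchestrator might route a call through the breaker, with a second provider as fallback (the provider choices here are illustrative):

```ts
const breaker = new CircuitBreaker()

async function completeWithFallback(messages: ChatMessage[]): Promise<string> {
  const primary = ProviderFactory.create('openai')
  const backup = ProviderFactory.create('gemini')

  // The primary call goes through the breaker; when the circuit is open
  // (or a half-open probe fails), the backup provider answers instead.
  return breaker.execute(
    () => primary.chat(messages),
    () => backup.chat(messages),
  )
}
```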

Environment variables:

```bash
# OpenAI
OPENAI_API_KEY=sk-...
# Google AI (Gemini)
GOOGLE_AI_API_KEY=...
# Anthropic (Claude)
ANTHROPIC_API_KEY=sk-ant-...
# Ollama (local)
OLLAMA_BASE_URL=http://localhost:11434
# Default provider
DEFAULT_AI_PROVIDER=openai
# RAG
ENABLE_RAG=true
EMBEDDING_MODEL=text-embedding-3-small
```
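
DEFAULT_AI_PROVIDER is declared but not consumed in the snippets shown; presumably the router falls back to it, along these lines:

```ts
// Hypothetical: resolve the configured default when neither user
// preference nor task type picks a provider.
const defaultType = (process.env.DEFAULT_AI_PROVIDER ?? 'openai') as ProviderType
const defaultProvider = ProviderFactory.create(defaultType)
```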

Supabase schema (tables, vector index, and RLS):

```sql
-- Enable pgvector
create extension if not exists vector;

-- Conversations
create table conversations (
  id uuid primary key default gen_random_uuid(),
  user_id uuid references auth.users not null,
  title text,
  model text default 'gpt-4',
  created_at timestamptz default now(),
  updated_at timestamptz default now()
);

-- Messages
create table messages (
  id uuid primary key default gen_random_uuid(),
  conversation_id uuid references conversations on delete cascade,
  role text not null check (role in ('user', 'assistant', 'system')),
  content text not null,
  tokens_used int,
  model text,
  created_at timestamptz default now()
);

-- Embeddings for RAG (1536 dimensions matches text-embedding-3-small)
create table embeddings (
  id uuid primary key default gen_random_uuid(),
  content text not null,
  embedding vector(1536),
  metadata jsonb,
  created_at timestamptz default now()
);

-- Vector search index
create index on embeddings using ivfflat (embedding vector_cosine_ops);

-- RLS
alter table conversations enable row level security;
alter table messages enable row level security;

create policy "Users can manage own conversations"
  on conversations for all
  using (auth.uid() = user_id);

create policy "Users can manage own messages"
  on messages for all
  using (conversation_id in (
    select id from conversations where user_id = auth.uid()
  ));
```
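
The RAG search further below calls a match_embeddings RPC that the schema does not define. A sketch in the standard pgvector cosine-similarity pattern; the signature mirrors the call site, the body is an assumption:

```sql
create or replace function match_embeddings(
  query_embedding vector(1536),
  match_threshold float,
  match_count int
)
returns table (id uuid, content text, metadata jsonb, similarity float)
language sql stable
as $$
  select
    e.id,
    e.content,
    e.metadata,
    1 - (e.embedding <=> query_embedding) as similarity
  from embeddings e
  where 1 - (e.embedding <=> query_embedding) > match_threshold
  order by e.embedding <=> query_embedding
  limit match_count;
$$;
```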

Streaming implementation with Server-Sent Events:

```ts
// src/app/api/chat/stream/route.ts
import { OpenAIStream, StreamingTextResponse } from 'ai'
import OpenAI from 'openai'

export async function POST(req: Request) {
  const { messages, model } = await req.json()
  const openai = new OpenAI()

  const response = await openai.chat.completions.create({
    model: model || 'gpt-4-turbo-preview',
    messages,
    stream: true,
  })

  const stream = OpenAIStream(response)
  return new StreamingTextResponse(stream)
}
```
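
On the client, this route can be consumed with the AI SDK's useChat hook. A minimal sketch; the component name and markup are illustrative:

```tsx
'use client'
import { useChat } from 'ai/react'

export function ChatPanel() {
  const { messages, input, handleInputChange, handleSubmit } = useChat({
    api: '/api/chat/stream', // matches the route above
  })

  return (
    <form onSubmit={handleSubmit}>
      {messages.map((m) => (
        <p key={m.id}>
          {m.role}: {m.content}
        </p>
      ))}
      <input value={input} onChange={handleInputChange} />
    </form>
  )
}
```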

Indexing content for RAG:

```ts
// createAdminClient: project helper for a privileged Supabase client
async function indexDocument(content: string, metadata: Record<string, unknown>) {
  const openai = new OpenAI()

  // Generate embedding
  const embeddingResponse = await openai.embeddings.create({
    model: 'text-embedding-3-small',
    input: content,
  })
  const embedding = embeddingResponse.data[0].embedding

  // Save to Supabase
  const supabase = createAdminClient()
  await supabase.from('embeddings').insert({
    content,
    embedding,
    metadata,
  })
}
```
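
Long inputs are usually chunked before indexing so retrieval returns focused passages. A naive fixed-size chunker; the helper and its parameters are hypothetical, not part of the artifacts:

```ts
function chunkText(text: string, size = 1000, overlap = 200): string[] {
  const chunks: string[] = []
  // Step by (size - overlap) so neighboring chunks share context
  for (let start = 0; start < text.length; start += size - overlap) {
    chunks.push(text.slice(start, start + size))
  }
  return chunks
}

async function indexLongDocument(rawText: string) {
  // Each chunk is stored as its own embeddings row
  for (const chunk of chunkText(rawText)) {
    await indexDocument(chunk, { source: 'handbook.md' })
  }
}
```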

RAG similarity search:

```ts
async function searchSimilar(query: string, limit = 5) {
  const openai = new OpenAI()

  // Generate an embedding for the query
  const embeddingResponse = await openai.embeddings.create({
    model: 'text-embedding-3-small',
    input: query,
  })
  const queryEmbedding = embeddingResponse.data[0].embedding

  // Find similar entries
  const supabase = createAdminClient()
  const { data } = await supabase.rpc('match_embeddings', {
    query_embedding: queryEmbedding,
    match_threshold: 0.7,
    match_count: limit,
  })

  return data
}
```
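
Retrieval feeds the provider call by injecting matched passages into the system prompt. A sketch, reusing the ChatMessage shape assumed earlier:

```ts
async function buildRagMessages(question: string): Promise<ChatMessage[]> {
  const matches = await searchSimilar(question)
  const context = (matches ?? [])
    .map((m: { content: string }) => m.content)
    .join('\n---\n')

  return [
    { role: 'system', content: `Answer using only this context:\n${context}` },
    { role: 'user', content: question },
  ]
}
```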

Content moderation with the OpenAI Moderation API (returns true when the content passes):

```ts
async function moderateContent(content: string): Promise<boolean> {
  const openai = new OpenAI()

  const moderation = await openai.moderations.create({
    input: content,
  })

  return !moderation.results[0].flagged
}
```

Per-user rate limiting before a message is processed:

```ts
const rateLimiter = new RateLimiter({
  tokensPerMinute: 100000,
  requestsPerMinute: 60,
})

async function sendMessage(userId: string, content: string) {
  const allowed = await rateLimiter.check(userId)
  if (!allowed) {
    throw new Error('Rate limit exceeded')
  }
  // Process the message...
}
```
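
RateLimiter is a project-level abstraction rather than a library import. A minimal in-memory sketch matching the constructor options and check call above (sliding request window; token budgeting omitted):

```ts
class RateLimiter {
  private requests = new Map<string, number[]>()

  constructor(private opts: { tokensPerMinute: number; requestsPerMinute: number }) {}

  // Sliding one-minute window over request timestamps per user
  async check(userId: string): Promise<boolean> {
    const now = Date.now()
    const recent = (this.requests.get(userId) ?? []).filter((t) => now - t < 60_000)
    if (recent.length >= this.opts.requestsPerMinute) {
      return false
    }
    recent.push(now)
    this.requests.set(userId, recent)
    return true
  }
}
```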