
AI Chatbot GenAI – Architecture and Implementation Guide

Official consolidation of the AI Chatbot (LLM Orchestrator) feature based on the architecture artifacts and the BDD spec.

Overview

This is a premium feature. It requires additional API key configuration and Supabase Vector (pgvector).

High-level Architecture

┌────────────────────────────────────────────────────────────────┐
│ Frontend (React)                                                │
│  ┌───────────┐  ┌─────────────┐  ┌─────────────────────┐       │
│  │ ChatInput │  │ MessageList │  │ ConversationSidebar │       │
│  └───────────┘  └─────────────┘  └─────────────────────┘       │
└───────────────────────────────┬────────────────────────────────┘
                                │ Server Actions
┌───────────────────────────────▼────────────────────────────────┐
│ Backend (Next.js)                                               │
│  ┌───────────────────────────────────────────────────────────┐ │
│  │ Chat Orchestrator                                         │ │
│  │  ┌─────────────┐  ┌─────────────┐  ┌──────────────────┐   │ │
│  │  │ Strategy    │  │ Provider    │  │ Conversation     │   │ │
│  │  │ Router      │  │ Factory     │  │ Repository       │   │ │
│  │  └──────┬──────┘  └──────┬──────┘  └────────┬─────────┘   │ │
│  │         ▼                ▼                  ▼             │ │
│  │  ┌─────────────────────────────────────────────────────┐  │ │
│  │  │ LLM Providers                                       │  │ │
│  │  │  ┌────────┐  ┌────────┐  ┌────────┐  ┌────────┐     │  │ │
│  │  │  │ OpenAI │  │ Gemini │  │ Claude │  │ Ollama │     │  │ │
│  │  │  └────────┘  └────────┘  └────────┘  └────────┘     │  │ │
│  │  └─────────────────────────────────────────────────────┘  │ │
│  └───────────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────────┐
│ Supabase                                                         │
│  ┌───────────────┐  ┌──────────┐  ┌────────────────────────┐    │
│  │ conversations │  │ messages │  │ embeddings (pgvector)  │    │
│  └───────────────┘  └──────────┘  └────────────────────────┘    │
└─────────────────────────────────────────────────────────────────┘

File structure

src/app/
├── [locale]/(pages)/chat/
│   ├── page.tsx                    # Main chat page
│   ├── [conversationId]/page.tsx   # Specific conversation
│   └── layout.tsx                  # Layout with sidebar
├── components/chat/
│   ├── ChatInput.tsx               # Streaming input
│   ├── MessageList.tsx             # Message list
│   ├── MessageBubble.tsx           # Individual bubble
│   ├── ConversationList.tsx        # Conversation sidebar
│   └── ModelSelector.tsx           # Model selector
├── lib/ai/
│   ├── providers/
│   │   ├── openai.ts               # OpenAI adapter
│   │   ├── gemini.ts               # Gemini adapter
│   │   ├── claude.ts               # Claude adapter
│   │   └── ollama.ts               # Ollama adapter
│   ├── orchestrator.ts             # Chat orchestrator
│   ├── strategy-router.ts          # Smart routing
│   ├── provider-factory.ts         # Provider factory
│   └── circuit-breaker.ts          # Fallback + retry
├── server/chat/
│   ├── get-conversations.ts        # List conversations
│   ├── get-messages.ts             # Messages for a conversation
│   └── search-similar.ts           # RAG search
├── actions/chat/
│   ├── send-message.ts             # Send message
│   ├── create-conversation.ts      # New conversation
│   └── delete-conversation.ts      # Delete conversation
└── types/chat.ts                   # Types
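The shared contract in types/chat.ts is not reproduced in the artifacts. A minimal sketch of what it could contain, assuming a unified message shape and a streaming-capable provider interface (names here are illustrative, not the shipped types):

// types/chat.ts (illustrative sketch)
export type ProviderType = 'openai' | 'gemini' | 'claude' | 'ollama'

export interface ChatMessage {
  role: 'user' | 'assistant' | 'system'
  content: string
}

export interface LLMProvider {
  readonly name: ProviderType
  // Non-streaming completion
  complete(messages: ChatMessage[], options?: { maxTokens?: number }): Promise<string>
  // Streaming completion, consumed by the SSE route further below
  stream(messages: ChatMessage[], options?: { maxTokens?: number }): AsyncIterable<string>
}

The adapters in lib/ai/providers/ would each implement this interface so that the orchestrator never depends on a concrete SDK.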

Design patterns

Strategy Router

Selects the most suitable provider based on the request context:

interface StrategyContext {
  userPreference?: string
  taskType?: 'code' | 'creative' | 'analysis' | 'general'
  maxTokens?: number
  requiresStreaming?: boolean
}

function selectProvider(context: StrategyContext): LLMProvider {
  // 1. Honor user preference
  if (context.userPreference && isAvailable(context.userPreference)) {
    return getProvider(context.userPreference)
  }

  // 2. Select by task type
  switch (context.taskType) {
    case 'code':
      return getProvider('claude')   // Best for code
    case 'creative':
      return getProvider('openai')   // GPT-4 for creativity
    default:
      return getProvider('gemini')   // Cost-effective
  }
}
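The helpers isAvailable and getProvider are referenced but not shown in the artifacts. A minimal sketch of what they might look like, assuming a simple in-memory registry populated at startup (for example by the Provider Factory below):

// Illustrative registry helpers for strategy-router.ts (assumed, not the shipped code)
const registry = new Map<string, LLMProvider>()

function isAvailable(name: string): boolean {
  // A provider counts as available only if it was registered with valid configuration
  return registry.has(name)
}

function getProvider(name: string): LLMProvider {
  const provider = registry.get(name)
  if (!provider) throw new Error(`Provider "${name}" is not configured`)
  return provider
}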

Provider Factory

Creates provider instances with a standard configuration:

class ProviderFactory {
  static create(type: ProviderType): LLMProvider {
    switch (type) {
      case 'openai':
        return new OpenAIProvider({
          apiKey: process.env.OPENAI_API_KEY!,
          model: 'gpt-4-turbo-preview',
        })
      case 'gemini':
        return new GeminiProvider({
          apiKey: process.env.GOOGLE_AI_API_KEY!,
          model: 'gemini-pro',
        })
      case 'claude':
        return new ClaudeProvider({
          apiKey: process.env.ANTHROPIC_API_KEY!,
          model: 'claude-3-sonnet',
        })
      case 'ollama':
        return new OllamaProvider({
          baseUrl: process.env.OLLAMA_BASE_URL || 'http://localhost:11434',
          model: 'llama2',
        })
    }
  }
}
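A hypothetical call site, assuming the DEFAULT_AI_PROVIDER variable documented in the environment section is used as the fallback choice:

// Hypothetical usage: create the configured default provider
const defaultType = (process.env.DEFAULT_AI_PROVIDER ?? 'openai') as ProviderType
const provider = ProviderFactory.create(defaultType)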

Circuit Breaker

Prevents cascading failures and implements fallback:

class CircuitBreaker {
  private failures = 0
  private lastFailure?: Date
  private state: 'closed' | 'open' | 'half-open' = 'closed'

  async execute<T>(fn: () => Promise<T>, fallback: () => Promise<T>): Promise<T> {
    if (this.state === 'open') {
      if (this.shouldRetry()) {
        this.state = 'half-open'
      } else {
        return fallback()
      }
    }

    try {
      const result = await fn()
      this.onSuccess()
      return result
    } catch (error) {
      this.onFailure()
      if (this.state === 'open') {
        return fallback()
      }
      throw error
    }
  }

  // The helpers below complete the sketch; thresholds are illustrative assumptions
  private shouldRetry(): boolean {
    const RETRY_AFTER_MS = 30_000
    return !!this.lastFailure && Date.now() - this.lastFailure.getTime() > RETRY_AFTER_MS
  }

  private onSuccess() {
    this.failures = 0
    this.state = 'closed'
  }

  private onFailure() {
    this.failures++
    this.lastFailure = new Date()
    if (this.failures >= 5) {
      this.state = 'open'
    }
  }
}
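The orchestrator itself (lib/ai/orchestrator.ts) is not reproduced in the artifacts. A minimal sketch of how the three patterns could compose, assuming the LLMProvider and ChatMessage shapes sketched earlier; this is illustrative, not the shipped implementation:

// Illustrative sketch of lib/ai/orchestrator.ts
class ChatOrchestrator {
  private breaker = new CircuitBreaker()

  async send(messages: ChatMessage[], context: StrategyContext): Promise<string> {
    // 1. Pick a primary provider via the strategy router
    const primary = selectProvider(context)

    // 2. Use a different provider as fallback when the circuit opens
    const fallback = ProviderFactory.create(
      primary.name === 'openai' ? 'gemini' : 'openai',
    )

    // 3. Execute through the circuit breaker
    return this.breaker.execute(
      () => primary.complete(messages),
      () => fallback.complete(messages),
    )
  }
}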

Environment variables

.env.local
# OpenAI
OPENAI_API_KEY=sk-...

# Google AI (Gemini)
GOOGLE_AI_API_KEY=...

# Anthropic (Claude)
ANTHROPIC_API_KEY=sk-ant-...

# Ollama (local)
OLLAMA_BASE_URL=http://localhost:11434

# Default provider
DEFAULT_AI_PROVIDER=openai

# RAG
ENABLE_RAG=true
EMBEDDING_MODEL=text-embedding-3-small
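Because each provider is optional, it can help to fail fast when the configured default has no credentials. A hedged sketch of a startup check (the helper name and placement are assumptions):

// Illustrative startup check, e.g. alongside the provider factory
const credentialsFor: Record<string, string | undefined> = {
  openai: process.env.OPENAI_API_KEY,
  gemini: process.env.GOOGLE_AI_API_KEY,
  claude: process.env.ANTHROPIC_API_KEY,
  ollama: process.env.OLLAMA_BASE_URL, // local, but a base URL is still required
}

export function assertDefaultProviderConfigured() {
  const def = process.env.DEFAULT_AI_PROVIDER ?? 'openai'
  if (!credentialsFor[def]) {
    throw new Error(`DEFAULT_AI_PROVIDER is "${def}" but its credentials are missing`)
  }
}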

Database schema

-- Conversations
create table conversations (
  id uuid primary key default gen_random_uuid(),
  user_id uuid references auth.users not null,
  title text,
  model text default 'gpt-4',
  created_at timestamptz default now(),
  updated_at timestamptz default now()
);

-- Messages
create table messages (
  id uuid primary key default gen_random_uuid(),
  conversation_id uuid references conversations on delete cascade,
  role text not null check (role in ('user', 'assistant', 'system')),
  content text not null,
  tokens_used int,
  model text,
  created_at timestamptz default now()
);

-- Embeddings for RAG
create table embeddings (
  id uuid primary key default gen_random_uuid(),
  content text not null,
  embedding vector(1536),
  metadata jsonb,
  created_at timestamptz default now()
);

-- Vector search index
create index on embeddings using ivfflat (embedding vector_cosine_ops);

-- RLS
alter table conversations enable row level security;
alter table messages enable row level security;

create policy "Users can manage own conversations"
  on conversations for all
  using (auth.uid() = user_id);

create policy "Users can manage own messages"
  on messages for all
  using (conversation_id in (
    select id from conversations where user_id = auth.uid()
  ));
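With RLS in place, server-side reads only need the user's session client. A sketch of what server/chat/get-conversations.ts could look like, assuming createClient is the project's Supabase server helper (path and helper name are assumptions):

// Illustrative sketch of server/chat/get-conversations.ts
import { createClient } from '@/lib/supabase/server' // assumed project helper

export async function getConversations() {
  const supabase = createClient()

  // RLS restricts the result set to the authenticated user's rows
  const { data, error } = await supabase
    .from('conversations')
    .select('id, title, model, updated_at')
    .order('updated_at', { ascending: false })

  if (error) throw error
  return data
}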

Streaming response

Streaming implementation with Server-Sent Events:

// src/app/api/chat/stream/route.ts
import { OpenAIStream, StreamingTextResponse } from 'ai'
import OpenAI from 'openai'

export async function POST(req: Request) {
  const { messages, model } = await req.json()

  const openai = new OpenAI()
  const response = await openai.chat.completions.create({
    model: model || 'gpt-4-turbo-preview',
    messages,
    stream: true,
  })

  const stream = OpenAIStream(response)
  return new StreamingTextResponse(stream)
}
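On the client, ChatInput and MessageList can consume this route with the same generation of the Vercel AI SDK that provides OpenAIStream. A sketch assuming its useChat hook; the component name is illustrative:

// Illustrative client component consuming /api/chat/stream
'use client'
import { useChat } from 'ai/react'

export function ChatPanel() {
  const { messages, input, handleInputChange, handleSubmit } = useChat({
    api: '/api/chat/stream',
  })

  return (
    <form onSubmit={handleSubmit}>
      {messages.map((m) => (
        <p key={m.id}>
          {m.role}: {m.content}
        </p>
      ))}
      <input value={input} onChange={handleInputChange} placeholder="Ask something…" />
    </form>
  )
}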

RAG (Retrieval-Augmented Generation)

Document indexing

async function indexDocument(content: string, metadata: Record<string, unknown>) {
  const openai = new OpenAI()

  // Generate embedding
  const embeddingResponse = await openai.embeddings.create({
    model: 'text-embedding-3-small',
    input: content,
  })
  const embedding = embeddingResponse.data[0].embedding

  // Save to Supabase
  const supabase = createAdminClient()
  await supabase.from('embeddings').insert({
    content,
    embedding,
    metadata,
  })
}
Similarity search

async function searchSimilar(query: string, limit = 5) {
  const openai = new OpenAI()

  // Generate embedding for the query
  const embeddingResponse = await openai.embeddings.create({
    model: 'text-embedding-3-small',
    input: query,
  })
  const queryEmbedding = embeddingResponse.data[0].embedding

  // Find similar entries
  const supabase = createAdminClient()
  const { data } = await supabase.rpc('match_embeddings', {
    query_embedding: queryEmbedding,
    match_threshold: 0.7,
    match_count: limit,
  })

  return data
}
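Note that match_embeddings is a Postgres function the project must define alongside the schema above; it is not shown in the artifacts. The retrieved chunks are then injected into the conversation before the provider call. A sketch of that augmentation step, assuming the ChatMessage shape sketched earlier and that matched rows expose a content field:

// Illustrative sketch: prepend retrieved context as a system message
async function buildRagMessages(query: string, history: ChatMessage[]): Promise<ChatMessage[]> {
  const matches = await searchSimilar(query)
  const context = (matches ?? [])
    .map((m: { content: string }) => m.content)
    .join('\n---\n')

  return [
    { role: 'system', content: `Answer using this context when relevant:\n${context}` },
    ...history,
    { role: 'user', content: query },
  ]
}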

Guardrails

Content moderation

async function moderateContent(content: string): Promise<boolean> {
  const openai = new OpenAI()

  const moderation = await openai.moderations.create({
    input: content,
  })

  return !moderation.results[0].flagged
}
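A hypothetical guard built on this helper, as it might be called at the top of the send-message action before any provider is invoked:

// Illustrative guard before processing a user message
async function guardMessage(content: string) {
  const ok = await moderateContent(content)
  if (!ok) {
    throw new Error('Message rejected by content moderation')
  }
}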

Rate limiting

const rateLimiter = new RateLimiter({
  tokensPerMinute: 100000,
  requestsPerMinute: 60,
})

async function sendMessage(userId: string, content: string) {
  const allowed = await rateLimiter.check(userId)
  if (!allowed) {
    throw new Error('Rate limit exceeded')
  }

  // Process the message...
}
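The RateLimiter class itself is not defined in the artifacts. A minimal in-memory sketch of a per-user sliding window that satisfies the call above; a real deployment would more likely back this with Redis or a hosted limiter, and the token budget is omitted for brevity:

// Illustrative in-memory rate limiter, not the shipped implementation
class RateLimiter {
  private requests = new Map<string, number[]>()

  constructor(private opts: { tokensPerMinute: number; requestsPerMinute: number }) {}

  async check(userId: string): Promise<boolean> {
    const now = Date.now()
    const windowStart = now - 60_000

    // Keep only the requests made within the last minute
    const recent = (this.requests.get(userId) ?? []).filter((t) => t > windowStart)

    if (recent.length >= this.opts.requestsPerMinute) {
      return false
    }

    recent.push(now)
    this.requests.set(userId, recent)
    return true
  }
}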

Implementation checklist

Resources