
AI Chatbot GenAI – Architecture and Implementation Guide

Official consolidation of the AI Chatbot (LLM Orchestrator) feature based on the architecture artifacts and the BDD spec.

Overview

This is a premium feature. It requires additional API key configuration and Supabase Vector (pgvector).

High-level Architecture

┌────────────────────────────────────────────────────────────────┐
│ Frontend (React)                                                │
│  ┌───────────┐  ┌─────────────┐  ┌─────────────────────┐       │
│  │ ChatInput │  │ MessageList │  │ ConversationSidebar │       │
│  └───────────┘  └─────────────┘  └─────────────────────┘       │
└───────────────────────────────┬────────────────────────────────┘
                                │ Server Actions
┌───────────────────────────────▼────────────────────────────────┐
│ Backend (Next.js)                                               │
│  ┌───────────────────────────────────────────────────────────┐ │
│  │ Chat Orchestrator                                         │ │
│  │  ┌─────────────┐  ┌─────────────┐  ┌──────────────────┐   │ │
│  │  │ Strategy    │  │ Provider    │  │ Conversation     │   │ │
│  │  │ Router      │  │ Factory     │  │ Repository       │   │ │
│  │  └──────┬──────┘  └──────┬──────┘  └────────┬─────────┘   │ │
│  │         ▼                ▼                  ▼             │ │
│  │  ┌─────────────────────────────────────────────────────┐  │ │
│  │  │ LLM Providers                                       │  │ │
│  │  │  ┌────────┐  ┌────────┐  ┌────────┐  ┌────────┐     │  │ │
│  │  │  │ OpenAI │  │ Gemini │  │ Claude │  │ Ollama │     │  │ │
│  │  │  └────────┘  └────────┘  └────────┘  └────────┘     │  │ │
│  │  └─────────────────────────────────────────────────────┘  │ │
│  └───────────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────────┐
│ Supabase                                                         │
│  ┌───────────────┐  ┌──────────┐  ┌────────────────────────┐    │
│  │ conversations │  │ messages │  │ embeddings (pgvector)  │    │
│  └───────────────┘  └──────────┘  └────────────────────────┘    │
└─────────────────────────────────────────────────────────────────┘

File structure

src/app/
├── [locale]/(pages)/chat/
│   ├── page.tsx                    # Main chat page
│   ├── [conversationId]/page.tsx   # Specific conversation
│   └── layout.tsx                  # Layout with sidebar
├── components/chat/
│   ├── ChatInput.tsx               # Streaming input
│   ├── MessageList.tsx             # Message list
│   ├── MessageBubble.tsx           # Individual bubble
│   ├── ConversationList.tsx        # Conversation sidebar
│   └── ModelSelector.tsx           # Model selector
├── lib/ai/
│   ├── providers/
│   │   ├── openai.ts               # OpenAI adapter
│   │   ├── gemini.ts               # Gemini adapter
│   │   ├── claude.ts               # Claude adapter
│   │   └── ollama.ts               # Ollama adapter
│   ├── orchestrator.ts             # Chat orchestrator
│   ├── strategy-router.ts          # Smart routing
│   ├── provider-factory.ts         # Provider factory
│   └── circuit-breaker.ts          # Fallback + retry
├── server/chat/
│   ├── get-conversations.ts        # List conversations
│   ├── get-messages.ts             # Messages for a conversation
│   └── search-similar.ts           # RAG search
├── actions/chat/
│   ├── send-message.ts             # Send message
│   ├── create-conversation.ts      # New conversation
│   └── delete-conversation.ts      # Delete conversation
└── types/chat.ts                   # Types
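The shared contract in types/chat.ts is not reproduced in the artifacts. A minimal sketch of what it could contain, assuming a unified message shape and a streaming-capable provider interface (names here are illustrative, not the shipped types):

// types/chat.ts (illustrative sketch)
export type ProviderType = 'openai' | 'gemini' | 'claude' | 'ollama'

export interface ChatMessage {
  role: 'user' | 'assistant' | 'system'
  content: string
}

export interface LLMProvider {
  readonly name: ProviderType
  // Non-streaming completion
  complete(messages: ChatMessage[], options?: { maxTokens?: number }): Promise<string>
  // Streaming completion, consumed by the SSE route further below
  stream(messages: ChatMessage[], options?: { maxTokens?: number }): AsyncIterable<string>
}

The adapters in lib/ai/providers/ would each implement this interface so that the orchestrator never depends on a concrete SDK.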

Design patterns

Strategy Router

Selects the most suitable provider based on the request context:

interface StrategyContext {
  userPreference?: string
  taskType?: 'code' | 'creative' | 'analysis' | 'general'
  maxTokens?: number
  requiresStreaming?: boolean
}

function selectProvider(context: StrategyContext): LLMProvider {
  // 1. Honor user preference
  if (context.userPreference && isAvailable(context.userPreference)) {
    return getProvider(context.userPreference)
  }

  // 2. Select by task type
  switch (context.taskType) {
    case 'code':
      return getProvider('claude')   // Best for code
    case 'creative':
      return getProvider('openai')   // GPT-4 for creativity
    default:
      return getProvider('gemini')   // Cost-effective
  }
}
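The helpers isAvailable and getProvider are referenced but not shown in the artifacts. A minimal sketch of what they might look like, assuming a simple in-memory registry populated at startup (for example by the Provider Factory below):

// Illustrative registry helpers for strategy-router.ts (assumed, not the shipped code)
const registry = new Map<string, LLMProvider>()

function isAvailable(name: string): boolean {
  // A provider counts as available only if it was registered with valid configuration
  return registry.has(name)
}

function getProvider(name: string): LLMProvider {
  const provider = registry.get(name)
  if (!provider) throw new Error(`Provider "${name}" is not configured`)
  return provider
}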

Provider Factory

Creates provider instances with a standard configuration:

class ProviderFactory {
  static create(type: ProviderType): LLMProvider {
    switch (type) {
      case 'openai':
        return new OpenAIProvider({
          apiKey: process.env.OPENAI_API_KEY!,
          model: 'gpt-4-turbo-preview',
        })
      case 'gemini':
        return new GeminiProvider({
          apiKey: process.env.GOOGLE_AI_API_KEY!,
          model: 'gemini-pro',
        })
      case 'claude':
        return new ClaudeProvider({
          apiKey: process.env.ANTHROPIC_API_KEY!,
          model: 'claude-3-sonnet',
        })
      case 'ollama':
        return new OllamaProvider({
          baseUrl: process.env.OLLAMA_BASE_URL || 'http://localhost:11434',
          model: 'llama2',
        })
    }
  }
}
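A hypothetical call site, assuming the DEFAULT_AI_PROVIDER variable documented in the environment section is used as the fallback choice:

// Hypothetical usage: create the configured default provider
const defaultType = (process.env.DEFAULT_AI_PROVIDER ?? 'openai') as ProviderType
const provider = ProviderFactory.create(defaultType)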

Circuit Breaker

Prevents cascading failures and implements fallback:

class CircuitBreaker {
  private failures = 0
  private lastFailure?: Date
  private state: 'closed' | 'open' | 'half-open' = 'closed'

  async execute<T>(fn: () => Promise<T>, fallback: () => Promise<T>): Promise<T> {
    if (this.state === 'open') {
      if (this.shouldRetry()) {
        this.state = 'half-open'
      } else {
        return fallback()
      }
    }

    try {
      const result = await fn()
      this.onSuccess()
      return result
    } catch (error) {
      this.onFailure()
      if (this.state === 'open') {
        return fallback()
      }
      throw error
    }
  }

  // The helpers below complete the sketch; thresholds are illustrative assumptions
  private shouldRetry(): boolean {
    const RETRY_AFTER_MS = 30_000
    return !!this.lastFailure && Date.now() - this.lastFailure.getTime() > RETRY_AFTER_MS
  }

  private onSuccess() {
    this.failures = 0
    this.state = 'closed'
  }

  private onFailure() {
    this.failures++
    this.lastFailure = new Date()
    if (this.failures >= 5) {
      this.state = 'open'
    }
  }
}
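The orchestrator itself (lib/ai/orchestrator.ts) is not reproduced in the artifacts. A minimal sketch of how the three patterns could compose, assuming the LLMProvider and ChatMessage shapes sketched earlier; this is illustrative, not the shipped implementation:

// Illustrative sketch of lib/ai/orchestrator.ts
class ChatOrchestrator {
  private breaker = new CircuitBreaker()

  async send(messages: ChatMessage[], context: StrategyContext): Promise<string> {
    // 1. Pick a primary provider via the strategy router
    const primary = selectProvider(context)

    // 2. Use a different provider as fallback when the circuit opens
    const fallback = ProviderFactory.create(
      primary.name === 'openai' ? 'gemini' : 'openai',
    )

    // 3. Execute through the circuit breaker
    return this.breaker.execute(
      () => primary.complete(messages),
      () => fallback.complete(messages),
    )
  }
}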

Environment variables

.env.local
# OpenAI
OPENAI_API_KEY=sk-...

# Google AI (Gemini)
GOOGLE_AI_API_KEY=...

# Anthropic (Claude)
ANTHROPIC_API_KEY=sk-ant-...

# Ollama (local)
OLLAMA_BASE_URL=http://localhost:11434

# Default provider
DEFAULT_AI_PROVIDER=openai

# RAG
ENABLE_RAG=true
EMBEDDING_MODEL=text-embedding-3-small
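Because each provider is optional, it can help to fail fast when the configured default has no credentials. A hedged sketch of a startup check (the helper name and placement are assumptions):

// Illustrative startup check, e.g. alongside the provider factory
const credentialsFor: Record<string, string | undefined> = {
  openai: process.env.OPENAI_API_KEY,
  gemini: process.env.GOOGLE_AI_API_KEY,
  claude: process.env.ANTHROPIC_API_KEY,
  ollama: process.env.OLLAMA_BASE_URL, // local, but a base URL is still required
}

export function assertDefaultProviderConfigured() {
  const def = process.env.DEFAULT_AI_PROVIDER ?? 'openai'
  if (!credentialsFor[def]) {
    throw new Error(`DEFAULT_AI_PROVIDER is "${def}" but its credentials are missing`)
  }
}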

Database schema

-- Conversations
create table conversations (
  id uuid primary key default gen_random_uuid(),
  user_id uuid references auth.users not null,
  title text,
  model text default 'gpt-4',
  created_at timestamptz default now(),
  updated_at timestamptz default now()
);

-- Messages
create table messages (
  id uuid primary key default gen_random_uuid(),
  conversation_id uuid references conversations on delete cascade,
  role text not null check (role in ('user', 'assistant', 'system')),
  content text not null,
  tokens_used int,
  model text,
  created_at timestamptz default now()
);

-- Embeddings for RAG
create table embeddings (
  id uuid primary key default gen_random_uuid(),
  content text not null,
  embedding vector(1536),
  metadata jsonb,
  created_at timestamptz default now()
);

-- Vector search index
create index on embeddings using ivfflat (embedding vector_cosine_ops);

-- RLS
alter table conversations enable row level security;
alter table messages enable row level security;

create policy "Users can manage own conversations"
  on conversations for all
  using (auth.uid() = user_id);

create policy "Users can manage own messages"
  on messages for all
  using (conversation_id in (
    select id from conversations where user_id = auth.uid()
  ));
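With RLS in place, server-side reads only need the user's session client. A sketch of what server/chat/get-conversations.ts could look like, assuming createClient is the project's Supabase server helper (path and helper name are assumptions):

// Illustrative sketch of server/chat/get-conversations.ts
import { createClient } from '@/lib/supabase/server' // assumed project helper

export async function getConversations() {
  const supabase = createClient()

  // RLS restricts the result set to the authenticated user's rows
  const { data, error } = await supabase
    .from('conversations')
    .select('id, title, model, updated_at')
    .order('updated_at', { ascending: false })

  if (error) throw error
  return data
}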

Streaming response

Streaming implementation with Server-Sent Events:

// src/app/api/chat/stream/route.ts
import { OpenAIStream, StreamingTextResponse } from 'ai'
import OpenAI from 'openai'

export async function POST(req: Request) {
  const { messages, model } = await req.json()

  const openai = new OpenAI()
  const response = await openai.chat.completions.create({
    model: model || 'gpt-4-turbo-preview',
    messages,
    stream: true,
  })

  const stream = OpenAIStream(response)
  return new StreamingTextResponse(stream)
}
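On the client, ChatInput and MessageList can consume this route with the same generation of the Vercel AI SDK that provides OpenAIStream. A sketch assuming its useChat hook; the component name is illustrative:

// Illustrative client component consuming /api/chat/stream
'use client'
import { useChat } from 'ai/react'

export function ChatPanel() {
  const { messages, input, handleInputChange, handleSubmit } = useChat({
    api: '/api/chat/stream',
  })

  return (
    <form onSubmit={handleSubmit}>
      {messages.map((m) => (
        <p key={m.id}>
          {m.role}: {m.content}
        </p>
      ))}
      <input value={input} onChange={handleInputChange} placeholder="Ask something…" />
    </form>
  )
}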

RAG (Retrieval-Augmented Generation)

Document indexing

async function indexDocument(content: string, metadata: Record<string, unknown>) {
  const openai = new OpenAI()

  // Generate embedding
  const embeddingResponse = await openai.embeddings.create({
    model: 'text-embedding-3-small',
    input: content,
  })
  const embedding = embeddingResponse.data[0].embedding

  // Save to Supabase
  const supabase = createAdminClient()
  await supabase.from('embeddings').insert({
    content,
    embedding,
    metadata,
  })
}
Similarity search

async function searchSimilar(query: string, limit = 5) {
  const openai = new OpenAI()

  // Generate embedding for the query
  const embeddingResponse = await openai.embeddings.create({
    model: 'text-embedding-3-small',
    input: query,
  })
  const queryEmbedding = embeddingResponse.data[0].embedding

  // Find similar entries
  const supabase = createAdminClient()
  const { data } = await supabase.rpc('match_embeddings', {
    query_embedding: queryEmbedding,
    match_threshold: 0.7,
    match_count: limit,
  })

  return data
}
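Note that match_embeddings is a Postgres function the project must define alongside the schema above; it is not shown in the artifacts. The retrieved chunks are then injected into the conversation before the provider call. A sketch of that augmentation step, assuming the ChatMessage shape sketched earlier and that matched rows expose a content field:

// Illustrative sketch: prepend retrieved context as a system message
async function buildRagMessages(query: string, history: ChatMessage[]): Promise<ChatMessage[]> {
  const matches = await searchSimilar(query)
  const context = (matches ?? [])
    .map((m: { content: string }) => m.content)
    .join('\n---\n')

  return [
    { role: 'system', content: `Answer using this context when relevant:\n${context}` },
    ...history,
    { role: 'user', content: query },
  ]
}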

Guardrails

Content moderation

async function moderateContent(content: string): Promise<boolean> {
  const openai = new OpenAI()

  const moderation = await openai.moderations.create({
    input: content,
  })

  return !moderation.results[0].flagged
}
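A hypothetical guard built on this helper, as it might be called at the top of the send-message action before any provider is invoked:

// Illustrative guard before processing a user message
async function guardMessage(content: string) {
  const ok = await moderateContent(content)
  if (!ok) {
    throw new Error('Message rejected by content moderation')
  }
}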

Rate limiting

const rateLimiter = new RateLimiter({
  tokensPerMinute: 100000,
  requestsPerMinute: 60,
})

async function sendMessage(userId: string, content: string) {
  const allowed = await rateLimiter.check(userId)
  if (!allowed) {
    throw new Error('Rate limit exceeded')
  }

  // Process the message...
}
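The RateLimiter class itself is not defined in the artifacts. A minimal in-memory sketch of a per-user sliding window that satisfies the call above; a real deployment would more likely back this with Redis or a hosted limiter, and the token budget is omitted for brevity:

// Illustrative in-memory rate limiter, not the shipped implementation
class RateLimiter {
  private requests = new Map<string, number[]>()

  constructor(private opts: { tokensPerMinute: number; requestsPerMinute: number }) {}

  async check(userId: string): Promise<boolean> {
    const now = Date.now()
    const windowStart = now - 60_000

    // Keep only the requests made within the last minute
    const recent = (this.requests.get(userId) ?? []).filter((t) => t > windowStart)

    if (recent.length >= this.opts.requestsPerMinute) {
      return false
    }

    recent.push(now)
    this.requests.set(userId, recent)
    return true
  }
}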

Implementation checklist

Resources