Query: "What is the capital of Kazakhstan?"
```python
# Step 1: Embed the query
query = "What is the capital of Kazakhstan?"
query_embedding = embedding_model.encode(query)
# Result: numpy array of shape (384,)

# Step 2: Search vector database
results = vector_db.search(query_embedding, top_k=3)
# Returns: [
#   {"text": "Astana is the capital of Kazakhstan...", "score": 0.94},
#   {"text": "Kazakhstan's capital moved from Almaty...", "score": 0.89},
#   {"text": "The city was renamed Nur-Sultan in 2019...", "score": 0.85}
# ]

# Step 3: Build augmented prompt
context = "\n".join([r["text"] for r in results])
prompt = f"""Answer based on the context below.
Context: {context}
Question: {query}
Answer:"""

# Step 4: Generate with LLM
response = llm.generate(prompt)
# "Astana (previously known as Nur-Sultan) is the capital of Kazakhstan."
```
1. Document Processing
```python
# Chunking example
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,     # Target size
    chunk_overlap=50,   # Overlap between chunks
    separators=["\n\n", "\n", ". ", " "]
)

chunks = splitter.split_text(long_document)
# ["First chunk about topic A...",
#  "Second chunk continues topic A...",
#  "Third chunk about topic B..."]
```
2. Embedding & Storage
```python
from sentence_transformers import SentenceTransformer
import chromadb

# Create embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(chunks)

# Store in vector database
client = chromadb.Client()
collection = client.create_collection("docs")
collection.add(
    embeddings=embeddings.tolist(),
    documents=chunks,
    ids=[f"chunk_{i}" for i in range(len(chunks))]
)
```
Basic RAG with LangChain:
```python
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# 1. Load and chunk documents
loader = TextLoader('knowledge_base.txt')
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=50
)
chunks = text_splitter.split_documents(documents)

# 2. Create embeddings and vector store
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
vectordb = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory="./chroma_db"
)

# 3. Set up retriever
retriever = vectordb.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3}  # Retrieve top 3 chunks
)
```
```python
# 4. Create LLM
llm = HuggingFacePipeline.from_model_id(
    model_id="meta-llama/Llama-2-7b-chat-hf",
    task="text-generation",
    model_kwargs={"temperature": 0.7, "max_length": 512}
)

# 5. Create RAG chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
    chain_type="stuff"  # How to combine documents
)

# 6. Query the system
query = "What is retrieval augmented generation?"
result = qa_chain({"query": query})

print("Answer:", result['result'])
print("\nSources:")
for doc in result['source_documents']:
    print(f"- {doc.metadata['source']}: {doc.page_content[:100]}...")
```
Tutorial: HuggingFace Advanced RAG - https://huggingface.co/learn/cookbook/advanced_rag
| Approach | Key Innovation | When to Retrieve |
|---|---|---|
| Naive RAG | Always retrieve | Every query |
| Self-RAG | Model decides | Only when needed |
| Corrective RAG | Verify relevance | Always, but filter |
| Agentic RAG | Multi-step reasoning | Tool-based decisions |
Moving from always-retrieve to adaptive, self-correcting retrieval systems!
Key Innovation: Model decides when to retrieve
Special Tokens Learned:
- [Retrieve] - Need external info?
- [Relevant] - Is retrieved doc useful?
- [Support] - Does doc support answer?
- [Useful] - Is answer helpful?

Worked Example:
```text
Q: What's 2+2?
[Retrieve: No]    # No retrieval needed
A: 4

Q: Who won the 2024 Olympics?
[Retrieve: Yes]   # Need current info
[Retrieved: "Paris 2024 Olympic Games..."]
[Relevant: Yes]   # Doc is on topic
A: The 2024 Olympics were held in Paris...
[Support: Fully]  # Answer matches doc
[Useful: Yes]     # Response is helpful
```
The model learns these tokens during training, enabling adaptive retrieval without manual rules!
Reference: Asai et al. (2023) - "Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection"
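The paper's actual decoding procedure is more involved, but the control flow can be sketched in a few lines: the model emits a reflection token, and the application retrieves only when that token asks for it. This is a minimal illustrative sketch, reusing the same placeholder `llm` and `retriever` objects as the earlier slides; the prompts and token strings are assumptions, not the Self-RAG reference implementation.

```python
def self_rag_answer(query, llm, retriever):
    """Sketch of Self-RAG-style adaptive retrieval (illustrative only)."""
    # Ask the model whether external evidence is needed.
    decision = llm.generate(
        f"{query}\nDo you need retrieval? Answer [Retrieve: Yes] or [Retrieve: No]."
    )

    if "[Retrieve: Yes]" in decision:
        docs = retriever.search(query, top_k=3)
        # Keep only passages the model judges relevant ([Relevant: Yes]).
        relevant = [
            d for d in docs
            if "[Relevant: Yes]" in llm.generate(
                f"Query: {query}\nPassage: {d}\n"
                "Is this passage relevant? Answer [Relevant: Yes] or [Relevant: No]."
            )
        ]
        context = "\n".join(relevant)
        answer = llm.generate(f"Context:\n{context}\n\nQuestion: {query}\nAnswer:")
    else:
        # Parametric knowledge is enough (e.g. "What's 2+2?")
        answer = llm.generate(f"Question: {query}\nAnswer:")
    return answer
```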
Problem: Sometimes retrieved documents are irrelevant or misleading!
Worked Example:
```python
# Query: "Latest COVID vaccine recommendations"
docs = retriever.search(query)  # Returns old 2021 docs

# Evaluator scores relevance
scores = evaluator.score(query, docs)
# [0.3, 0.4, 0.35]  # All low - docs are outdated!

if max(scores) < 0.5:  # Threshold not met
    # Fallback to web search for current info
    docs = web_search(query)
    # Now returns CDC guidelines from 2024

response = generate(query, docs)
```
Don't blindly trust retrieval! Verify relevance and have fallback strategies.
Reference: Yan et al. (2024) - "Corrective Retrieval Augmented Generation"
| Approach | When Retrieve | Filtering | Latency | Best For |
|---|---|---|---|---|
| Naive RAG | Always | None | Low | Simple Q&A |
| Self-RAG | Model decides | Self-reflection | Medium | Adaptive needs |
| Corrective RAG | Always + verify | Relevance scoring | High | High precision |
| HyDE | Via hypothesis | Similarity | Medium | Complex queries |
| Agentic RAG | Tool-based | Multi-step | Highest | Complex workflows |
Trade-offs Example:
- Simple FAQ bot → Naive RAG (fast, cheap)
- Medical diagnosis assistant → Corrective RAG (accuracy critical)
- Research assistant → Agentic RAG (multi-step reasoning needed)
Start with Naive RAG. Add complexity only when you measure specific failures.
How you split documents dramatically affects retrieval quality!
Fixed-size (Simple)
```python
# Split every 500 chars
chunks = [text[i:i+500]
          for i in range(0, len(text), 500)]
# Problem: "The mitochondria is the power-"
#          "house of the cell."  <- split mid-sentence!
```
Recursive (Better)
```python
splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ". ", " "],
    chunk_size=500
)
# Tries paragraph breaks first, then sentences
```
Semantic (Best Quality)
```python
# Find natural breakpoints using embeddings
from langchain_experimental.text_splitter import SemanticChunker

chunker = SemanticChunker(embeddings)
# Groups sentences with similar meaning
```
Example Comparison:
| Strategy | Chunk | Quality |
|---|---|---|
| Fixed | "...power-" / "house..." | Poor |
| Recursive | "...powerhouse." | Good |
| Semantic | Full paragraph on topic | Best |
Choosing the Right Embedding Model:
| Model | Dims | Size | Speed | Quality |
|---|---|---|---|---|
| all-MiniLM-L6-v2 | 384 | 90MB | Fast | Good |
| BGE-large-en | 1024 | 1.3GB | Medium | Excellent |
| OpenAI text-embedding-3-small | 1536 | API | Fast | Excellent |
Code Example: Dense vs Hybrid Retrieval
```python
# Dense retrieval (semantic similarity)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-MiniLM-L6-v2')
query_vec = model.encode("What causes headaches?")
# Finds: "Migraines are often triggered by..." (semantically similar)

# Sparse retrieval (keyword matching with BM25)
from rank_bm25 import BM25Okapi
bm25 = BM25Okapi(tokenized_corpus)
scores = bm25.get_scores(query.split())
# Finds: "Headaches can be caused by..." (exact keyword match)

# Hybrid: Combine both for best results!
final_score = 0.7 * dense_score + 0.3 * sparse_score
```
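The `dense_score` and `sparse_score` above live on different scales, so in practice they are normalized before the weighted sum. Below is a minimal sketch of that fusion step under simple assumptions (a tiny in-memory corpus, min-max normalization, and the 0.7/0.3 weights from the slide, which would be tuned per dataset):

```python
import numpy as np
from sentence_transformers import SentenceTransformer, util
from rank_bm25 import BM25Okapi

corpus = ["Migraines are often triggered by stress and lack of sleep.",
          "Headaches can be caused by dehydration.",
          "Paris is the capital of France."]
query = "What causes headaches?"

# Dense scores: cosine similarity between query and document embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')
doc_vecs = model.encode(corpus)
query_vec = model.encode(query)
dense = util.cos_sim(query_vec, doc_vecs).numpy().flatten()

# Sparse scores: BM25 over whitespace-tokenized documents
bm25 = BM25Okapi([doc.lower().split() for doc in corpus])
sparse = np.array(bm25.get_scores(query.lower().split()))

def minmax(x):
    # Rescale scores to [0, 1] so the weighted sum is meaningful
    return (x - x.min()) / (x.max() - x.min() + 1e-9)

hybrid = 0.7 * minmax(dense) + 0.3 * minmax(sparse)
ranking = np.argsort(-hybrid)
print([corpus[i] for i in ranking])  # Headache docs rank above the Paris doc
```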
Purpose: Fast similarity search over millions of embeddings
Quick Start with ChromaDB:
```python
import chromadb

# Create client and collection
client = chromadb.Client()
collection = client.create_collection("my_docs")

# Add documents (auto-embeds!)
collection.add(
    documents=["Paris is in France",
               "Berlin is in Germany"],
    ids=["doc1", "doc2"]
)

# Query
results = collection.query(
    query_texts=["European capitals"],
    n_results=2
)
# Returns both docs, ranked by relevance
```
Choosing a Vector DB:
| Use Case | Best Choice |
|---|---|
| Prototyping | ChromaDB |
| Production | Pinecone, Qdrant |
| Self-hosted | FAISS, Milvus |
| Graph + Vector | Weaviate |
Key Features:
Template for Grounded Generation:
```python
RAG_PROMPT = """You are a helpful assistant. Answer the question based ONLY on
the context provided below. If the answer is not in the context, say
"I don't have that information."

Context:
{context}

Question: {question}

Instructions:
- Use only information from the context above
- Cite sources using [1], [2], etc.
- Be concise and accurate

Answer:"""

# Example usage
context = """[1] The Eiffel Tower was completed in 1889 for the World's Fair.
[2] It stands 330 meters tall and was the world's tallest structure until 1930.
[3] Gustave Eiffel's company designed and built the tower."""

question = "When was the Eiffel Tower built?"

response = llm.generate(RAG_PROMPT.format(context=context, question=question))
# "The Eiffel Tower was completed in 1889 for the World's Fair [1]."
```
Performance Challenges:
Solutions:
Start simple (Naive RAG), measure performance, iterate based on real bottlenecks!
How to measure RAG quality:
Retrieval Quality:
Generation Quality:
End-to-End:
Mitigation: Monitoring, A/B testing, human-in-the-loop validation
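For the retrieval side, standard metrics such as recall@k and MRR can be computed directly from a small labeled set of (query, relevant-chunk) pairs. A minimal sketch is below; the `eval_set` contents and the `vector_db.search` retriever are hypothetical placeholders standing in for your own labeled data and retrieval stack.

```python
def recall_at_k(relevant_ids, retrieved_ids, k=3):
    """Fraction of truly relevant chunks that appear in the top-k results."""
    top_k = set(retrieved_ids[:k])
    return len(top_k & set(relevant_ids)) / len(relevant_ids)

def mrr(relevant_ids, retrieved_ids):
    """Reciprocal rank of the first relevant chunk (0 if none retrieved)."""
    for rank, doc_id in enumerate(retrieved_ids, start=1):
        if doc_id in relevant_ids:
            return 1.0 / rank
    return 0.0

# Hypothetical evaluation set: query -> IDs of chunks a human marked relevant
eval_set = {
    "What is retrieval augmented generation?": ["chunk_4"],
    "When was the Eiffel Tower built?": ["chunk_12", "chunk_13"],
}

recalls, mrrs = [], []
for query, relevant in eval_set.items():
    retrieved = [r["id"] for r in vector_db.search(query, top_k=5)]  # placeholder retriever
    recalls.append(recall_at_k(relevant, retrieved, k=3))
    mrrs.append(mrr(relevant, retrieved))

print(f"Recall@3: {sum(recalls)/len(recalls):.2f}   MRR: {sum(mrrs)/len(mrrs):.2f}")
```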
Beyond text: Retrieving images, tables, code, etc.
- Vision + Text
  - Use CLIP embeddings for images
  - Retrieve relevant diagrams, charts
  - Generate answers referencing visual content
- Code Retrieval
  - Retrieve relevant functions/examples
  - Code completion and debugging
- Structured Data
  - Knowledge graphs
  - SQL generation from natural language
- Audio/Video
  - Retrieve relevant segments
  - Timestamp-aware responses
Future RAG systems will seamlessly integrate multiple modalities!
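As a concrete illustration of the Vision + Text case above: CLIP-style models embed images and text into the same vector space, so an image index can be queried with plain text. A minimal sketch using the `clip-ViT-B-32` checkpoint from sentence-transformers; the figure file names and query are made up for illustration.

```python
from PIL import Image
from sentence_transformers import SentenceTransformer, util

# CLIP maps both images and text into a shared embedding space
clip = SentenceTransformer('clip-ViT-B-32')

# Index a few figures from the knowledge base (hypothetical file names)
image_paths = ["figures/attention_diagram.png", "figures/loss_curve.png"]
image_embs = clip.encode([Image.open(p) for p in image_paths])

# Retrieve the figure most relevant to a text query
query_emb = clip.encode("diagram of the transformer attention mechanism")
scores = util.cos_sim(query_emb, image_embs)
best = scores.argmax().item()
print("Most relevant figure:", image_paths[best])
```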
Combining knowledge graphs with RAG:
Traditional RAG: Flat document chunks
Graph RAG: Documents + relationships
Advantages:
Implementation:
Tools: Neo4j, Amazon Neptune, LangChain Graph QA
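The tools above handle this at scale, but the core idea can be sketched without them: retrieve an entry node as in ordinary RAG, then expand along graph edges so related facts arrive together. A toy dictionary-based graph is used here; the entities and relations are invented purely for illustration.

```python
# Toy knowledge graph: entity -> list of (relation, neighbor) edges
graph = {
    "Eiffel Tower":   [("located_in", "Paris"), ("designed_by", "Gustave Eiffel")],
    "Paris":          [("capital_of", "France")],
    "Gustave Eiffel": [("profession", "engineer")],
}

def graph_expand(entity, hops=2):
    """Collect facts reachable within `hops` edges of the seed entity."""
    facts, frontier = [], [entity]
    for _ in range(hops):
        next_frontier = []
        for node in frontier:
            for relation, neighbor in graph.get(node, []):
                facts.append(f"{node} --{relation}--> {neighbor}")
                next_frontier.append(neighbor)
        frontier = next_frontier
    return facts

# 1. Ordinary dense retrieval finds the seed entity, e.g. "Eiffel Tower"
# 2. Graph expansion adds connected facts that flat chunking would miss
context = "\n".join(graph_expand("Eiffel Tower"))
print(context)
# Eiffel Tower --located_in--> Paris
# Eiffel Tower --designed_by--> Gustave Eiffel
# Paris --capital_of--> France
# Gustave Eiffel --profession--> engineer
```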
Clever trick: Generate a hypothetical answer first, then retrieve!
```python
# Standard RAG: Query -> Retrieve -> Generate
query = "What causes the aurora borealis?"
# Direct embedding may not match scientific docs well

# HyDE: Query -> Generate Hypothesis -> Embed Hypothesis -> Retrieve -> Generate
hypothesis = llm.generate(f"Write a short explanation: {query}")
# "The aurora borealis occurs when charged particles from the sun
#  interact with gases in Earth's atmosphere, causing them to glow."

# Now embed the HYPOTHESIS (an answer-like text)
hypo_embedding = embed(hypothesis)
docs = vector_db.search(hypo_embedding)  # Better match to scientific docs!

# Finally generate with real retrieved docs
final_answer = llm.generate(query, context=docs)
```
Question: "aurora borealis causes" (query-like)
Hypothesis: "charged particles from sun interact with atmosphere" (document-like)
Answers are more similar to documents than questions are!
Reference: Gao et al. (2022) - "Precise Zero-Shot Dense Retrieval without Relevance Labels"
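This intuition is easy to check empirically: embed the query, a hypothetical answer, and a real passage, then compare cosine similarities. A small sketch follows; the passage text is an illustrative stand-in for a retrieved document.

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('all-MiniLM-L6-v2')

query = "aurora borealis causes"
hypothesis = ("The aurora borealis occurs when charged particles from the sun "
              "interact with gases in Earth's atmosphere, causing them to glow.")
document = ("Auroras are produced when the solar wind's charged particles "
            "collide with oxygen and nitrogen in the upper atmosphere.")

q, h, d = model.encode([query, hypothesis, document])
print("query  vs document:", float(util.cos_sim(q, d)))
print("answer vs document:", float(util.cos_sim(h, d)))
# Typically the hypothesis scores noticeably higher than the raw query
```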
When should you use RAG vs fine-tuning your model?
Use RAG when:
Use Fine-Tuning when:
Often the answer is both: Fine-tune for style/domain, RAG for knowledge!
Emerging trends and research directions:
Required:
Recommended:
Questions?
Next: Mixture of Experts!