PSYC 51.07: Models of Language and Communication

Lecture 24: Retrieval Augmented Generation

Grounding LLMs in External Knowledge 🔍

Week 9

Today's Journey 🗺️

What we'll cover
  1. The Problem: Why parametric memory isn't enough
  2. RAG Basics: Retrieve, Augment, Generate
  3. Implementation: Building RAG systems step-by-step
  4. Advanced Techniques: Self-RAG, Corrective RAG, HyDE
  5. Production Challenges: Making RAG work in the real world

The Limits of Parametric Memory 🧠

What are the fundamental limitations of storing knowledge in model parameters?

Problems with purely parametric models:

  • ❌ Knowledge cutoff: No information after training date
  • ❌ Hallucinations: Models confidently generate false information
  • ❌ No source attribution: Can't cite where information comes from
  • ❌ Expensive updates: Retraining for new information costs millions
  • ❌ Privacy concerns: Sensitive data baked into parameters
  • ❌ Domain specificity: Limited knowledge of specialized domains
  • ❌ Outdated facts: World changes but model weights don't

Solution: Combine parametric knowledge with non-parametric retrieval! 🔍


Example: Knowledge Cutoff Problem 📅

User Query (Dec 2024)

"Who won the 2024 US Presidential election?"

Parametric-only LLM:

❌ "I apologize, but my knowledge was last updated in April 2023, so I cannot tell you about the 2024 election results."

Or worse: Hallucinates an answer!

RAG-enhanced LLM:

✅ "According to CNN (retrieved Nov 6, 2024), [actual winner] won the 2024 US Presidential election with [details]."

Provides: Fresh info + source!

Key Insight

Retrieval provides a dynamic, updatable knowledge base without retraining!
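To see what "updatable" means in practice, here is a minimal sketch using ChromaDB (introduced later in this lecture); the documents and IDs are invented for illustration:

import chromadb

# The vector store acts as the model's external, editable memory
client = chromadb.Client()
news = client.create_collection("news")

# Initial knowledge base (ChromaDB embeds documents automatically)
news.add(
    documents=["The 2020 US Presidential election was won by Joe Biden."],
    ids=["election-2020"],
)

# World changed? Add a document -- no gradient updates, no retraining
news.add(
    documents=["[actual winner] won the 2024 US Presidential election."],
    ids=["election-2024"],
)

# Retrieval now surfaces the fresh fact for the LLM to cite
results = news.query(query_texts=["Who won the 2024 election?"], n_results=1)
print(results["documents"][0][0])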


Retrieval Augmented Generation: Definition 📚

RAG (Lewis et al., 2020)

A technique that enhances LLMs by retrieving relevant documents from an external knowledge base and using them to inform generation.

Core Idea: Instead of relying only on learned parameters, the model can "look things up"!

Traditional LLM:

  • Question → Model → Answer
  • Only parametric knowledge
  • Fixed at training time
  • No sources

RAG Pipeline:

  • Question → Retrieve Docs
  • Docs + Question → Model
  • Grounded Answer + Citations
  • Updateable knowledge

Reference: Lewis et al. (2020) - "Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks"


RAG Architecture 🏗️

User Query → Embed Query → Vector Search → Retrieve Docs → Augment Prompt → LLM → Generate Response

Worked Example: "What causes the Northern Lights?"

Step          Action                       Result
1. Embed      Convert query to vector      [0.12, -0.45, 0.78, ...] (384 dims)
2. Search     Find similar vectors in DB   Top-3 docs: scores 0.92, 0.87, 0.85
3. Retrieve   Get actual text chunks       "Aurora borealis occurs when..."
4. Augment    Add context to prompt        System + Context + Query
5. Generate   LLM produces answer          Grounded response with citations

RAG: Step-by-Step Walkthrough 🔍

Query: "What is the capital of Kazakhstan?"


# Step 1: Embed the query
query = "What is the capital of Kazakhstan?"
query_embedding = embedding_model.encode(query)
# Result: numpy array of shape (384,)

# Step 2: Search vector database
results = vector_db.search(query_embedding, top_k=3)
# Returns: [
#   {"text": "Astana is the capital of Kazakhstan...", "score": 0.94},
#   {"text": "Kazakhstan's capital moved from Almaty...", "score": 0.89},
#   {"text": "The city was renamed Nur-Sultan in 2019...", "score": 0.85}
# ]

# Step 3: Build augmented prompt
context = "\n".join([r["text"] for r in results])
prompt = f"""Answer based on the context below.
Context: {context}
Question: {query}
Answer:"""

# Step 4: Generate with LLM
response = llm.generate(prompt)
# "Astana (previously known as Nur-Sultan) is the capital of Kazakhstan."

RAG Components Deep Dive 🔬

1. Document Processing


# Chunking example
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,      # Target size
    chunk_overlap=50,    # Overlap between chunks
    separators=["\n\n", "\n", ". ", " "]
)

chunks = splitter.split_text(long_document)
# ["First chunk about topic A...",
#  "Second chunk continues topic A...",
#  "Third chunk about topic B..."]

2. Embedding & Storage


from sentence_transformers import SentenceTransformer
import chromadb

# Create embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(chunks)

# Store in vector database
client = chromadb.Client()
collection = client.create_collection("docs")
collection.add(
    embeddings=embeddings.tolist(),
    documents=chunks,
    ids=[f"chunk_{i}" for i in range(len(chunks))]
)

RAG Implementation Example 💻

Basic RAG with LangChain:


from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# 1. Load and chunk documents
loader = TextLoader('knowledge_base.txt')
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=50
)
chunks = text_splitter.split_documents(documents)

# 2. Create embeddings and vector store
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
vectordb = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory="./chroma_db"
)

# 3. Set up retriever
retriever = vectordb.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3}  # Retrieve top 3 chunks
)

RAG Implementation (cont.) 💻


# 4. Create LLM
llm = HuggingFacePipeline.from_model_id(
    model_id="meta-llama/Llama-2-7b-chat-hf",
    task="text-generation",
    model_kwargs={"temperature": 0.7, "max_length": 512}
)

# 5. Create RAG chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
    chain_type="stuff"  # How to combine documents
)

# 6. Query the system
query = "What is retrieval augmented generation?"
result = qa_chain({"query": query})

print("Answer:", result['result'])
print("\nSources:")
for doc in result['source_documents']:
    print(f"- {doc.metadata['source']}: {doc.page_content[:100]}...")

Tutorial: HuggingFace Advanced RAG - https://huggingface.co/learn/cookbook/advanced_rag


Evolution of RAG Approaches 📈

Naive RAG → Self-RAG → Corrective RAG → Agentic RAG
Approach         Key Innovation         When to Retrieve
Naive RAG        Always retrieve        Every query
Self-RAG         Model decides          Only when needed
Corrective RAG   Verify relevance       Always, but filter
Agentic RAG      Multi-step reasoning   Tool-based decisions
Trend

Moving from always-retrieve to adaptive, self-correcting retrieval systems!


Self-RAG: Adaptive Retrieval 🎯

Key Innovation: Model decides when to retrieve

Special Tokens Learned:

  • [Retrieve] - Need external info?
  • [Relevant] - Is retrieved doc useful?
  • [Support] - Does doc support answer?
  • [Useful] - Is answer helpful?

Worked Example:


Q: What's 2+2?
[Retrieve: No]  # No retrieval needed
A: 4

Q: Who won the 2024 Olympics?
[Retrieve: Yes]  # Need current info
[Retrieved: "Paris 2024 Olympic Games..."]
[Relevant: Yes]  # Doc is on topic
A: The 2024 Olympics were held in Paris...
[Support: Fully]  # Answer matches doc
[Useful: Yes]  # Response is helpful
Key Insight

The model learns these tokens during training, enabling adaptive retrieval without manual rules!

Reference: Asai et al. (2023) - "Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection"
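To make the control flow concrete, here is a toy, rule-based sketch of the same decision process; the real Self-RAG reads these decisions off learned token probabilities, and `llm` and `retriever` here are hypothetical stand-ins:

def self_rag_answer(query, llm, retriever, relevance_threshold=0.5):
    """Toy sketch of Self-RAG's control flow (the actual model uses
    trained [Retrieve]/[Relevant]/[Support] token probabilities)."""
    # [Retrieve]? -- decide whether external info is needed
    decision = llm.generate(f"Does answering '{query}' require "
                            f"up-to-date or external facts? Yes/No:")
    if "no" in decision.lower():
        return llm.generate(query)  # answer from parametric memory

    # [Relevant]? -- retrieve, then keep only docs judged on-topic
    docs = retriever.search(query, top_k=5)
    relevant = [d for d in docs if d["score"] >= relevance_threshold]
    if not relevant:
        return "I don't have reliable information on that."

    # Generate a grounded answer; [Support]/[Useful] checks would follow
    context = "\n".join(d["text"] for d in relevant)
    return llm.generate(f"Context:\n{context}\n\nQuestion: {query}\nAnswer:")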


Corrective RAG (CRAG) 🔧

Problem: Sometimes retrieved documents are irrelevant or misleading!

Query → Retrieve → Evaluate Relevance → then either: Generate directly, Filter & Refine → Generate, or Web Search → Generate

Worked Example:


1# Query: "Latest COVID vaccine recommendations"
2retrieved_docs = retriever.search(query)  # Returns old 2021 docs
3
4# Evaluator scores relevance
5scores = evaluator.score(query, retrieved_docs)
6# [0.3, 0.4, 0.35]  # All low - docs are outdated!
7
8if max(scores) < 0.5:  # Threshold not met
9    # Fallback to web search for current info
10    fresh_docs = web_search(query)
11    # Now returns CDC guidelines from 2024
12
13response = generate(query, fresh_docs)
Key Idea

Don't blindly trust retrieval! Verify relevance and have fallback strategies.

Reference: Yan et al. (2024) - "Corrective Retrieval Augmented Generation"


Comparing RAG Approaches 📊

Approach         When Retrieve     Filtering           Latency   Best For
Naive RAG        Always            None                Low       Simple Q&A
Self-RAG         Model decides     Self-reflection     Medium    Adaptive needs
Corrective RAG   Always + verify   Relevance scoring   High      High precision
HyDE             Via hypothesis    Similarity          Medium    Complex queries
Agentic RAG      Tool-based        Multi-step          Highest   Complex workflows

Trade-offs Example:


Simple FAQ bot → Naive RAG (fast, cheap)
Medical diagnosis assistant → Corrective RAG (accuracy critical)
Research assistant → Agentic RAG (multi-step reasoning needed)
Rule of Thumb

Start with Naive RAG. Add complexity only when you measure specific failures.


Chunking Strategies 📄

How you split documents dramatically affects retrieval quality!

Fixed-size (Simple)


# Split every 500 chars
chunks = [text[i:i+500]
          for i in range(0, len(text), 500)]
# Problem: "The mitochondria is the power-"
# "house of the cell." <- split mid-sentence!

Recursive (Better)


splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ". ", " "],
    chunk_size=500
)
# Tries paragraph breaks first, then sentences

Semantic (Best Quality)


# Find natural breakpoints using embeddings
from langchain_experimental.text_splitter import SemanticChunker

chunker = SemanticChunker(embeddings)
# Groups sentences with similar meaning

Example Comparison:

Strategy    Chunk                      Quality
Fixed       "...power-" / "house..."   Poor
Recursive   "...powerhouse."           Good
Semantic    Full paragraph on topic    Best

Embedding Models for Retrieval 🎯

Choosing the Right Embedding Model:

Model                           Dims   Size    Speed    Quality
all-MiniLM-L6-v2                384    90MB    Fast     Good
BGE-large-en                    1024   1.3GB   Medium   Excellent
OpenAI text-embedding-3-small   1536   API     Fast     Excellent

Code Example: Dense vs Hybrid Retrieval


# Dense retrieval (semantic similarity)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-MiniLM-L6-v2')
query_vec = model.encode("What causes headaches?")
# Finds: "Migraines are often triggered by..." (semantically similar)

# Sparse retrieval (keyword matching with BM25)
from rank_bm25 import BM25Okapi
bm25 = BM25Okapi(tokenized_corpus)
scores = bm25.get_scores(query.split())
# Finds: "Headaches can be caused by..." (exact keyword match)

# Hybrid: Combine both for best results!
final_score = 0.7 * dense_score + 0.3 * sparse_score
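The snippet above leaves `dense_score` and `sparse_score` abstract. A self-contained version of the hybrid combination, on a toy three-document corpus with the same 0.7/0.3 weights (the min-max rescaling is one reasonable choice, not the only one), might look like this:

import numpy as np
from rank_bm25 import BM25Okapi
from sentence_transformers import SentenceTransformer

corpus = ["Migraines are often triggered by stress and lack of sleep.",
          "Headaches can be caused by dehydration.",
          "Paris is the capital of France."]
query = "What causes headaches?"

# Dense scores: cosine similarity of normalized embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')
doc_vecs = model.encode(corpus, normalize_embeddings=True)
query_vec = model.encode(query, normalize_embeddings=True)
dense_scores = doc_vecs @ query_vec  # dot product = cosine here

# Sparse scores: BM25 keyword matching
bm25 = BM25Okapi([doc.lower().split() for doc in corpus])
sparse_scores = np.array(bm25.get_scores(query.lower().split()))

# Rescale each to [0, 1] so the weighted sum is meaningful
def minmax(x):
    return (x - x.min()) / (x.max() - x.min() + 1e-9)

final_scores = 0.7 * minmax(dense_scores) + 0.3 * minmax(sparse_scores)
print(corpus[int(np.argmax(final_scores))])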

Vector Databases 🗄️

Purpose: Fast similarity search over millions of embeddings

Quick Start with ChromaDB:


import chromadb

# Create client and collection
client = chromadb.Client()
collection = client.create_collection("my_docs")

# Add documents (auto-embeds!)
collection.add(
    documents=["Paris is in France",
               "Berlin is in Germany"],
    ids=["doc1", "doc2"]
)

# Query
results = collection.query(
    query_texts=["European capitals"],
    n_results=2
)
# Returns both docs, ranked by relevance

Choosing a Vector DB:

Use Case         Best Choice
Prototyping      ChromaDB
Production       Pinecone, Qdrant
Self-hosted      FAISS, Milvus
Graph + Vector   Weaviate

Key Features:

  • Query latency (<50ms for 1M docs)
  • Metadata filtering
  • Persistence & backups
  • Scalability

Prompt Engineering for RAG 📝

Template for Grounded Generation:


RAG_PROMPT = """You are a helpful assistant. Answer the question based ONLY on
the context provided below. If the answer is not in the context, say
"I don't have that information."

Context:
{context}

Question: {question}

Instructions:
- Use only information from the context above
- Cite sources using [1], [2], etc.
- Be concise and accurate

Answer:"""

# Example usage
context = """[1] The Eiffel Tower was completed in 1889 for the World's Fair.
[2] It stands 330 meters tall and was the world's tallest structure until 1930.
[3] Gustave Eiffel's company designed and built the tower."""
question = "When was the Eiffel Tower built?"

response = llm.generate(RAG_PROMPT.format(context=context, question=question))
# "The Eiffel Tower was completed in 1889 for the World's Fair [1]."
Key Elements

  1. Explicit grounding instruction
  2. Source citation format
  3. Fallback for missing info

Production Challenges 🏭

Performance Challenges:

  • 💰 Cost: Embedding generation + storage + inference
  • ⚡ Latency: Retrieval adds 50-200ms
  • 📏 Context limits: LLM window size
  • 🎯 Quality: Retrieval accuracy
  • 🔄 Freshness: Keeping index up-to-date

Solutions:

  • Cache embeddings
  • Semantic caching (similar queries; see the sketch below)
  • Incremental indexing
  • Re-ranking pipelines
  • Efficient embedding models
  • Hybrid search
  • Streaming responses
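A minimal sketch of that semantic-caching idea: serve a stored answer when a new query embeds close enough to one already answered (the model choice and 0.9 threshold are illustrative):

from sentence_transformers import SentenceTransformer, util

class SemanticCache:
    """Toy semantic cache: reuse answers for near-duplicate queries."""
    def __init__(self, threshold=0.9):
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.entries = []  # list of (query embedding, answer)
        self.threshold = threshold

    def get(self, query):
        q = self.model.encode(query, convert_to_tensor=True)
        for emb, answer in self.entries:
            if util.cos_sim(q, emb).item() >= self.threshold:
                return answer  # skip retrieval + generation entirely
        return None

    def put(self, query, answer):
        q = self.model.encode(query, convert_to_tensor=True)
        self.entries.append((q, answer))

cache = SemanticCache()
cache.put("Who wrote Hamlet?", "William Shakespeare.")
print(cache.get("Who is the author of Hamlet?"))  # likely a cache hit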
Production Tip

Start simple (Naive RAG), measure performance, iterate based on real bottlenecks!


Evaluation Metrics for RAG 📊

How to measure RAG quality:

Retrieval Quality:

  • Recall@k: Are relevant docs in top-k?
  • MRR (Mean Reciprocal Rank): Where is first relevant doc?
  • NDCG (Normalized Discounted Cumulative Gain): Ranked quality
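Recall@k and MRR are easy to compute once you know which document IDs are relevant for each query; a minimal sketch over toy ranked results:

def recall_at_k(ranked_ids, relevant_ids, k):
    """Fraction of the relevant docs that appear in the top-k results."""
    hits = len(set(ranked_ids[:k]) & set(relevant_ids))
    return hits / len(relevant_ids)

def mrr(all_ranked, all_relevant):
    """Mean Reciprocal Rank: average of 1/rank of the first relevant doc."""
    total = 0.0
    for ranked_ids, relevant_ids in zip(all_ranked, all_relevant):
        for rank, doc_id in enumerate(ranked_ids, start=1):
            if doc_id in relevant_ids:
                total += 1.0 / rank
                break
    return total / len(all_ranked)

# Toy evaluation: two queries, retriever returns ranked doc IDs
ranked = [["d3", "d1", "d7"], ["d2", "d9", "d4"]]
relevant = [["d1"], ["d4", "d5"]]
print(recall_at_k(ranked[0], relevant[0], k=3))  # 1.0 -- d1 is in the top-3
print(mrr(ranked, relevant))                     # (1/2 + 1/3) / 2 ≈ 0.42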

Generation Quality:

  • Factual accuracy: Are answers correct?
  • Faithfulness: Does answer match retrieved docs?
  • Relevance: Does answer address the question?
  • Citation quality: Are sources correctly attributed?

End-to-End:

  • Answer correctness: Human evaluation or LLM-as-judge
  • Groundedness: Verifiable in retrieved docs
  • User satisfaction: Thumbs up/down, A/B testing

Common RAG Failure Modes ⚠️

  1. Retrieval Failures
    • Wrong documents retrieved
    • Relevant docs not in knowledge base
    • Poor query formulation
  2. Context Problems
    • Too much irrelevant context
    • Context too long for LLM
    • Important info not in retrieved chunks
  3. Generation Issues
    • Ignores retrieved context
    • Hallucinates despite good context
    • Incorrect citations
    • Overly dependent on parametric knowledge
  4. System Issues
    • High latency
    • Embedding drift
    • Stale index

Mitigation: Monitoring, A/B testing, human-in-the-loop validation


Multimodal RAG 🖼️📄

Beyond text: Retrieving images, tables, code, etc.

  • Vision + Text
    • Use CLIP embeddings for images (see the sketch below)
    • Retrieve relevant diagrams, charts
    • Generate answers referencing visual content
  • Code Retrieval
    • Embed code snippets
    • Retrieve relevant functions/examples
    • Code completion and debugging
  • Structured Data
    • Tables, databases
    • Knowledge graphs
    • SQL generation from natural language
  • Audio/Video
    • Transcribe and embed
    • Retrieve relevant segments
    • Timestamp-aware responses

Future RAG systems will seamlessly integrate multiple modalities!
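For the Vision + Text case above, CLIP-style models embed images and text into a shared vector space, so a text query can retrieve images directly. A minimal sketch with sentence-transformers (the image filenames are placeholders):

from PIL import Image
from sentence_transformers import SentenceTransformer, util

# CLIP maps both images and text into one shared embedding space
model = SentenceTransformer('clip-ViT-B-32')

image_paths = ["aurora_diagram.png", "cell_structure.png"]  # placeholders
img_embs = model.encode([Image.open(p) for p in image_paths])

query_emb = model.encode("diagram of charged particles hitting the atmosphere")
scores = util.cos_sim(query_emb, img_embs)[0]

best = int(scores.argmax())
print(f"Most relevant image: {image_paths[best]}")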


Graph-Based RAG 🕸️

Combining knowledge graphs with RAG:

Traditional RAG: Flat document chunks

Graph RAG: Documents + relationships

Advantages:

  • Capture entity relationships
  • Multi-hop reasoning (A → B → C)
  • Better for complex queries
  • Explicit knowledge structure

Implementation:

  1. Extract entities and relations from documents
  2. Build knowledge graph
  3. Traverse graph during retrieval
  4. Combine graph paths with document chunks

Tools: Neo4j, Amazon Neptune, LangChain Graph QA
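A toy illustration of the multi-hop idea (A → B → C) using networkx; the entities, relations, and attached text snippets are invented for the example:

import networkx as nx

# Steps 1-2: a tiny knowledge graph extracted from documents
G = nx.DiGraph()
G.add_edge("Marie Curie", "University of Paris",
           relation="taught_at",
           text="Marie Curie became a professor at the University of Paris.")
G.add_edge("University of Paris", "Paris",
           relation="located_in",
           text="The University of Paris is located in Paris, France.")

# Step 3: traverse the graph for a 2-hop question:
# "In which city did Marie Curie teach?"
path = nx.shortest_path(G, "Marie Curie", "Paris")
# ['Marie Curie', 'University of Paris', 'Paris']

# Step 4: collect the evidence along the path as context for the LLM
context = [G.edges[u, v]["text"] for u, v in zip(path, path[1:])]
print("\n".join(context))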


HyDE: Hypothetical Document Embeddings 💭

Clever trick: Generate a hypothetical answer first, then retrieve!


# Standard RAG: Query -> Retrieve -> Generate
query = "What causes the aurora borealis?"
# Direct embedding may not match scientific docs well

# HyDE: Query -> Generate Hypothesis -> Embed Hypothesis -> Retrieve -> Generate
hypothesis = llm.generate(f"Write a short explanation: {query}")
# "The aurora borealis occurs when charged particles from the sun
#  interact with gases in Earth's atmosphere, causing them to glow."

# Now embed the HYPOTHESIS (an answer-like text)
hypo_embedding = embed(hypothesis)
docs = vector_db.search(hypo_embedding)  # Better match to scientific docs!

# Finally generate with real retrieved docs
final_answer = llm.generate(query, context=docs)
Why It Works

Question: "aurora borealis causes" (query-like)
Hypothesis: "charged particles from sun interact with atmosphere" (document-like)

Answers are more similar to documents than questions are!

Reference: Gao et al. (2022) - "Precise Zero-Shot Dense Retrieval without Relevance Labels"


RAG vs Fine-Tuning 🤔

When should you use RAG vs fine-tuning your model?

Use RAG when:

  • ✅ Knowledge changes frequently
  • ✅ Need citations/provenance
  • ✅ Privacy concerns (data in DB, not weights)
  • ✅ Large knowledge base
  • ✅ Multi-domain applications
  • ✅ Want to update without retraining

Use Fine-Tuning when:

  • ✅ Need specific style/behavior
  • ✅ Low latency critical
  • ✅ Small, stable knowledge domain
  • ✅ Specialized reasoning
  • ✅ Domain-specific language
  • ✅ Want fully self-contained model
Best Practice

Often the answer is both: Fine-tune for style/domain, RAG for knowledge!


Future of RAG 🔮

Emerging trends and research directions:

  1. Agentic RAG
    • LLM decides retrieval strategy
    • Multi-step reasoning with retrieval
    • Tool use (web search, APIs, databases)
  2. Long-context RAG
    • Models with 1M+ token windows
    • Entire books as context
    • Retrieval still useful for efficiency
  3. Personalized RAG
    • User-specific knowledge bases
    • Privacy-preserving retrieval
    • Federated learning
  4. Real-time RAG
    • Live web scraping
    • Streaming document updates
    • Event-driven retrieval

Key Takeaways 🔑

  1. RAG solves fundamental LLM limitations
    • Knowledge cutoff, hallucination, no citations
  2. Core pipeline: Retrieve → Augment → Generate
    • Vector search for relevant documents
    • Incorporate into prompt
  3. Many variants exist
    • Naive RAG → Self-RAG → Corrective RAG → Agentic RAG
  4. Key components matter
    • Chunking strategy, embedding model, vector DB
  5. Production requires careful engineering
    • Latency, cost, quality evaluation
  6. RAG + Fine-tuning is powerful combo
    • Fine-tune for style, RAG for knowledge

Readings 📖

Required:

  1. Lewis et al. (2020): Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks
    [arXiv]
  2. Asai et al. (2023): Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection
    [arXiv]

Recommended:

  • Yan et al. (2024): Corrective RAG [arXiv]
  • Gao et al. (2022): Precise Zero-Shot Dense Retrieval (HyDE) [arXiv]
  • HuggingFace RAG Tutorial [Tutorial]
  • LangChain RAG Docs [Docs]

Questions? 💬

Next: Mixture of Experts!
