PSYC 51.07: Models of Language and Communication

Lecture 20: Applications of Encoder Models

Week 6, Lecture 3 - From Theory to Practice

Winter 2026

Today's Agenda 📋

  1. 🚀 Real-World Applications: Where BERT shines
  2. 🧠 Cognitive Neuroscience: Brain-model parallels
  3. 🤔 Understanding vs. Pattern Matching: The big debate
  4. ⚠️ Limitations: What BERT can't do
  5. 💡 Practical Tips: Deployment and optimization
  6. 🔮 Future Directions: Where are we heading?

Goal: Connect BERT to real applications and understand broader implications


BERT Applications 🚀

BERT excels at understanding tasks:

Classification Tasks:

  • Sentiment Analysis
  • Topic Classification
  • Spam Detection
  • Intent Recognition

Token-Level Tasks:

  • Named Entity Recognition (NER)
  • Part-of-Speech Tagging
  • Word Sense Disambiguation

Span-Level Tasks:

  • Question Answering
  • Extractive Summarization
  • Information Extraction

Sentence-Pair Tasks:

  • Semantic Similarity
  • Natural Language Inference
  • Paraphrase Detection
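
All four sentence-pair tasks feed BERT the same input format: the two sentences are packed into one sequence, separated by [SEP] and distinguished by segment (token type) IDs, and a classification head reads the [CLS] position. A minimal sketch of that encoding (the example sentences are arbitrary; no fine-tuned pair-classification head is shown):

from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Encode a sentence pair: [CLS] sentence A [SEP] sentence B [SEP]
enc = tokenizer(
    "A soccer game with multiple males playing.",  # sentence A (premise)
    "Some men are playing a sport.",               # sentence B (hypothesis)
    return_tensors="pt",
)

print(tokenizer.convert_ids_to_tokens(enc["input_ids"][0].tolist()))
# ['[CLS]', 'a', 'soccer', ..., '[SEP]', 'some', 'men', ..., '[SEP]']
print(enc["token_type_ids"][0])  # 0s for sentence A, 1s for sentence B

A model fine-tuned for NLI or paraphrase detection then maps the [CLS] representation to an entailment or paraphrase label.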
Industry Impact

BERT powers:

  • Google Search (understanding queries)
  • Customer service chatbots
  • Content moderation
  • Document understanding

Case Study: Google Search 🔍

BERT revolutionized search in 2019


# Why word order matters: BERT understands prepositions!
query = "2019 brazil traveler to usa need a visa"

# Before BERT (bag-of-words matching):
keywords = ["brazil", "traveler", "usa", "visa"]
# Matches both: "US traveler to Brazil" AND "Brazil traveler to US"

# With BERT (contextual understanding):
bert_understanding = {
    "subject": "brazil traveler",      # WHO is traveling
    "destination": "usa",              # WHERE they're going
    "direction": "brazil → usa",       # The preposition "to" is key!
    "intent": "visa requirements"
}
# BERT correctly ranks: "Brazil citizen visa requirements for USA"
More Examples of Context-Sensitive Queries
Query | Before BERT | With BERT
"can you get medicine for someone pharmacy" | Generic pharmacy results | Picking up prescriptions for others
"do estheticians stand a lot at work" | Esthetician job listings | Physical demands of the job
"parking on a hill with no curb" | Parking tickets, curb info | How to park safely without a curb

Google reported BERT improved 1 in 10 searches in English


Question Answering with BERT 💬

Extractive QA: Find answer span in passage

Example

Context: "The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France."

Question: "In what country is Normandy located?"

Answer: France


from transformers import pipeline

# Load QA pipeline with BERT
qa_pipeline = pipeline("question-answering", model="bert-large-uncased-whole-word-masking-finetuned-squad")

# Ask question
result = qa_pipeline(
    question="In what country is Normandy located?",
    context="The Normans were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France."
)

print(result)
# e.g. {'answer': 'France', 'score': 0.987, 'start': 104, 'end': 110}

BERT predicts start and end positions of the answer span!
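
Under the hood, the pipeline wraps a span-prediction head: the model produces a start logit and an end logit for every token, and the answer is the span between the two argmax positions. A minimal sketch of those raw outputs, using the same SQuAD-fine-tuned checkpoint as above (the context is shortened for brevity):

import torch
from transformers import BertTokenizer, BertForQuestionAnswering

name = "bert-large-uncased-whole-word-masking-finetuned-squad"
tokenizer = BertTokenizer.from_pretrained(name)
model = BertForQuestionAnswering.from_pretrained(name)

question = "In what country is Normandy located?"
context = "The Normans gave their name to Normandy, a region in France."

inputs = tokenizer(question, context, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

start = outputs.start_logits.argmax()   # index of the answer's first token
end = outputs.end_logits.argmax()       # index of the answer's last token
answer_ids = inputs["input_ids"][0][start:end + 1]
print(tokenizer.decode(answer_ids))     # "france"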


Named Entity Recognition 🏷️

Token-level classification task

Example

Input: "Apple Inc. is headquartered in Cupertino, California."

Output:

  • Apple Inc. → {ORGANIZATION}
  • Cupertino → {LOCATION}
  • California → {LOCATION}

from transformers import pipeline

# Load NER pipeline
ner_pipeline = pipeline("ner", model="dslim/bert-base-NER")

# Extract entities
text = "Apple Inc. is headquartered in Cupertino, California."
entities = ner_pipeline(text)

for entity in entities:
    print(f"{entity['word']}: {entity['entity']} (score: {entity['score']:.2f})")

# Output:
# Apple: B-ORG (score: 0.99)
# Inc: I-ORG (score: 0.99)
# Cupertino: B-LOC (score: 0.99)
# California: B-LOC (score: 0.99)

Sentiment Analysis 😊😐😢

Sequence classification task

Examples
  • "This movie was absolutely amazing!" → {POSITIVE}
  • "The product broke after one week." → {NEGATIVE}
  • "The weather is cloudy today." → {NEUTRAL}

from transformers import pipeline

# Load sentiment analysis pipeline
# Note: this SST-2 model is binary (POSITIVE/NEGATIVE); it has no NEUTRAL label
sentiment_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

# Analyze sentiments
texts = [
    "This movie was absolutely amazing!",
    "The product broke after one week.",
    "The weather is cloudy today."
]

for text in texts:
    result = sentiment_pipeline(text)[0]
    print(f"{text}")
    print(f"  → {result['label']} (confidence: {result['score']:.2f})\n")

Applications: Customer reviews, social media monitoring, brand sentiment


Semantic Similarity 🔗

Measuring sentence similarity with BERT embeddings


from transformers import BertTokenizer, BertModel
import torch
import torch.nn.functional as F

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

def get_sentence_embedding(sentence):
    inputs = tokenizer(sentence, return_tensors='pt', padding=True, truncation=True)
    outputs = model(**inputs)
    # Use [CLS] token embedding as sentence representation
    return outputs.last_hidden_state[:, 0, :]

# Compare sentences
sent1 = "The cat is sleeping on the couch"
sent2 = "A feline is resting on the sofa"
sent3 = "The weather is nice today"

emb1 = get_sentence_embedding(sent1)
emb2 = get_sentence_embedding(sent2)
emb3 = get_sentence_embedding(sent3)

# Compute cosine similarities
sim_12 = F.cosine_similarity(emb1, emb2).item()
sim_13 = F.cosine_similarity(emb1, emb3).item()

print(f"Similarity (1-2): {sim_12:.3f}")  # High (paraphrases)
print(f"Similarity (1-3): {sim_13:.3f}")  # Low (different topics)
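
One caveat worth noting: the raw [CLS] vector of an un-fine-tuned BERT is a fairly weak sentence representation, so the absolute similarity values above should be read with caution. Mean pooling over the token embeddings (ignoring padding) is usually more reliable, and dedicated sentence encoders such as Sentence-BERT do better still. A mean-pooling variant of get_sentence_embedding, reusing the tokenizer and model defined above:

def get_sentence_embedding_mean(sentence):
    inputs = tokenizer(sentence, return_tensors='pt', padding=True, truncation=True)
    outputs = model(**inputs)
    token_embeddings = outputs.last_hidden_state           # (1, seq_len, hidden_dim)
    mask = inputs['attention_mask'].unsqueeze(-1).float()  # (1, seq_len, 1)
    # Average only over real (non-padding) tokens
    return (token_embeddings * mask).sum(dim=1) / mask.sum(dim=1)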

Cognitive Neuroscience Perspective 🧠

How do brains and models process language?

Predictive Processing in the Brain:

  • Brain constantly predicts upcoming input
  • N400: Neural response to unexpected words
  • P600: Syntactic anomaly detection
  • Context shapes predictions
  • Prediction errors drive learning

Key Brain Regions:

  • Left IFG: Syntax processing
  • Left STG/MTG: Semantic processing
  • ATL: Conceptual knowledge

Predictive Processing in Models:

  • BERT: Predict masked words
  • GPT: Predict next word
  • Both use context to predict
  • Surprise = high loss
  • Gradient descent = learning

Similarities:

  • Both hierarchical
  • Both context-sensitive
  • Both predictive
  • Both learn from errors

References: Kuperberg & Jaeger (2016), Willems et al. (2016), Hagoort & Indefrey (2014)


Prediction in Brains vs. Language Models 🧠🤖

Parallels between neural and artificial systems

Phenomenon | Human Brain | Transformer Models
Surprise | N400 amplitude (EEG) | Cross-entropy loss
Hierarchy | sounds → words → sentences | tokens → phrases → meaning
Context | Prior discourse, world knowledge | Self-attention over sequence
Representation | Population coding (neurons) | Distributed embeddings (vectors)

# Concrete example: Surprise/N400 parallel (schematic pseudocode)
sentence_a = "I take my coffee with cream and sugar"  # Expected
sentence_b = "I take my coffee with cream and socks"  # Surprising

# Brain: N400 amplitude higher for "socks"
# Model: Higher loss for "socks"
loss_a = model.compute_loss("sugar", context)  # Low loss
loss_b = model.compute_loss("socks", context)  # High loss

# Both systems encode "surprisal" = -log P(word | context)
surprisal = -np.log(model.predict_prob("socks", context))
# Correlates with N400 amplitude in EEG studies!
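
The block above is schematic (model.compute_loss and model.predict_prob stand in for whatever scoring API a model exposes). A runnable version of the same idea uses BERT's fill-mask head; the targets argument restricts scoring to the two candidate completions:

import numpy as np
from transformers import pipeline

unmasker = pipeline("fill-mask", model="bert-base-uncased")

results = unmasker(
    "I take my coffee with cream and [MASK].",
    targets=["sugar", "socks"],
)
for r in results:
    surprisal = -np.log(r["score"])
    print(f"{r['token_str']:>6}: P = {r['score']:.4f}, surprisal = {surprisal:.2f}")
# "sugar" gets high probability (low surprisal); "socks" gets the opposite,
# mirroring the larger N400 response to the anomalous word.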

Question: Are these superficial analogies or deep connections?

Reference: Kuperberg & Jaeger (2016) - "What do we mean by prediction in language comprehension?"


Neural Encoding with Language Models 🔬

Can we predict brain activity from language models?


# Neural encoding experiment workflow (schematic)
import numpy as np
from transformers import BertModel, BertTokenizer

# 1. Participant reads sentences while in fMRI scanner
sentences = ["The dog chased the cat", "She opened the door", ...]
brain_activity = fmri_scanner.record(sentences)  # (n_sentences, n_voxels)

# 2. Extract BERT representations for same sentences
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
bert = BertModel.from_pretrained("bert-base-uncased", output_hidden_states=True)
bert_embeddings = []
for sent in sentences:
    outputs = bert(**tokenizer(sent, return_tensors="pt"))
    # Use layer 8 (found to correlate best with semantic areas)
    bert_embeddings.append(outputs.hidden_states[8].mean(dim=1).squeeze(0).detach().numpy())
bert_embeddings = np.stack(bert_embeddings)  # (n_sentences, hidden_dim)

# 3. Train encoding model: BERT → Brain
from sklearn.linear_model import Ridge
encoder = Ridge().fit(bert_embeddings[:80], brain_activity[:80])
# 4. Predict brain activity for new sentences
predictions = encoder.predict(bert_embeddings[80:])
correlation = np.corrcoef(predictions.ravel(), brain_activity[80:].ravel())[0, 1]
# Correlation ~ 0.3-0.5 in language areas (significant!)

Key finding: BERT layer 8 best predicts semantic areas; layers 2-4 predict phonological areas

Reference: Caucheteux & King (2022) - "Brains and algorithms partially converge"


Discussion: What Does the Model "Understand"? 🤔

Does BERT understand language?

Evidence FOR understanding:

  • Captures syntax and semantics
  • Resolves ambiguity
  • Handles long-range dependencies
  • Generalizes to new examples
  • Predicts brain activity
  • Solves complex tasks

"If it acts like it understands, maybe it does?"

Evidence AGAINST understanding:

  • No grounding in physical world
  • No sensory experience
  • No social context
  • Brittle to adversarial examples
  • No common sense reasoning
  • Only pattern matching?

"Understanding requires more than statistical patterns"

Key Questions
  • What is the difference between understanding and correlation?
  • Can meaning exist without grounding?
  • Is human understanding fundamentally different?

Class Discussion: What do YOU think?


Adversarial Examples and Brittleness ⚠️

BERT can be fooled easily


from transformers import pipeline
classifier = pipeline("sentiment-analysis")

# Works correctly
classifier("This movie was absolutely wonderful!")
# → [{'label': 'POSITIVE', 'score': 0.9998}]

# Adding irrelevant negative words flips prediction!
classifier("This movie was absolutely wonderful! [SEP] bad bad bad bad")
# → [{'label': 'NEGATIVE', 'score': 0.9234}]  # WRONG!

# Synonym substitution can break it
classifier("The food was good")   # → POSITIVE (0.99)
classifier("The food was fine")   # → POSITIVE (0.72)  # Less confident
classifier("The food was ok")     # → NEGATIVE (0.51)  # WRONG!

# Typos cause problems
classifier("This is amazign!")    # Might work
classifier("Thsi si amzaign!")    # Likely wrong prediction
Implications for Deployment
  • Adversarial attacks: Malicious users can manipulate predictions
  • Robustness testing: Always test with perturbed inputs (a minimal sketch follows below)
  • Defense strategies: Adversarial training, input validation, ensemble methods

Key insight: Models learn statistical patterns, which can include spurious correlations
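
The robustness-testing point above can be made concrete with a small perturbation check; the perturbations here (a typo, appended distractor tokens, lowercasing) are illustrative choices, not a standard benchmark:

from transformers import pipeline

classifier = pipeline("sentiment-analysis")

def perturbations(text):
    """Yield simple perturbed variants of the input text."""
    yield text                                      # original
    yield text.replace("wonderful", "wonderfull")   # typo
    yield text + " bad bad bad"                     # appended distractor tokens
    yield text.lower()                              # casing change

original = "This movie was absolutely wonderful!"
base_label = classifier(original)[0]["label"]

for variant in perturbations(original):
    pred = classifier(variant)[0]
    flag = "" if pred["label"] == base_label else "  <-- prediction flipped!"
    print(f"{pred['label']:>8} ({pred['score']:.2f})  {variant}{flag}")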


Limitations of Current Models ⚠️

Despite impressive performance, transformers have limitations:

  1. Quadratic Complexity
    • Self-attention scales as O(n²) with sequence length n (see the sketch after this list)
    • Limited context windows (512-4096 tokens)
    • Cannot process very long documents efficiently
  2. No True Understanding
    • Pattern matching vs. comprehension
    • Lack of common sense
    • No world model
  3. Data Efficiency
    • Requires massive training data
    • Humans learn language with much less data
    • Not biologically plausible
  4. Biases and Fairness
    • Inherits biases from training data
    • Can amplify stereotypes
    • Ethical concerns
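
To make the quadratic cost concrete, a back-of-the-envelope calculation of how the attention score matrix grows with context length (figures are per attention head, per layer, float32):

# Self-attention builds an n x n score matrix, so cost grows quadratically in length n
for n in [512, 2048, 4096, 16384]:
    scores = n * n                  # entries in the attention matrix
    mem_mb = scores * 4 / 1e6       # float32 bytes -> MB (one head, one layer)
    print(f"n = {n:>6}: {scores:>12,} scores (~{mem_mb:.1f} MB)")
# 512 tokens -> 262,144 scores; an 8x longer input (4096 tokens) needs 64x more.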

Bias in Language Models ⚖️

Models reflect and can amplify societal biases


from transformers import pipeline
unmasker = pipeline("fill-mask", model="bert-base-uncased")

# Gender bias in occupations
unmasker("The doctor said [MASK] would be late.")
# → [('he', 0.62), ('she', 0.18), ('it', 0.08), ...]

unmasker("The nurse said [MASK] would be late.")
# → [('she', 0.71), ('he', 0.15), ('it', 0.06), ...]

# Racial bias (different sentiment for names)
classifier = pipeline("sentiment-analysis")
classifier("Emily is a brilliant scientist.")  # POSITIVE: 0.98
classifier("Jamal is a brilliant scientist.")  # POSITIVE: 0.94  # Lower!

# Where does bias come from?
# Training data (books, Wikipedia) contains historical biases
# Model learns and sometimes amplifies these patterns
Mitigation Strategies
  1. Data-level: Balanced training corpora, counterfactual augmentation (sketched below)
  2. Model-level: Debiasing loss functions, fine-tuning on balanced data
  3. Output-level: Post-hoc filtering, human review for sensitive applications
  4. Evaluation: Regular bias audits using standardized benchmarks (WinoBias, etc.)
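
As a concrete illustration of the data-level strategy, counterfactual augmentation duplicates training examples with gendered terms swapped; the word list below is a toy example (real implementations use curated lexicons and handle casing and grammar properly):

# Toy counterfactual data augmentation: swap gendered terms to balance the corpus
SWAPS = {"he": "she", "she": "he", "his": "her", "her": "his",
         "him": "her", "man": "woman", "woman": "man"}

def counterfactual(sentence):
    words = sentence.split()
    return " ".join(SWAPS.get(w.lower(), w) for w in words)

example = "The doctor said he would review his notes"
augmented = [example, counterfactual(example)]
print(augmented[1])  # "The doctor said she would review her notes"
# Training on both versions weakens the doctor -> "he" association.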

Practical Tips for Working with Transformers 💡

  1. Start with Pre-trained Models
    • Don't train from scratch (too expensive!)
    • Use HuggingFace Model Hub
    • Choose appropriate model size
  2. Fine-tuning Best Practices (a config sketch follows this list)
    • Use small learning rate (1e-5 to 5e-5)
    • Add warmup steps
    • Monitor for overfitting
    • Freeze early layers if data is limited
  3. Computational Efficiency
    • Use mixed precision training (FP16)
    • Gradient accumulation for larger batch sizes
    • Consider DistilBERT for faster inference
    • Use FlashAttention when available
  4. Evaluation
    • Use task-specific metrics
    • Test on out-of-distribution data
    • Check for biases
    • Visualize attention for interpretability
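
The fine-tuning recommendations above translate fairly directly into a Trainer configuration. A hedged sketch: dataset loading and metrics are omitted, train_ds/val_ds are assumed tokenized datasets, and the hyperparameters are typical starting points rather than prescribed values.

from transformers import (AutoModelForSequenceClassification,
                          TrainingArguments, Trainer)

model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased", num_labels=2)

# Freeze the lower encoder layers if labeled data is limited
for layer in model.bert.encoder.layer[:8]:
    for param in layer.parameters():
        param.requires_grad = False

args = TrainingArguments(
    output_dir="bert-sentiment",
    learning_rate=2e-5,                 # small LR (1e-5 to 5e-5)
    warmup_steps=500,                   # LR warmup
    num_train_epochs=3,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=2,      # effective batch size 32
    fp16=True,                          # mixed precision
    evaluation_strategy="epoch",        # monitor for overfitting
    save_strategy="epoch",
    load_best_model_at_end=True,
)

trainer = Trainer(model=model, args=args,
                  train_dataset=train_ds,   # assumed: tokenized training split
                  eval_dataset=val_ds)      # assumed: tokenized validation split
trainer.train()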

Deployment Considerations 🚀

Moving from research to production


# Example: Optimizing BERT for production deployment (simplified sketch)
from transformers import BertModel, BertTokenizer
import torch
import onnxruntime

# Step 1: Load model
model = BertModel.from_pretrained("bert-base-uncased")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Step 2: Quantize for speed (INT8 instead of FP32)
quantized_model = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8
)
# Result: 4x smaller, 2x faster on CPU

# Step 3: Export to ONNX for production
dummy_input = tokenizer("Hello world", return_tensors="pt")
torch.onnx.export(
    model,
    (dummy_input["input_ids"], dummy_input["attention_mask"]),
    "bert.onnx",
    input_names=["input_ids", "attention_mask"],
)

# Step 4: Use ONNX Runtime for inference
session = onnxruntime.InferenceSession("bert.onnx")
# ~1.5x faster than PyTorch, works on any platform
Optimization | Size | Latency | Quality
Original (FP32) | 420 MB | 50 ms | 100%
Quantized (INT8) | 110 MB | 25 ms | 99.5%
ONNX + Quantized | 110 MB | 20 ms | 99.5%
DistilBERT + ONNX | 65 MB | 12 ms | 97%

Future Directions 🔮

Where is the field heading?

  1. Longer Context
    • Efficient attention mechanisms (linear, sparse)
    • Models with 100K+ token context
    • Better long-document understanding
  2. Multimodal Models
    • Vision + Language (CLIP, DALL-E)
    • Audio + Language (Whisper)
    • Grounded understanding
  3. Better Pre-training
    • More efficient objectives
    • Curriculum learning
    • Continual learning
  4. Smaller, More Efficient Models
    • Better compression techniques
    • Lottery ticket hypothesis
    • Edge deployment
  5. Addressing Limitations
    • Debiasing and fairness
    • Robustness and adversarial training
    • Common sense reasoning
    • Interpretability and explainability

Encoder vs Decoder Models Revisited 🔄

Different models for different tasks

Encoder (BERT) | Decoder (GPT)
Understanding tasks: | Generation tasks:
• Classification | • Text completion
• NER, QA | • Dialogue
• Similarity | • Creative writing

Interesting observation:

  • Decoder-only models (GPT-3, LLaMA) can also do classification via prompting! (see the sketch below)
  • "In-context learning" blurs the distinction
  • Trend toward unified decoder-only architectures
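
A small illustration of that point: a decoder-only model can perform sentiment classification purely through its next-token predictions, with no task-specific head. The sketch below uses gpt2 only because it is small; a model this size gives rough results, and comparing the first subword token of each label is a simplification.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

def classify(review):
    # Frame classification as next-word prediction
    prompt = f"Review: {review}\nSentiment (positive or negative):"
    ids = tokenizer(prompt, return_tensors="pt").input_ids
    with torch.no_grad():
        logits = model(ids).logits[0, -1]                 # logits for the next token
    candidates = {label: tokenizer.encode(" " + label)[0]  # first subword of each label
                  for label in ["positive", "negative"]}
    return max(candidates, key=lambda lab: logits[candidates[lab]])

print(classify("This movie was absolutely wonderful!"))
print(classify("The product broke after one week."))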

Discussion Questions 💭

  1. Understanding vs. Pattern Matching:
    • Where do you draw the line?
    • Is there a test for "true" understanding?
    • Does it matter for applications?
  2. Brain-Model Parallels:
    • How useful are these comparisons?
    • What can neuroscience learn from AI?
    • What can AI learn from neuroscience?
  3. Bias and Fairness:
    • Who is responsible for addressing bias?
    • Can we ever have completely unbiased models?
    • How do we balance accuracy and fairness?
  4. Future of NLP:
    • Will encoder models remain relevant?
    • Are decoder-only models the future?
    • What's the next big breakthrough?

Assignment 4: Context-Aware Models 📝

Hands-on experience with transformers!

Tasks:

  1. Implement Attention Mechanism
    • Build scaled dot-product attention from scratch (a reference sketch appears below)
    • Visualize attention weights
  2. Fine-tune BERT
    • Load pre-trained BERT
    • Fine-tune on sentiment analysis
    • Compare to baseline models
  3. Analyze Contextual Embeddings
    • Extract embeddings for polysemous words
    • Visualize how context changes representations
    • Compare BERT vs Word2Vec
  4. Explore Different Architectures
    • Compare BERT (encoder) vs GPT (decoder)
    • Test on different tasks
    • Analyze strengths/weaknesses
  5. Research Component
    • Read one paper from references
    • Write brief summary & critical analysis

Due: Check course website for deadline
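
For Task 1, a minimal reference sketch of scaled dot-product attention (single head, optional masking), mainly as a reminder of the shapes involved:

import torch
import torch.nn.functional as F

def scaled_dot_product_attention(Q, K, V, mask=None):
    """Q, K, V: (batch, seq_len, d_k). Returns (output, attention_weights)."""
    d_k = Q.size(-1)
    scores = Q @ K.transpose(-2, -1) / d_k ** 0.5      # (batch, seq_q, seq_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, float("-inf"))
    weights = F.softmax(scores, dim=-1)                # each row sums to 1
    return weights @ V, weights

# Tiny sanity check: 1 sentence of 5 tokens, 8-dim embeddings
x = torch.randn(1, 5, 8)
out, attn = scaled_dot_product_attention(x, x, x)      # self-attention: Q = K = V
print(out.shape, attn.shape)                           # (1, 5, 8) and (1, 5, 5)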


Summary: Weeks 5-6 🎯

What we learned:

  1. Evolution of Context
    • Seq2Seq → Attention → Transformers
    • From bottleneck to full parallelization
  2. Transformer Architecture
    • Self-attention, multi-head attention
    • Positional encoding, layer norm, residuals
    • Encoder-only (BERT), Decoder-only (GPT), Both (T5)
  3. BERT & Variants
    • Masked Language Modeling
    • Pre-train then fine-tune paradigm
    • RoBERTa, ALBERT, DistilBERT, ELECTRA
  4. Applications
    • Classification, NER, QA, similarity
    • Real-world impact (Google Search, etc.)
  5. Broader Implications
    • Brain-model parallels
    • Understanding vs. pattern matching
    • Limitations and future directions

Resources & Further Reading 📚

Key Papers:

  • Vaswani et al. (2017) - Attention Is All You Need
  • Devlin et al. (2019) - BERT: Pre-training of Deep Bidirectional Transformers
  • Liu et al. (2019) - RoBERTa
  • Sanh et al. (2019) - DistilBERT
  • Dao et al. (2022) - FlashAttention

Cognitive Neuroscience:

  • Hagoort & Indefrey (2014) - The neurobiology of language beyond single words
  • Kuperberg & Jaeger (2016) - What do we mean by prediction in language comprehension?
  • Willems et al. (2016) - Prediction during natural language comprehension

Tutorials:


Looking Forward in the Course 🔮

Where do we go from here?

Upcoming Topics:

  • Week 7: Decoder models and text generation (GPT family)
  • Week 8: Scaling laws and large language models
  • Week 9: Prompting, in-context learning, and instruction tuning
  • Week 10: Alignment, RLHF, and ethical considerations

The Journey Continues:

  • From understanding (BERT) to generation (GPT)
  • From supervised learning to few-shot learning
  • From narrow tasks to general-purpose models
  • From academic research to societal impact

The transformer revolution continues! 🚀


Questions? 🙋

Discussion Time

Topics to discuss:

  • BERT applications
  • Understanding vs. pattern matching
  • Cognitive neuroscience connections
  • Limitations and future work
  • Assignment 4 questions

Thank you! 🙏

See you in Week 7 for GPT and text generation!
