PSYC 51.07: Models of Language and Communication - Week 3

Word Embeddings: Word2Vec, GloVe, FastText

Lecture 11: The Neural Revolution in NLP

Winter 2026


Today's Lecture 📋

  1. 🚀 The 2013 Revolution: Word2Vec
  2. 🏗️ Word2Vec Architectures: CBOW & Skip-gram
  3. 🌍 GloVe: Global Vectors
  4. 🔤 FastText: Subword Information
  5. 📊 Comparison & Evaluation
  6. 💡 Applications & Best Practices

Goal: Understand how neural methods learn dense word representations


From Count-Based to Prediction-Based 🎯

Count-Based (LSA, LDA):

  • Build co-occurrence matrix
  • Apply matrix factorization
  • Global statistics
  • Linear relationships
  • Interpretable factors

Advantages:

  • Fast training
  • Leverages global statistics
  • Mathematically grounded

Prediction-Based (Word2Vec):

  • Predict context from word
  • Train neural network
  • Local context windows
  • Non-linear relationships
  • Learned representations

Advantages:

  • Better semantic capture
  • Scalable to huge corpora
  • Dense, low-dimensional

Word2Vec: The Revolution 🚀

2013: Everything changed

Key Innovation:

  • Shallow neural network
  • Predict center word from context (CBOW)
  • Predict context from center word (Skip-gram)
  • Dense, low-dimensional vectors
  • Captures semantic relationships
  • Fast training with negative sampling

Impact:

  • Sparked deep learning in NLP
  • Pre-training became standard
  • Enabled transfer learning

Famous Result:

vec(king) - vec(man) + vec(woman) ≈ vec(queen)

Other examples: country-capital analogies (Athens : Greece :: Baghdad : Iraq) and syntactic analogies (good : better :: bad : worse)

Reference: Mikolov et al. (2013a). "Efficient Estimation of Word Representations in Vector Space"


Word2Vec: Core Intuition 💡

Words that appear in similar contexts should have similar representations

Example Context Window

"The quick brown [TARGET] jumped over the lazy dog"

Context: [The, quick, brown, jumped, over, the, lazy, dog]

Target: fox

Key Idea:

  • Train a model to predict target from context (or vice versa)
  • The learned weights become our word vectors!
  • Similar words will have similar weight patterns

We don't actually care about the prediction task—we want the embeddings!
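To make that concrete: in gensim (used throughout the code examples later in this lecture), the learned input weight matrix is exposed directly as the word vectors. A minimal sketch on a toy corpus — model quality is irrelevant here; the point is where the embeddings live:

from gensim.models import Word2Vec

# Toy corpus — the prediction task is only a means to an end
sentences = [['the', 'cat', 'sat', 'on', 'the', 'mat'],
             ['the', 'dog', 'sat', 'on', 'the', 'rug']]

model = Word2Vec(sentences, vector_size=50, window=2, min_count=1, sg=1)

# The trained input weights ARE the embeddings: one row per vocabulary word
print(model.wv.vectors.shape)   # (vocabulary size, 50)
print(model.wv['cat'][:5])      # first 5 dimensions of the vector for "cat"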


Context Windows: Worked Example 📝

Sentence: "The cat sat on the mat"

Window size = 2 (2 words on each side)


Position 0: "The"  → Context: [cat, sat]
Position 1: "cat"  → Context: [The, sat, on]
Position 2: "sat"  → Context: [The, cat, on, the]
Position 3: "on"   → Context: [cat, sat, the, mat]
Position 4: "the"  → Context: [sat, on, mat]
Position 5: "mat"  → Context: [on, the]

Skip-gram training pairs (target → context):


(The, cat), (The, sat)
(cat, The), (cat, sat), (cat, on)
(sat, The), (sat, cat), (sat, on), (sat, the)
...

Result: Words appearing in similar contexts get similar vectors!
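The sliding-window bookkeeping above is only a few lines of code. A minimal sketch (window size 2) that reproduces the pairs listed above:

def skipgram_pairs(tokens, window=2):
    """Generate (target, context) training pairs from a list of tokens."""
    pairs = []
    for i, target in enumerate(tokens):
        lo, hi = max(0, i - window), min(len(tokens), i + window + 1)
        for j in range(lo, hi):
            if j != i:
                pairs.append((target, tokens[j]))
    return pairs

print(skipgram_pairs("The cat sat on the mat".split()))
# [('The', 'cat'), ('The', 'sat'), ('cat', 'The'), ('cat', 'sat'), ('cat', 'on'), ...]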


CBOW: Continuous Bag of Words 🎒

Predict the center word from context words


Context words:
  "the" ──┐
  "cat" ──┼──→ Average ──→ "sat"
  "on"  ──┼──→ Hidden  ──→ (predict)
  "the" ──┘

Architecture:

  1. Input: One-hot vectors of context words
  2. Hidden: Average of input embeddings
  3. Output: Softmax over vocabulary

Training:

  • Context window size (e.g., 5 words)
  • Maximize the probability of the center word given its context, $P(w_t \mid w_{t-c}, \dots, w_{t+c})$
  • Backpropagation updates embeddings

Characteristics:

  • Faster to train than Skip-gram
  • Better for frequent words
  • Smooths over context
  • Good for syntactic tasks

Objective:

$\dfrac{1}{T}\sum_{t=1}^{T} \log P\left(w_t \mid w_{t-c}, \dots, w_{t+c}\right)$
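A minimal numpy sketch of one CBOW forward pass (toy sizes; the weight matrices here are random stand-ins for what training would learn):

import numpy as np

V, d = 6, 4                     # vocabulary size, embedding dimension
W_in = np.random.randn(V, d)    # input embeddings (one row per word)
W_out = np.random.randn(d, V)   # output weights

context_ids = [0, 1, 3, 0]      # e.g. indices of "the", "cat", "on", "the"
h = W_in[context_ids].mean(axis=0)             # hidden layer = average of context embeddings

scores = h @ W_out                             # one score per vocabulary word
probs = np.exp(scores) / np.exp(scores).sum()  # softmax over the vocabulary
print(probs.argmax())           # index of the predicted center word ("sat" after training)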


Skip-gram: The Inverse Approach 🔄

Predict context words from the center word


Center word:           Predict context:
                   ┌──→ "the"
                   ├──→ "cat"
  "sat" ──→ Hidden ┼──→ "on"
                   └──→ "the"

Architecture:

  1. Input: One-hot vector of center word
  2. Hidden: Word embedding
  3. Output: Softmax predicting each context word

Training:

  • For each center word, predict surrounding words
  • Maximize the probability of each surrounding word given the center word, $P(w_{t+j} \mid w_t)$

Characteristics:

  • Slower but better quality
  • Better for rare words
  • Captures more semantic relationships
  • Most commonly used

Objective:

$\dfrac{1}{T}\sum_{t=1}^{T} \sum_{-c \le j \le c,\; j \ne 0} \log P\left(w_{t+j} \mid w_t\right)$


Negative Sampling: The Speed Trick ⚡

Problem: Softmax over entire vocabulary is too expensive!

For 100k vocabulary: Need to compute 100k exponentials per training example!

Solution: Negative Sampling

Instead of predicting across all words, create a binary classification:

  • Positive sample: Actual context word (label = 1)
  • Negative samples: K random words (label = 0)

Typical K: 5-20 for small datasets, 2-5 for large datasets

Training becomes $O(K+1)$ binary classifications instead of an $O(|V|)$ softmax per example!

Reference: Mikolov et al. (2013b). "Distributed Representations of Words and Phrases"


Negative Sampling: Concrete Example 🎯

Training pair: ("cat", "sat") - cat is center, sat is context


# Positive sample: Does "sat" appear near "cat"? YES (label = 1)
positive_pair = ("cat", "sat", 1)

# Negative samples: random words that DON'T appear near "cat"
# Sample 5 random words from the vocabulary (label = 0)
negative_pairs = [
    ("cat", "algorithm", 0),
    ("cat", "president", 0),
    ("cat", "quantum", 0),
    ("cat", "democracy", 0),
    ("cat", "software", 0),
]

# Train a binary classifier: is this a real (center, context) pair?
# Instead of a 100k-way softmax → 6 binary predictions!

Sampling distribution:

$P(w_i) = \dfrac{f(w_i)^{0.75}}{\sum_{j} f(w_j)^{0.75}}$

The 0.75 power gives rare words slightly higher probability of being sampled as negatives.
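Both ingredients fit in a few lines of numpy. The counts below are made-up toy frequencies; the 0.75-power noise distribution and the sigmoid-based loss follow Mikolov et al. (2013b):

import numpy as np

# Noise distribution: unigram counts raised to the 0.75 power, then normalized
counts = np.array([1000.0, 500.0, 200.0, 50.0, 10.0])  # toy word frequencies
p_neg = counts ** 0.75
p_neg /= p_neg.sum()
print(p_neg)   # rare words get a larger share than raw frequency would give them

# Per-example loss: 1 positive pair + K negative pairs, each a binary decision
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

d = 4
v_center = np.random.randn(d)      # vector of the center word ("cat")
u_pos = np.random.randn(d)         # output vector of the true context word ("sat")
u_negs = np.random.randn(5, d)     # output vectors of 5 sampled negative words

loss = -np.log(sigmoid(u_pos @ v_center)) - np.log(sigmoid(-(u_negs @ v_center))).sum()
print(loss)   # 6 dot products per example instead of a 100k-way softmax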


Word2Vec in Practice 💻


from gensim.models import Word2Vec
import gensim.downloader as api

# Load pre-trained model (300 dimensions, trained on ~100B words of Google News)
model = api.load('word2vec-google-news-300')

# Find similar words
similar = model.most_similar('computer', topn=5)
print(similar)
# Output: [('computers', 0.72), ('laptop', 0.69), ('PC', 0.68), ...]

# Word analogies: king - man + woman = ?
result = model.most_similar(
    positive=['king', 'woman'],
    negative=['man'],
    topn=1
)
print(result)
# Output: [('queen', 0.71)]
continued...

Word2Vec in Practice 💻


# Train your own model
sentences = [
    ['the', 'cat', 'sat', 'on', 'the', 'mat'],
    ['the', 'dog', 'ran', 'in', 'the', 'park'],
    # ... more sentences
]

custom_model = Word2Vec(
    sentences,
    vector_size=100,    # embedding dimension
    window=5,           # context window
    min_count=1,        # minimum word frequency (1 = keep every word)
    sg=1,               # 1=skip-gram, 0=CBOW
    negative=5,         # negative sampling
    workers=4           # parallel threads
)
...continued

GloVe: Global Vectors for Word Representation 🌍

Combining the best of count-based and prediction-based methods

Motivation:

  • Word2Vec uses local context windows
  • LSA uses global co-occurrence statistics
  • Can we get the best of both?

Key Insight:

Ratios of co-occurrence probabilities encode meaning better than raw probabilities!

Co-occurrence Example:

Probe word    P(word | ice)    P(word | steam)    Ratio
solid         high             low                >> 1
gas           low              high               << 1
water         high             high               ~ 1
fashion       low              low                ~ 1

Idea:
Learn word vectors such that their dot product relates to co-occurrence probability ratios

Reference: Pennington et al. (2014). "GloVe: Global Vectors for Word Representation"


GloVe: The Ratio Intuition 🔢

Why ratios matter more than raw probabilities:


Given words: "ice" and "steam"
Probe with: "solid", "gas", "water"

P(solid | ice)  = 0.00019    P(solid | steam) = 0.000022
P(gas | ice)    = 0.000066   P(gas | steam)   = 0.00078

Ratio: P(solid|ice) / P(solid|steam) = 8.9   → "solid" relates to ice
Ratio: P(gas|ice)   / P(gas|steam)   = 0.085 → "gas" relates to steam
Ratio: P(water|ice) / P(water|steam) = 1.36  → "water" is neutral

The insight: These ratios distinguish relevant context words from irrelevant ones!

GloVe learns vectors where:

$w_i^{\top}\tilde{w}_k + b_i + \tilde{b}_k \approx \log X_{ik}$


GloVe: The Objective Function 🎯

Goal: Learn vectors that capture co-occurrence statistics

Objective Function:

$J = \sum_{i,j=1}^{V} f(X_{ij})\left(w_i^{\top}\tilde{w}_j + b_i + \tilde{b}_j - \log X_{ij}\right)^2$

where:

  • $X_{ij}$ = number of times word $j$ appears in the context of word $i$
  • $w_i$ = word vector for word $i$
  • $\tilde{w}_j$ = separate context vector for word $j$
  • $b_i$, $\tilde{b}_j$ = bias terms
  • $f(X_{ij})$ = weighting function

Weighting Function:

$f(x) = \begin{cases} (x / x_{\max})^{\alpha} & \text{if } x < x_{\max} \\ 1 & \text{otherwise} \end{cases}$

Purpose: Prevent very common co-occurrences from dominating (typically $x_{\max} = 100$, $\alpha = 0.75$)
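The weighting function itself is tiny. A sketch with the constants suggested in the GloVe paper (x_max = 100, alpha = 0.75):

def glove_weight(x, x_max=100, alpha=0.75):
    """GloVe weighting f(X_ij): damps very frequent co-occurrences; zero when X_ij = 0."""
    if x == 0:
        return 0.0
    return (x / x_max) ** alpha if x < x_max else 1.0

for x in [0, 1, 10, 100, 1000]:
    print(x, round(glove_weight(x), 3))
# 0 → 0.0, 1 → 0.032, 10 → 0.178, 100 → 1.0, 1000 → 1.0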


GloVe vs. Word2Vec 🥊

Word2Vec (Skip-gram):

  • Local context windows
  • Online training
  • Predicts context from word
  • Stochastic updates
  • Negative sampling trick
  • Each occurrence matters

Advantages:

  • Works well on small corpora
  • Can train incrementally
  • Captures local patterns

GloVe:

  • Global co-occurrence matrix
  • Batch training
  • Factorizes co-occurrence matrix
  • Deterministic
  • Weighted least squares
  • Aggregates all occurrences

Advantages:

  • Faster training (large corpora)
  • Better statistical efficiency
  • More interpretable
Performance

In practice: Similar performance on most tasks! Choose based on:

  • Corpus size (GloVe better for large)
  • Training infrastructure (GloVe needs memory for matrix)
  • Availability of pre-trained models

GloVe in Practice 💻


import gensim.downloader as api
import numpy as np

# Load pre-trained GloVe embeddings
# Options: glove-wiki-gigaword-50, -100, -200, -300
# Or: glove-twitter-25, -50, -100, -200
glove = api.load("glove-wiki-gigaword-100")

# Use just like Word2Vec
similar = glove.most_similar('computer', topn=5)
print(similar)

# Analogies
result = glove.most_similar(
    positive=['france', 'berlin'],
    negative=['paris'],
    topn=1
)
print(result)  # Should be close to 'germany'
20
continued...

GloVe in Practice 💻


# Vector arithmetic
king = glove['king']
man = glove['man']
woman = glove['woman']
result_vec = king - man + woman

# Find closest word
closest = glove.similar_by_vector(result_vec, topn=1)
print(closest)  # Should be close to 'queen'

# Compute similarity
similarity = glove.similarity('cat', 'dog')
print(f"Similarity: {similarity:.3f}")
...continued

FastText: Enriching with Subword Information 🔤

The Problem with Word2Vec and GloVe:

Out-of-Vocabulary (OOV) Problem
  • Word2Vec/GloVe: One vector per word
  • New word? → No vector at all
  • Misspelling? → No vector
  • Rare morphological form? → A poorly estimated vector

Example:

  • Have: "running", "runner"
  • Need: "runnable"
  • But these words share morphology! ("run" + suffix)

FastText Solution: Represent each word as a bag of character n-grams (plus the whole word)

Reference: Bojanowski et al. (2017). "Enriching Word Vectors with Subword Information"


FastText: Character N-grams 🧩

Key Idea: Break words into character n-grams

Example: "where" (with n=3)

Character trigrams: <wh, whe, her, ere, re>

Plus: <where> (the whole word)

Word vector: the sum of the vectors of its character n-grams (including the whole-word token)

Advantages:

  • Handles OOV words!
  • Captures morphology
  • Better for rare words
  • Great for German, Turkish, Finnish
  • Robust to typos

Example OOV:

Have trained on: "run", "running"

Need vector for: "runnable"

N-grams: <ru, run, unn, nna, nab, abl, ble, le>

Many overlap with "run" and "running"!

Can generate reasonable embedding
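Extracting the n-grams is purely mechanical. A short sketch that reproduces the trigrams shown above (boundary markers added, n fixed at 3; real FastText uses a range such as 3-6):

def char_ngrams(word, n=3):
    """Character n-grams with boundary markers, as in FastText."""
    padded = f"<{word}>"
    return [padded[i:i + n] for i in range(len(padded) - n + 1)]

print(char_ngrams("where"))     # ['<wh', 'whe', 'her', 'ere', 're>']
print(char_ngrams("runnable"))  # ['<ru', 'run', 'unn', 'nna', 'nab', 'abl', 'ble', 'le>']
# FastText additionally keeps the whole word "<where>" as one extra unit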


FastText: Morphology in Action 🔠

How FastText handles related words:


# Word: "unhappiness" broken into character 3-grams:
# <un, unh, nha, hap, app, ppi, pin, ine, nes, ess, ss>

# Shares n-grams with:
# "unhappy"   → <un, unh, nha, hap, app, ppy
# "happiness" → hap, app, ppi, pin, ine, nes, ess, ss>
# "happy"     → hap, app, ppy

# Therefore: vec("unhappiness") is close to:
#   - vec("unhappy")     (prefix overlap)
#   - vec("happiness")   (suffix overlap)
#   - vec("sadness")     (similar suffix pattern)

This is why FastText excels at morphologically rich languages!

Language   Morphology   FastText Advantage
English    Low          Moderate
German     High         Significant
Turkish    Very High    Essential
Finnish    Extreme      Critical

FastText in Practice 💻


from gensim.models import FastText
import gensim.downloader as api

# Load pre-trained FastText model
# Available in 157 languages!
fasttext_model = api.load('fasttext-wiki-news-subwords-300')

# Works for known words
vec_cat = fasttext_model['cat']

# Also works for unknown words! (OOV)
# (Note: the gensim downloader ships plain word vectors only; true OOV lookup
#  needs a model with subword information, e.g. ft_model trained below)
vec_unknownword = fasttext_model['unknownword']  # Still get a vector!

# Even works for misspellings (somewhat)
vec_misspelling = fasttext_model['computr']  # Close to "computer"

# Train your own FastText model
sentences = [['the', 'cat', 'sat'], ['the', 'dog', 'ran']]

ft_model = FastText(
continued...

FastText in Practice 💻


    sentences,
    vector_size=100,
    window=5,
    min_count=1,
    min_n=3,      # minimum n-gram length
    max_n=6,      # maximum n-gram length
    word_ngrams=1 # use word + ngrams
)

# Check similar words
similar = ft_model.wv.most_similar('cat', topn=5)

# Morphological awareness
# 'running', 'runner', 'runnable' will be close
...continued

Embedding Methods Comparison 📊

Method     Year   Approach           Training speed   Best for
LDA        2003   Probabilistic      Slow             Topic modeling
Word2Vec   2013   Neural (local)     Fast             General NLP
GloVe      2014   Hybrid (global)    Fast             Large corpora
FastText   2017   Neural + n-grams   Fast             Morphology-rich languages

When to Use What?
  • Word2Vec (Skip-gram): General purpose, good semantic quality, most popular
  • GloVe: Large corpus, want deterministic training, good interpretability
  • FastText: Morphologically rich languages, need OOV handling, many rare words
  • LSA/LDA: Topic modeling, document similarity, interpretable dimensions

Typical dimensions: 50-300 (100-300 most common)


Evaluating Word Embeddings 📏

Intrinsic Evaluation:

1. Word Similarity

  • WordSim-353 dataset
  • SimLex-999 dataset
  • Spearman correlation with human judgments (see the sketch at the end of this slide)

2. Word Analogies

  • Google Analogy Dataset (19k questions)
  • Semantic: Athens:Greece::Baghdad:Iraq
  • Syntactic: good:better::bad:worse
  • Accuracy: % correct

3. Categorization

  • Cluster words
  • Compare to gold categories

Extrinsic Evaluation:

Use embeddings as features in downstream tasks:

  • Sentiment Analysis
  • Named Entity Recognition
  • POS Tagging
  • Machine Translation
  • Question Answering
  • Text Classification
Key Insight

Intrinsic scores don't always correlate with extrinsic performance!

Best approach: Evaluate on your actual task.
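As a sketch of the word-similarity style of intrinsic evaluation: correlate model similarities with human ratings using Spearman correlation (the word pairs and 0-10 "human" scores below are illustrative, not taken from WordSim-353):

from scipy.stats import spearmanr
import gensim.downloader as api

model = api.load('glove-wiki-gigaword-100')

# Hypothetical human similarity judgments on a 0-10 scale (illustrative only)
pairs = [('cat', 'dog', 7.5), ('cup', 'mug', 8.0),
         ('car', 'banana', 1.0), ('king', 'queen', 8.5)]

human_scores = [h for _, _, h in pairs]
model_scores = [model.similarity(w1, w2) for w1, w2, _ in pairs]

rho, _ = spearmanr(human_scores, model_scores)
print(f"Spearman correlation: {rho:.2f}")

Gensim also ships helpers (evaluate_word_pairs, evaluate_word_analogies) that run these benchmarks from standard data files.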


Limitations of Static Embeddings ⚠️

The fundamental problem: One vector per word type

Example: Polysemy
  1. "I deposited money at the bank" (financial institution)
  2. "We sat by the river bank" (riverside)
  3. "The plane will bank left" (tilt)

All three sentences get the same vector for "bank"! This conflates different meanings.

Other Limitations:

  • No compositional semantics
  • "hot dog" "hot" + "dog"
  • Bias in embeddings
  • Limited to training corpus
  • No understanding of negation
  • Missing multimodal grounding

The Solution:

  • Different vector per occurrence
  • Consider sentence context
  • ELMo, BERT, GPT, ...
  • Coming next lecture!

(Image placeholder - transformer architecture preview)


Bias in Word Embeddings ⚖️

Embeddings learn the biases in their training data

Gender Bias Examples
  • "man" more associated with "career", "woman" with "family"

Other Biases:

  • Racial/ethnic stereotypes
  • Age bias
  • Religious bias
  • Socioeconomic bias

Why This Matters:

  • Embeddings used in hiring tools, search, recommendations
  • Can perpetuate and amplify societal biases
  • May violate fairness and anti-discrimination principles

References: Bolukbasi et al. (2016). "Man is to Computer Programmer as Woman is to Homemaker?"

Caliskan et al. (2017). "Semantics derived automatically from language corpora contain human-like biases"


Bias Detection: Code Example 🔍


import gensim.downloader as api
model = api.load('word2vec-google-news-300')

# Measure gender bias: which words are closer to "man" vs "woman"?
def gender_bias_score(word):
    """Positive = closer to man, Negative = closer to woman"""
    return model.similarity(word, 'man') - model.similarity(word, 'woman')

occupations = ['doctor', 'nurse', 'engineer', 'teacher',
               'programmer', 'secretary', 'scientist', 'receptionist']

for job in occupations:
    score = gender_bias_score(job)
    direction = "→ man" if score > 0 else "→ woman"
    print(f"{job:12}: {score:+.3f} {direction}")

# Output:
# doctor      : +0.089 → man
# nurse       : -0.109 → woman
# engineer    : +0.078 → man
continued...

Bias Detection: Code Example 🔍


# teacher     : -0.046 → woman
# programmer  : +0.091 → man
# secretary   : -0.107 → woman
...continued

These biases reflect stereotypes in the training data (news articles)!


Debiasing Approaches 🔧

How can we reduce bias in embeddings?

1. Post-processing:

  • Identify bias subspace
  • Neutralize: Remove bias from neutral words
  • Equalize: Ensure equal distances
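A minimal numpy sketch of the "neutralize" step, simplified from Bolukbasi et al. (2016): approximate the bias direction with a single definitional pair and project it out of a word that should be neutral (the paper uses several pairs and PCA to find the subspace):

import numpy as np
import gensim.downloader as api

model = api.load('word2vec-google-news-300')

# Crude one-dimensional bias subspace from a single definitional pair
g = model['he'] - model['she']
g = g / np.linalg.norm(g)

def neutralize(word):
    """Remove the component of the word vector that lies along the bias direction."""
    v = model[word]
    return v - (v @ g) * g

v_new = neutralize('programmer')
print(float(v_new @ g))   # ≈ 0: no remaining component along the bias direction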

2. Training-time:

  • Counterfactual data augmentation
  • Adversarial debiasing
  • Constrained optimization

3. Data curation:

  • Balanced corpora
  • Careful source selection
  • Remove problematic content

Challenges:

  • Hard to define "bias"
  • May harm performance
  • Bias is multidimensional
  • Not a complete solution
  • Ethical considerations
Important

Debiasing is an active research area. No perfect solution yet!

Best practice:

  • Be aware of biases
  • Document training data
  • Test for bias
  • Use multiple evaluation metrics
  • Consider societal impact

Practical Tips for Using Embeddings 💡

  1. Start with pre-trained models
    • Word2Vec: Google News (300d, trained on ~100B words)
    • GloVe: Common Crawl (300d, 840B tokens)
    • FastText: Available in 157 languages
  2. Fine-tune if you have domain data
    • Medical: PubMed, clinical notes
    • Legal: case law, contracts
    • Social media: tweets, posts
    • Can significantly improve performance
  3. Choose dimensions wisely
    • More dims = more expressiveness, more data needed
    • 50-100d: small datasets, fast computation
    • 200-300d: large datasets, better quality
  4. Use cosine similarity
    • Not Euclidean distance (direction matters more than magnitude) — see the sketch after this list
  5. Be aware of biases and limitations
    • Test for bias in your use case
    • Static embeddings can't handle polysemy
    • Consider contextual embeddings for better performance
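Tip 4 in code: cosine similarity compares direction only, so rescaling a vector does not change it, while Euclidean distance does (a minimal numpy sketch):

import numpy as np

def cosine(u, v):
    """Cosine similarity: direction only, magnitude ignored."""
    return (u @ v) / (np.linalg.norm(u) * np.linalg.norm(v))

a = np.array([1.0, 2.0, 3.0])
b = 10 * a                      # same direction, much larger magnitude

print(cosine(a, b))             # ≈ 1.0  → identical direction
print(np.linalg.norm(a - b))    # ≈ 33.7 → far apart by Euclidean distance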

Loading Pre-trained Embeddings: Quick Reference 💻


import gensim.downloader as api

# Word2Vec (Google News, ~100B words, 300d)
w2v = api.load('word2vec-google-news-300')

# GloVe options
glove_50d = api.load('glove-wiki-gigaword-50')      # Small, fast
glove_100d = api.load('glove-wiki-gigaword-100')    # Balanced
glove_300d = api.load('glove-wiki-gigaword-300')    # Best quality
glove_twitter = api.load('glove-twitter-100')       # Social media

# FastText (handles OOV!)
fasttext = api.load('fasttext-wiki-news-subwords-300')

# Basic usage (same API for all of the models above)
similar = w2v.most_similar('computer', topn=5)
vector = w2v['cat']
similarity = w2v.similarity('dog', 'cat')

Pro tip: Start with glove-wiki-gigaword-100 for a good balance of speed and quality!


Real-World Applications 🌟

Search & Information Retrieval:

  • Semantic search
  • Query expansion
  • Document ranking
  • Question answering

Text Classification:

  • Sentiment analysis
  • Spam detection
  • Topic classification
  • Intent detection

Sequence Labeling:

  • Named entity recognition
  • POS tagging
  • Chunking

Recommendation Systems:

  • Product recommendations
  • Content recommendations
  • User similarity

Machine Translation:

  • Word alignment
  • Transfer learning
  • Multilingual models

Creative Applications:

  • Poetry generation
  • Analogy discovery
  • Semantic exploration
  • Visualization

Example: Google Search uses embeddings to understand query intent and match relevant documents!


Discussion Question 💬

Why do word analogies work so well?

The famous example:

vec(king) - vec(man) + vec(woman) ≈ vec(queen)

Think about:

  • What does vector subtraction represent linguistically?
  • Why does this capture semantic relationships?
  • What are the limitations of this approach?
  • Does this mean embeddings "understand" gender?
Deeper Question

Are these embeddings truly capturing meaning, or just statistical patterns?

Does the distributional hypothesis have limits?

Reference: Boleda (2020). "Distributional Semantics and Linguistic Theory"


Summary 🎯

What we learned today:

  1. Word2Vec (2013): Neural revolution in embeddings
     • CBOW: Predict center from context
     • Skip-gram: Predict context from center
     • Negative sampling for efficiency
  2. GloVe (2014): Combining count + prediction
     • Global co-occurrence statistics
     • Factorization objective
     • Ratio of probabilities
  3. FastText (2017): Subword information
     • Character n-grams
     • Handles OOV words
     • Captures morphology
  4. Evaluation: Intrinsic vs. extrinsic
  5. Limitations: Static, biased, no polysemy
  6. Next: Contextual embeddings solve polysemy!

Key References 📚

Foundational Papers:

  • Mikolov et al. (2013a). "Efficient Estimation of Word Representations in Vector Space"
  • Mikolov et al. (2013b). "Distributed Representations of Words and Phrases and their Compositionality"
  • Pennington et al. (2014). "GloVe: Global Vectors for Word Representation"
  • Bojanowski et al. (2017). "Enriching Word Vectors with Subword Information"

Bias & Ethics:

  • Bolukbasi et al. (2016). "Man is to Computer Programmer as Woman is to Homemaker?"
  • Caliskan et al. (2017). "Semantics derived automatically from language corpora contain human-like biases"

Theory:

  • Boleda, G. (2020). "Distributional Semantics and Linguistic Theory"
  • Levy & Goldberg (2014). "Neural Word Embedding as Implicit Matrix Factorization"

Tools:

  • gensim (Python): Word2Vec, FastText, and the pre-trained model downloads used in this lecture's code examples


Questions? 🙋

Next Lecture:

Contextual Embeddings: ELMo, Universal Sentence Encoder, BERT

One word, multiple meanings!
