From 0bb0d0e57a3f93cd533054500c7beb901b21032b Mon Sep 17 00:00:00 2001 From: JuanchiiBM Date: Wed, 17 Jun 2026 21:52:37 -0300 Subject: [PATCH] feat(compression): implement Huffman coding algorithm and visualization - Add Huffman coding algorithm for lossless data compression, including detailed descriptions and steps for visualization. - Introduce new `HuffmanState` type for managing the algorithm's state during visualization. - Update translations to include new algorithm and its description in both English and Spanish. - Modify `package.json` to override Vite version for compatibility. This commit enhances the compression category with a foundational algorithm widely used in data compression. --- package.json | 3 + src/components/ConceptVisualizer.tsx | 744 ++++++++++++++++++++++++--- src/i18n/translations.ts | 58 +++ src/lib/algorithms/compression.ts | 378 ++++++++++++++ src/lib/algorithms/index.ts | 30 +- src/lib/types.ts | 28 + 6 files changed, 1142 insertions(+), 99 deletions(-) create mode 100644 src/lib/algorithms/compression.ts diff --git a/package.json b/package.json index 889dcfd..dcbc27b 100644 --- a/package.json +++ b/package.json @@ -25,6 +25,9 @@ "react-dom": "19.2.7", "tailwindcss": "4.3.1" }, + "overrides": { + "vite": "^7" + }, "devDependencies": { "@stylistic/eslint-plugin": "5.10.0", "@types/react": "19.2.17", diff --git a/src/components/ConceptVisualizer.tsx b/src/components/ConceptVisualizer.tsx index 599298f..13ed3c5 100644 --- a/src/components/ConceptVisualizer.tsx +++ b/src/components/ConceptVisualizer.tsx @@ -11,6 +11,7 @@ import type { MemoTableState, CoinChangeState, BucketsState, + HuffmanState, } from '@lib/types' interface ConceptVisualizerProps { @@ -44,6 +45,8 @@ export default function ConceptVisualizer({ step }: ConceptVisualizerProps) { return case 'buckets': return + case 'huffman': + return default: return null } @@ -112,13 +115,18 @@ function BigOChart({ state }: { state: BigOState }) { style={{ maxHeight: '340px' }} role="img" aria-label={`Big O complexity chart${ - visibleCurves.length > 0 - ? `: ${visibleCurves.map((c) => c.name).join(', ')}` - : '' + visibleCurves.length > 0 ? `: ${visibleCurves.map((c) => c.name).join(', ')}` : '' }`} > {/* Background */} - + {/* Horizontal grid lines */} {Array.from({ length: yTicks + 1 }, (_, i) => { @@ -126,8 +134,22 @@ function BigOChart({ state }: { state: BigOState }) { const val = maxY - (i / yTicks) * maxY return ( - - + + {val < 10 ? val.toFixed(1) : Math.round(val)} @@ -140,8 +162,22 @@ function BigOChart({ state }: { state: BigOState }) { const x = toX(n) return ( - - + + {Math.round(n)} @@ -149,11 +185,30 @@ function BigOChart({ state }: { state: BigOState }) { })} {/* Axes */} - - + + {/* Axis labels */} - + n (input size) {/* Stack label */} -
Call Stack
+
+ Call Stack +
{/* Frames — top of stack (last frame) is rendered first */}
@@ -292,9 +349,10 @@ function CallStackViz({ state }: { state: CallStackState }) { backgroundColor: colors.bg, borderColor: colors.border, color: colors.text, - boxShadow: frame.state === 'active' || frame.state === 'base' - ? `0 0 20px ${colors.border}` - : 'none', + boxShadow: + frame.state === 'active' || frame.state === 'base' + ? `0 0 20px ${colors.border}` + : 'none', }} > {/* Pulse animation for active/base frame */} @@ -307,15 +365,16 @@ function CallStackViz({ state }: { state: CallStackState }) {
{frame.label} - {frame.detail && ( - {frame.detail} - )} + {frame.detail && {frame.detail}}
{/* TOP indicator */} {isTop && ( -
+
← top
)} @@ -366,7 +425,9 @@ function StackViz({ return (
{/* Title */} -
Stack · LIFO
+
+ Stack · LIFO +
{/* Operation badge */} {operation && ( @@ -446,7 +507,9 @@ function QueueViz({ return (
{/* Title */} -
Queue · FIFO
+
+ Queue · FIFO +
{/* Operation badge */} {operation && ( @@ -468,8 +531,12 @@ function QueueViz({
{items.length > 0 && ( <> - front - back + + front + + + back + )}
@@ -515,10 +582,20 @@ function QueueViz({ - +
-
processing direction
+
+ processing direction +
) @@ -541,7 +618,9 @@ function LinkedListViz({ state }: { state: LinkedListState }) { return (
-
Linked List
+
+ Linked List +
{operation && (
@@ -555,9 +634,16 @@ function LinkedListViz({ state }: { state: LinkedListState }) {
{/* HEAD label */}
- head + + head + - +
@@ -579,7 +665,13 @@ function LinkedListViz({ state }: { state: LinkedListState }) { {node.value}
{/* Arrow to next */} - + @@ -620,7 +712,9 @@ function HashTableViz({ state }: { state: HashTableState }) { return (
-
Hash Table
+
+ Hash Table +
{operation && (
@@ -631,7 +725,12 @@ function HashTableViz({ state }: { state: HashTableState }) { {hashingKey != null && (
hash("{hashingKey}") - {hashResult != null && = {hashResult}} + {hashResult != null && ( + + {' '} + = {hashResult} + + )}
)} @@ -674,14 +773,27 @@ function HashTableViz({ state }: { state: HashTableState }) { backgroundColor: colors.bg, borderColor: colors.border, color: colors.text, - boxShadow: entry.state !== 'normal' ? `0 0 12px ${colors.border}` : 'none', + boxShadow: + entry.state !== 'normal' ? `0 0 12px ${colors.border}` : 'none', }} > {entry.key}:{entry.value}
{ei < entries.length - 1 && ( - - + + )} @@ -736,15 +848,16 @@ function BinaryTreeViz({ state }: { state: BinaryTreeState }) { return { x, y } } - const label = treeType === 'heap' - ? `${heapType === 'min' ? 'Min' : 'Max'} Heap` - : 'Binary Search Tree' + const label = + treeType === 'heap' ? `${heapType === 'min' ? 'Min' : 'Max'} Heap` : 'Binary Search Tree' const nonNullNodes = nodes.reduce((acc, n) => acc + (n ? 1 : 0), 0) return (
-
{label}
+
+ {label} +
{operation && (
@@ -824,7 +937,9 @@ function BinaryTreeViz({ state }: { state: BinaryTreeState }) { {/* Heap array view */} {treeType === 'heap' && nonNullNodes > 0 && (
-
array view
+
+ array view +
{nodes.map((node, idx) => { if (!node) return null @@ -869,7 +984,9 @@ function TwoPointersViz({ state }: { state: TwoPointersState }) { return (
-
Two Pointers
+
+ Two Pointers +
{operation && (
@@ -885,8 +1002,12 @@ function TwoPointersViz({ state }: { state: TwoPointersState }) { const isRight = i === right return (
- {isLeft && L ↓} - {isRight && !isLeft && R ↓} + {isLeft && ( + L ↓ + )} + {isRight && !isLeft && ( + R ↓ + )}
) })} @@ -905,7 +1026,8 @@ function TwoPointersViz({ state }: { state: TwoPointersState }) { backgroundColor: colors.bg, borderColor: colors.border, color: colors.text, - boxShadow: hl !== 'default' && hl !== 'checked' ? `0 0 12px ${colors.border}` : 'none', + boxShadow: + hl !== 'default' && hl !== 'checked' ? `0 0 12px ${colors.border}` : 'none', }} > {val} @@ -917,7 +1039,12 @@ function TwoPointersViz({ state }: { state: TwoPointersState }) { {/* Index row */}
{array.map((_, i) => ( -
{i}
+
+ {i} +
))}
@@ -925,8 +1052,16 @@ function TwoPointersViz({ state }: { state: TwoPointersState }) { {/* Sum display */} {sum != null && target != null && (
- arr[{left}] + arr[{right}] = {array[left]} + {array[right]} = {sum} - {sum === target ? ' ✓' : sum < target ? ` < ${target} → move L →` : ` > ${target} → ← move R`} + arr[{left}] + arr[{right}] = {array[left]} +{' '} + {array[right]} ={' '} + + {sum} + + {sum === target + ? ' ✓' + : sum < target + ? ` < ${target} → move L →` + : ` > ${target} → ← move R`}
)}
@@ -952,7 +1087,9 @@ function SlidingWindowViz({ state }: { state: SlidingWindowState }) { return (
-
Sliding Window
+
+ Sliding Window +
{operation && (
@@ -988,14 +1125,21 @@ function SlidingWindowViz({ state }: { state: SlidingWindowState }) { {/* Index row */}
{chars.map((_, i) => ( -
{i}
+
+ {i} +
))}
{/* Window bracket */} {windowEnd >= windowStart && windowEnd >= 0 && (
- window [{windowStart}..{windowEnd}] + + window [{windowStart}..{windowEnd}] + "{windowStr}" len={windowStr.length}
@@ -1005,7 +1149,8 @@ function SlidingWindowViz({ state }: { state: SlidingWindowState }) { {/* Best so far */} {bestStr && (
- best = "{bestStr}" (length {bestStr.length}) + best = "{bestStr}" (length{' '} + {bestStr.length})
)}
@@ -1028,7 +1173,9 @@ function MemoTableViz({ state }: { state: MemoTableState }) { return (
-
Memoization
+
+ Memoization +
{operation && (
@@ -1036,9 +1183,7 @@ function MemoTableViz({ state }: { state: MemoTableState }) {
)} - {currentCall && ( -
{currentCall}
- )} + {currentCall &&
{currentCall}
} {/* Memo table grid */}
@@ -1059,8 +1204,17 @@ function MemoTableViz({ state }: { state: MemoTableState }) { > {entry.value != null ? entry.value : '—'}
-
- {entry.state === 'hit' ? '↑ HIT' : entry.state === 'computing' ? '...' : entry.state === 'cached' ? '✓' : ''} +
+ {entry.state === 'hit' + ? '↑ HIT' + : entry.state === 'computing' + ? '...' + : entry.state === 'cached' + ? '✓' + : ''}
) @@ -1085,12 +1239,16 @@ function MemoTableViz({ state }: { state: MemoTableState }) { function CoinChangeViz({ state }: { state: CoinChangeState }) { const { coins, target, selected, remaining, approach, greedyResult, dpResult, operation } = state - const approachLabel = approach === 'greedy' ? 'Greedy' : approach === 'dp' ? 'Dynamic Programming' : 'Comparison' - const approachColor = approach === 'greedy' ? '#fb923c' : approach === 'dp' ? '#60a5fa' : '#c084fc' + const approachLabel = + approach === 'greedy' ? 'Greedy' : approach === 'dp' ? 'Dynamic Programming' : 'Comparison' + const approachColor = + approach === 'greedy' ? '#fb923c' : approach === 'dp' ? '#60a5fa' : '#c084fc' return (
-
Greedy vs DP
+
+ Greedy vs DP +
{operation && (
@@ -1099,14 +1257,25 @@ function CoinChangeViz({ state }: { state: CoinChangeState }) { )} {/* Approach label */} -
+
{approachLabel}
{/* Target */}
target = {target} - {remaining > 0 && remaining < target && remaining: {remaining}} + {remaining > 0 && remaining < target && ( + + remaining: {remaining} + + )}
{/* Available coins */} @@ -1114,7 +1283,10 @@ function CoinChangeViz({ state }: { state: CoinChangeState }) { coins:
{coins.map((c, i) => ( -
+
{c}
))} @@ -1141,7 +1313,9 @@ function CoinChangeViz({ state }: { state: CoinChangeState }) {
))}
- = {selected.reduce((a, b) => a + b, 0)} ({selected.length} coins) + + = {selected.reduce((a, b) => a + b, 0)} ({selected.length} coins) +
)} @@ -1149,19 +1323,33 @@ function CoinChangeViz({ state }: { state: CoinChangeState }) { {approach === 'compare' && greedyResult && dpResult && (
- Greedy + + Greedy +
{greedyResult.map((c, i) => ( -
{c}
+
+ {c} +
))}
{greedyResult.length} coins
- DP (optimal) + + DP (optimal) +
{dpResult.map((c, i) => ( -
{c}
+
+ {c} +
))}
{dpResult.length} coins ✓ @@ -1208,11 +1396,15 @@ function BucketsViz({ state }: { state: BucketsState }) {
- Current Min + + Current Min + {min ?? '—'}
- Current Max + + Current Max + {max ?? '—'}
@@ -1220,7 +1412,9 @@ function BucketsViz({ state }: { state: BucketsState }) { {/* Bucket Calculation Formula */} {buckets.length > 0 && (
-
Bucket Count Calculation
+
+ Bucket Count Calculation +
floor((max - min) / size) + 1
@@ -1228,7 +1422,8 @@ function BucketsViz({ state }: { state: BucketsState }) {
- {max} - {min} + {max} -{' '} + {min}
{bucketSize}
@@ -1269,10 +1464,7 @@ function BucketsViz({ state }: { state: BucketsState }) { const isProcessing = i === currentElementIndex const isCollected = phase === 'collecting' && i < currentElementIndex! return ( -
+
{ switch (highlight) { - case 'comparing': return { bg: 'rgba(59,130,246,0.3)', border: '#3b82f6', text: '#fff' } - case 'active': return { bg: 'rgba(234,179,8,0.3)', border: '#eab308', text: '#fff' } - case 'current': return { bg: 'rgba(168,85,247,0.3)', border: '#a855f7', text: '#fff' } - case 'found': return { bg: 'rgba(74,222,128,0.2)', border: '#4ade80', text: '#4ade80' } - default: return { bg: 'rgba(38,38,38,1)', border: 'rgba(255,255,255,0.1)', text: '#60a5fa' } + case 'comparing': + return { bg: 'rgba(59,130,246,0.3)', border: '#3b82f6', text: '#fff' } + case 'active': + return { bg: 'rgba(234,179,8,0.3)', border: '#eab308', text: '#fff' } + case 'current': + return { bg: 'rgba(168,85,247,0.3)', border: '#a855f7', text: '#fff' } + case 'found': + return { bg: 'rgba(74,222,128,0.2)', border: '#4ade80', text: '#4ade80' } + default: + return { + bg: 'rgba(38,38,38,1)', + border: 'rgba(255,255,255,0.1)', + text: '#60a5fa', + } } } const styles = getHighlightStyles() @@ -1359,7 +1560,10 @@ function BucketsViz({ state }: { state: BucketsState }) { borderColor: styles.border, borderWidth: '1px', color: styles.text, - animation: phase === 'distributing' && isActive && vIdx === bucket.length - 1 ? 'pop 0.3s ease-out' : 'none', + animation: + phase === 'distributing' && isActive && vIdx === bucket.length - 1 + ? 'pop 0.3s ease-out' + : 'none', transform: highlight ? 'scale(1.05)' : 'none', zIndex: highlight ? 10 : 1, }} @@ -1417,3 +1621,381 @@ function BucketsViz({ state }: { state: BucketsState }) {
) } + +// ════════════════════════════════════════════════════════════════ +// HUFFMAN CODING — Frequency table, tree building & codes +// ════════════════════════════════════════════════════════════════ + +type HuffNode = HuffmanState['nodes'][number] + +const HUFF_COLORS: Record = { + normal: { fill: 'rgba(96,165,250,0.12)', stroke: 'rgba(96,165,250,0.35)', text: '#60a5fa' }, + merging: { fill: 'rgba(251,146,60,0.18)', stroke: 'rgba(251,146,60,0.5)', text: '#fb923c' }, + new: { fill: 'rgba(74,222,128,0.18)', stroke: 'rgba(74,222,128,0.5)', text: '#4ade80' }, + path: { fill: 'rgba(250,204,21,0.15)', stroke: 'rgba(250,204,21,0.45)', text: '#facc15' }, + leafFound: { fill: 'rgba(250,204,21,0.24)', stroke: 'rgba(250,204,21,0.6)', text: '#fde047' }, +} + +function HuffmanViz({ state }: { state: HuffmanState }) { + const { + nodes, + queue, + nodeStates = {}, + codes = [], + text, + highlightChar, + summary, + operation, + phase, + } = state + + // Roots currently visible (priority-queue forest, or the single final root) + const roots = queue.filter((id) => nodes[id] != null) + + // ── Layout: in-order column + depth per node, then map to SVG space ── + const pos: Record = {} + let col = 0 + let maxDepth = 0 + const GAP = 0.7 + const place = (id: number, depth: number): number => { + const n = nodes[id] + if (!n) return col + if (depth > maxDepth) maxDepth = depth + const isLeaf = n.left == null && n.right == null + let x: number + if (isLeaf) { + x = col + col += 1 + } else { + const xs: number[] = [] + if (n.left != null) xs.push(place(n.left, depth + 1)) + if (n.right != null) xs.push(place(n.right, depth + 1)) + x = xs.reduce((a, b) => a + b, 0) / Math.max(xs.length, 1) + } + pos[id] = { col: x, depth } + return x + } + for (const r of roots) { + place(r, 0) + col += GAP + } + + const allCols = Object.values(pos).map((p) => p.col) + const maxCol = allCols.length ? Math.max(...allCols) : 0 + + const W = 620 + const PADX = 36 + const R = 17 + const TOP = 26 + const levelH = 62 + const H = TOP + maxDepth * levelH + R + 22 + + const toX = (c: number) => (maxCol === 0 ? W / 2 : PADX + (c / maxCol) * (W - 2 * PADX)) + const toY = (depth: number) => TOP + depth * levelH + + const stateOf = (id: number): string => nodeStates[id] ?? 'normal' + const onPath = (id: number) => stateOf(id) === 'path' || stateOf(id) === 'leafFound' + + // ── Node table: one row per node (sorted by id), mirrors the tree structure ── + const tableIds = Object.keys(nodes) + .map(Number) + .sort((a, b) => a - b) + const codeByChar: Record = {} + for (const c of codes) codeByChar[c.char] = c.code + const childIdSet = new Set() + for (const id of tableIds) { + const n = nodes[id] + if (n.left != null) childIdSet.add(n.left) + if (n.right != null) childIdSet.add(n.right) + } + const isFinalRoot = (id: number) => + roots.length === 1 && roots[0] === id && (nodes[id].left != null || nodes[id].right != null) + + const statusInfo = (id: number): { label: string; color: string; rowBg: string } => { + const st = stateOf(id) + const n = nodes[id] + const isLeaf = n.left == null && n.right == null + if (st === 'merging') + return { label: 'merging', color: HUFF_COLORS.merging.text, rowBg: 'rgba(251,146,60,0.09)' } + if (st === 'new') + return { label: 'new', color: HUFF_COLORS.new.text, rowBg: 'rgba(74,222,128,0.09)' } + if (st === 'leafFound') + return { label: 'coded', color: HUFF_COLORS.leafFound.text, rowBg: 'rgba(250,204,21,0.12)' } + if (st === 'path') + return { label: 'on path', color: HUFF_COLORS.path.text, rowBg: 'rgba(250,204,21,0.06)' } + if (isFinalRoot(id)) return { label: 'root', color: '#a5b4fc', rowBg: 'transparent' } + if (isLeaf) return { label: 'leaf', color: '#60a5fa', rowBg: 'transparent' } + return { label: 'node', color: '#9ca3af', rowBg: 'transparent' } + } + + return ( +
+
+ Huffman Coding +
+ + {operation && ( +
+ {operation} +
+ )} + + {/* Text strip — visible while counting frequencies */} + {phase === 'frequency' && ( +
+ {[...text].map((ch, i) => { + const active = highlightChar != null && ch === highlightChar + return ( +
+ {ch} +
+ ) + })} +
+ )} + + {/* Tree / priority-queue forest */} + {roots.length > 0 && ( + + {/* Edges with 0 / 1 labels */} + {Object.keys(nodes).map((key) => { + const id = Number(key) + const n = nodes[id] + if (!n || !pos[id]) return null + const p = pos[id] + const children: [number | null, string][] = [ + [n.left, '0'], + [n.right, '1'], + ] + return children.map(([childId, bit]) => { + if (childId == null || !pos[childId]) return null + const c = pos[childId] + const x1 = toX(p.col) + const y1 = toY(p.depth) + const x2 = toX(c.col) + const y2 = toY(c.depth) + const lit = onPath(id) && onPath(childId) + return ( + + + + {bit} + + + ) + }) + })} + + {/* Nodes */} + {Object.keys(nodes).map((key) => { + const id = Number(key) + const n = nodes[id] as HuffNode + if (!n || !pos[id]) return null + const p = pos[id] + const x = toX(p.col) + const y = toY(p.depth) + const st = stateOf(id) + const colors = HUFF_COLORS[st] ?? HUFF_COLORS.normal + const isLeaf = n.left == null && n.right == null + const highlighted = st !== 'normal' + return ( + + {highlighted && ( + + )} + + + {n.freq} + + {isLeaf && n.char != null && ( + + '{n.char}' + + )} + + ) + })} + + )} + + {/* Node table — mirrors the tree: status, char, freq, code & child pointers */} + {tableIds.length > 0 && ( +
+
+ Node table · left = 0, right = 1 +
+ + + + + + + + + + + + + {tableIds.map((id) => { + const n = nodes[id] + const info = statusInfo(id) + const code = n.char != null ? codeByChar[n.char] : undefined + const isLeaf = n.left == null && n.right == null + return ( + + + + + + + + + ) + })} + +
#StatusCharFreqCodePointer
{id} + + + {info.label} + + + {n.char != null ? `'${n.char}'` : '—'} + {n.freq} + {code ?? '—'} + + {isLeaf ? ( + + ) : ( + + + 0→{n.left} + + + 1→{n.right} + + + )} +
+
+ )} + + {/* Compression summary */} + {summary && ( +
+
+
+ + ASCII · 8-bit + + + {summary.originalBits} + + bits +
+
+
+ + Huffman + + + {summary.compressedBits} + + bits +
+
+ + {/* Ratio bar */} +
+
+
+
+
+ ~{summary.avgBits.toFixed(2)} bits/char + {summary.savingPct}% smaller +
+
+ + {/* Encoded bitstream */} +
+ + encoded + +
+ {summary.encoded} +
+
+
+ )} +
+ ) +} diff --git a/src/i18n/translations.ts b/src/i18n/translations.ts index 1e04952..6b57d21 100644 --- a/src/i18n/translations.ts +++ b/src/i18n/translations.ts @@ -147,6 +147,7 @@ export const translations: Record = { Backtracking: 'Backtracking', 'Divide and Conquer': 'Divide and Conquer', Math: 'Math', + Compression: 'Compression', }, algorithmDescriptions: { @@ -1050,6 +1051,34 @@ Properties: - Foundational for number theory and cryptography preprocessing Named after the Greek mathematician Eratosthenes of Cyrene (~276–194 BCE), this sieve remains one of the most efficient ways to find all small primes and is the basis for many factorization preprocessing steps.`, + + 'huffman-coding': `Huffman Coding + +Huffman Coding is a greedy algorithm for lossless data compression. It assigns shorter binary codes to frequent characters and longer codes to rare ones, so the total number of bits needed to store the data shrinks. + +How it works: +1. Count how often each character appears +2. Create a leaf node per character and put them in a min-priority queue +3. Repeatedly remove the two lowest-frequency nodes and merge them under a new parent whose frequency is their sum; push the parent back +4. When one node remains it becomes the tree root +5. Assign codes by walking the tree: left = 0, right = 1 + +Why it works: + No code is a prefix of another (it's a prefix-free code), so the encoded bitstream decodes unambiguously. The greedy merge guarantees an optimal prefix code for the given frequencies. + +Time Complexity: + Best: O(n log n) + Average: O(n log n) + Worst: O(n log n) + +Space Complexity: O(n) + +Properties: + - Lossless: the original data is recovered exactly + - Optimal among prefix codes for a known frequency distribution + - Used in DEFLATE (ZIP, gzip, PNG), JPEG and MP3 + +Invented by David A. Huffman in 1952 while he was a student at MIT, it remains a cornerstone of modern compression.`, }, }, @@ -1120,6 +1149,7 @@ Named after the Greek mathematician Eratosthenes of Cyrene (~276–194 BCE), thi Backtracking: 'Backtracking', 'Divide and Conquer': 'Divide y Vencerás', Math: 'Matemáticas', + Compression: 'Compresión', }, algorithmDescriptions: { @@ -2023,6 +2053,34 @@ Propiedades: - Fundamento para teoría de números y preprocesamiento criptográfico Lleva el nombre del matemático griego Eratóstenes de Cirene (~276–194 a.C.). Esta criba sigue siendo una de las formas más eficientes de encontrar todos los primos pequeños y es la base de muchos pasos de preprocesamiento para factorización.`, + + 'huffman-coding': `Codificación de Huffman + +La Codificación de Huffman es un algoritmo voraz (greedy) para compresión de datos sin pérdida. Asigna códigos binarios más cortos a los caracteres frecuentes y más largos a los raros, reduciendo así la cantidad total de bits necesarios para almacenar los datos. + +Cómo funciona: +1. Contar cuántas veces aparece cada carácter +2. Crear un nodo hoja por carácter y ponerlos en una cola de prioridad mínima +3. Quitar repetidamente los dos nodos de menor frecuencia y fusionarlos bajo un nuevo padre cuya frecuencia es la suma; devolver el padre a la cola +4. Cuando queda un solo nodo, se convierte en la raíz del árbol +5. Asignar códigos recorriendo el árbol: izquierda = 0, derecha = 1 + +Por qué funciona: + Ningún código es prefijo de otro (es un código libre de prefijos), así que el flujo de bits codificado se decodifica sin ambigüedad. La fusión voraz garantiza un código de prefijo óptimo para las frecuencias dadas. + +Complejidad Temporal: + Mejor: O(n log n) + Promedio: O(n log n) + Peor: O(n log n) + +Complejidad Espacial: O(n) + +Propiedades: + - Sin pérdida: los datos originales se recuperan exactamente + - Óptimo entre los códigos de prefijo para una distribución de frecuencias conocida + - Usado en DEFLATE (ZIP, gzip, PNG), JPEG y MP3 + +Inventado por David A. Huffman en 1952 cuando era estudiante en el MIT, sigue siendo un pilar de la compresión moderna.`, }, }, } diff --git a/src/lib/algorithms/compression.ts b/src/lib/algorithms/compression.ts new file mode 100644 index 0000000..30cc013 --- /dev/null +++ b/src/lib/algorithms/compression.ts @@ -0,0 +1,378 @@ +import type { Algorithm, Step, HuffmanState } from '@lib/types' +import { d } from '@lib/algorithms/shared' + +type HNode = HuffmanState['nodes'][number] +type HNodeState = NonNullable[number] + +const huffmanCoding: Algorithm = { + id: 'huffman-coding', + name: 'Huffman Coding', + category: 'Compression', + difficulty: 'advanced', + visualization: 'concept', + code: `function huffmanCoding(text) { + // 1. Count character frequencies + const freq = {}; + for (const ch of text) { + freq[ch] = (freq[ch] || 0) + 1; + } + + // 2. Create a leaf node per character and push + // them all into a min-priority queue + let pq = Object.entries(freq).map( + ([char, f]) => ({ char, freq: f, left: null, right: null }) + ); + + // 3. Build the tree: repeatedly merge the two + // lowest-frequency nodes into a new parent + while (pq.length > 1) { + pq.sort((a, b) => a.freq - b.freq); + const left = pq.shift(); + const right = pq.shift(); + pq.push({ char: null, freq: left.freq + right.freq, left, right }); + } + const root = pq[0]; + + // 4. Walk the tree to assign a binary code to + // each character (left = 0, right = 1) + const codes = {}; + function assign(node, code) { + if (!node.left && !node.right) { + codes[node.char] = code || '0'; + return; + } + assign(node.left, code + '0'); + assign(node.right, code + '1'); + } + assign(root, ''); + + // 5. Encode the text using the generated codes + const encoded = [...text].map((ch) => codes[ch]).join(''); + return { codes, encoded }; +} + +huffmanCoding('ABRACADABRA');`, + description: `Huffman Coding + +Huffman Coding is a greedy algorithm for lossless data compression. It assigns shorter binary codes to frequent characters and longer codes to rare ones, so the total number of bits needed to store the data shrinks. + +How it works: +1. Count how often each character appears +2. Create a leaf node per character and put them in a min-priority queue +3. Repeatedly remove the two lowest-frequency nodes and merge them under a new parent whose frequency is their sum; push the parent back +4. When one node remains it becomes the tree root +5. Assign codes by walking the tree: left = 0, right = 1 + +Why it works: + No code is a prefix of another (it's a prefix-free code), so the encoded bitstream decodes unambiguously. The greedy merge guarantees an optimal prefix code for the given frequencies. + +Time Complexity: + Best: O(n log n) + Average: O(n log n) + Worst: O(n log n) + +Space Complexity: O(n) + +Properties: + - Lossless: the original data is recovered exactly + - Optimal among prefix codes for a known frequency distribution + - Used in DEFLATE (ZIP, gzip, PNG), JPEG and MP3 + +Invented by David A. Huffman in 1952 while he was a student at MIT, it remains a cornerstone of modern compression.`, + + generateSteps(locale = 'en') { + const TEXT = 'ABRACADABRA' + const steps: Step[] = [] + + // ── Model state, mutated as the algorithm runs ── + const nodes: Record = {} + let nextId = 0 + const newNode = ( + char: string | null, + freq: number, + left: number | null, + right: number | null, + ): number => { + const id = nextId++ + nodes[id] = { id, char, freq, left, right } + return id + } + + // Snapshot helper — clones nodes so each step is immutable + const snap = (extra: { + phase: 'frequency' | 'build' | 'encode' | 'done' + queue: number[] + nodeStates?: Record + highlightChar?: string | null + freqTable?: { char: string; freq: number; active?: boolean }[] + codes?: { char: string; code: string; freq: number; active?: boolean }[] + activeCode?: string | null + summary?: { + uniqueChars: number + originalBits: number + compressedBits: number + avgBits: number + savingPct: number + encoded: string + } + operation?: string + }) => ({ + type: 'huffman' as const, + text: TEXT, + nodes: Object.fromEntries(Object.entries(nodes).map(([k, v]) => [k, { ...v }])), + ...extra, + }) + + // ════════════════════════════════════════════ + // Phase 1 — Frequency counting + // ════════════════════════════════════════════ + + // Count frequencies in order of first appearance + const freqMap = new Map() + for (const ch of TEXT) freqMap.set(ch, (freqMap.get(ch) ?? 0) + 1) + const orderedChars = [...freqMap.keys()] + + steps.push({ + concept: snap({ + phase: 'frequency', + queue: [], + freqTable: [], + operation: d(locale, 'Input', 'Entrada'), + }), + description: d( + locale, + `Compress the text "${TEXT}" (${TEXT.length} characters). First, count how often each character appears.`, + `Comprimir el texto "${TEXT}" (${TEXT.length} caracteres). Primero, contar cuántas veces aparece cada carácter.`, + ), + codeLine: 3, + variables: { text: TEXT, length: TEXT.length }, + }) + + // Build a leaf node + freq table row per character, one step each + const queue: number[] = [] + const freqTable: { char: string; freq: number; active?: boolean }[] = [] + + for (const ch of orderedChars) { + const f = freqMap.get(ch)! + const id = newNode(ch, f, null, null) + queue.push(id) + freqTable.push({ char: ch, freq: f }) + + steps.push({ + concept: snap({ + phase: 'frequency', + queue: [...queue], + highlightChar: ch, + freqTable: freqTable.map((r) => ({ ...r, active: r.char === ch })), + nodeStates: { [id]: 'new' }, + operation: d(locale, 'Counting frequencies', 'Contando frecuencias'), + }), + description: d( + locale, + `'${ch}' appears ${f} time${f === 1 ? '' : 's'}. Create a leaf node for it and add it to the priority queue.`, + `'${ch}' aparece ${f} ${f === 1 ? 'vez' : 'veces'}. Crear un nodo hoja y agregarlo a la cola de prioridad.`, + ), + codeLine: 5, + variables: { char: ch, freq: f }, + }) + } + + // Sort the queue by (freq, id) — this is the min-priority ordering used throughout + const sortQueue = (q: number[]) => [...q].sort((a, b) => nodes[a].freq - nodes[b].freq || a - b) + + let pq = sortQueue(queue) + + steps.push({ + concept: snap({ + phase: 'build', + queue: [...pq], + freqTable: freqTable.map((r) => ({ ...r })), + operation: d(locale, 'Priority queue ready', 'Cola de prioridad lista'), + }), + description: d( + locale, + `${pq.length} leaf nodes are now in the priority queue, sorted by frequency (lowest first). Time to build the tree.`, + `Ahora hay ${pq.length} nodos hoja en la cola de prioridad, ordenados por frecuencia (menor primero). Hora de construir el árbol.`, + ), + codeLine: 11, + variables: { queueSize: pq.length }, + }) + + // ════════════════════════════════════════════ + // Phase 2 — Build the tree + // ════════════════════════════════════════════ + + while (pq.length > 1) { + const leftId = pq[0] + const rightId = pq[1] + + // Step A — highlight the two lowest-frequency nodes + steps.push({ + concept: snap({ + phase: 'build', + queue: [...pq], + freqTable: freqTable.map((r) => ({ ...r })), + nodeStates: { [leftId]: 'merging', [rightId]: 'merging' }, + operation: d(locale, 'Pick the two smallest', 'Tomar los dos menores'), + }), + description: d( + locale, + `Remove the two lowest-frequency nodes: ${nodeLabel(nodes, leftId)} and ${nodeLabel(nodes, rightId)} (${nodes[leftId].freq} + ${nodes[rightId].freq} = ${nodes[leftId].freq + nodes[rightId].freq}).`, + `Quitar los dos nodos de menor frecuencia: ${nodeLabel(nodes, leftId)} y ${nodeLabel(nodes, rightId)} (${nodes[leftId].freq} + ${nodes[rightId].freq} = ${nodes[leftId].freq + nodes[rightId].freq}).`, + ), + codeLine: 18, + variables: { + left: nodes[leftId].freq, + right: nodes[rightId].freq, + sum: nodes[leftId].freq + nodes[rightId].freq, + }, + }) + + // Merge into a new parent + const parentFreq = nodes[leftId].freq + nodes[rightId].freq + const parentId = newNode(null, parentFreq, leftId, rightId) + pq = sortQueue([...pq.slice(2), parentId]) + + // Step B — show the new parent in the queue + steps.push({ + concept: snap({ + phase: 'build', + queue: [...pq], + freqTable: freqTable.map((r) => ({ ...r })), + nodeStates: { [parentId]: 'new', [leftId]: 'path', [rightId]: 'path' }, + operation: d(locale, 'Merge into a parent', 'Fusionar en un padre'), + }), + description: d( + locale, + `Create a parent node of frequency ${parentFreq} linking both, then push it back. ${pq.length} node${pq.length === 1 ? '' : 's'} left in the queue.`, + `Crear un nodo padre de frecuencia ${parentFreq} que enlaza a ambos y devolverlo a la cola. Quedan ${pq.length} nodo${pq.length === 1 ? '' : 's'} en la cola.`, + ), + codeLine: 19, + variables: { parentFreq, queueSize: pq.length }, + }) + } + + const rootId = pq[0] + + // ════════════════════════════════════════════ + // Phase 3 — Assign codes (DFS, left = 0, right = 1) + // ════════════════════════════════════════════ + + const codeByChar = new Map() + const orderForCodes: { char: string; code: string; path: number[] }[] = [] + + const assign = (id: number, code: string, path: number[]) => { + const node = nodes[id] + const here = [...path, id] + if (node.left === null && node.right === null) { + const finalCode = code || '0' + codeByChar.set(node.char!, finalCode) + orderForCodes.push({ char: node.char!, code: finalCode, path: here }) + return + } + if (node.left !== null) assign(node.left, code + '0', here) + if (node.right !== null) assign(node.right, code + '1', here) + } + assign(rootId, '', []) + + steps.push({ + concept: snap({ + phase: 'encode', + queue: [rootId], + freqTable: freqTable.map((r) => ({ ...r })), + codes: [], + operation: d(locale, 'Assign codes', 'Asignar códigos'), + }), + description: d( + locale, + 'The tree is complete. Walk it from the root: every left branch adds a 0, every right branch adds a 1. The code for a character is the path to its leaf.', + 'El árbol está completo. Recorrerlo desde la raíz: cada rama izquierda agrega un 0, cada rama derecha un 1. El código de un carácter es el camino hasta su hoja.', + ), + codeLine: 26, + variables: { root: nodes[rootId].freq }, + }) + + const codesAcc: { char: string; code: string; freq: number; active?: boolean }[] = [] + for (const entry of orderForCodes) { + const nodeStates: Record = {} + for (const pid of entry.path) nodeStates[pid] = 'path' + nodeStates[entry.path[entry.path.length - 1]] = 'leafFound' + + codesAcc.push({ char: entry.char, code: entry.code, freq: freqMap.get(entry.char)! }) + + steps.push({ + concept: snap({ + phase: 'encode', + queue: [rootId], + freqTable: freqTable.map((r) => ({ ...r, active: r.char === entry.char })), + codes: codesAcc.map((c) => ({ ...c, active: c.char === entry.char })), + nodeStates, + activeCode: entry.code, + operation: d(locale, 'Tracing path', 'Trazando camino'), + }), + description: d( + locale, + `'${entry.char}' is reached by the path "${entry.code}", so its Huffman code is ${entry.code} (${entry.code.length} bit${entry.code.length === 1 ? '' : 's'}).`, + `Se llega a '${entry.char}' por el camino "${entry.code}", así que su código de Huffman es ${entry.code} (${entry.code.length} bit${entry.code.length === 1 ? '' : 's'}).`, + ), + codeLine: 28, + variables: { char: entry.char, code: entry.code, bits: entry.code.length }, + }) + } + + // ════════════════════════════════════════════ + // Phase 4 — Encode & summary + // ════════════════════════════════════════════ + + const encoded = [...TEXT].map((ch) => codeByChar.get(ch)!).join('') + const uniqueChars = orderedChars.length + const fixedWidth = Math.max(1, Math.ceil(Math.log2(uniqueChars))) + const originalBits = TEXT.length * 8 // ASCII baseline + const compressedBits = encoded.length + const avgBits = compressedBits / TEXT.length + const savingPct = Math.round((1 - compressedBits / originalBits) * 100) + + steps.push({ + concept: snap({ + phase: 'done', + queue: [rootId], + freqTable: freqTable.map((r) => ({ ...r })), + codes: codesAcc.map((c) => ({ ...c })), + summary: { + uniqueChars, + originalBits, + compressedBits, + avgBits, + savingPct, + encoded, + }, + operation: d(locale, 'Done', 'Listo'), + }), + description: d( + locale, + `Encoding "${TEXT}" takes ${compressedBits} bits with Huffman vs ${originalBits} bits as 8-bit ASCII — about ${savingPct}% smaller (~${avgBits.toFixed(2)} bits/char instead of a fixed ${fixedWidth}).`, + `Codificar "${TEXT}" usa ${compressedBits} bits con Huffman frente a ${originalBits} bits en ASCII de 8 bits — alrededor de ${savingPct}% más chico (~${avgBits.toFixed(2)} bits/carácter en vez de ${fixedWidth} fijos).`, + ), + codeLine: 38, + variables: { + originalBits, + compressedBits, + saving: `${savingPct}%`, + }, + consoleOutput: [ + `encoded: ${encoded}`, + `${originalBits} bits → ${compressedBits} bits (${savingPct}% smaller)`, + ], + }) + + return steps + }, +} + +/** Short label for a node: its char (leaf) or its frequency sum (internal) */ +function nodeLabel(nodes: Record, id: number): string { + const n = nodes[id] + return n.char !== null ? `'${n.char}'` : `(${n.freq})` +} + +export { huffmanCoding } diff --git a/src/lib/algorithms/index.ts b/src/lib/algorithms/index.ts index 0985581..1c6f9f7 100644 --- a/src/lib/algorithms/index.ts +++ b/src/lib/algorithms/index.ts @@ -39,30 +39,18 @@ import { interpolationSearch, } from '@lib/algorithms/searching' -import { - bfs, - dfs, - dijkstra, - prim, - topologicalSort, -} from '@lib/algorithms/graphs' +import { bfs, dfs, dijkstra, prim, topologicalSort } from '@lib/algorithms/graphs' -import { - fibonacciDp, - knapsack, - lcs, -} from '@lib/algorithms/dynamic-programming' +import { fibonacciDp, knapsack, lcs } from '@lib/algorithms/dynamic-programming' -import { - nQueens, - sudokuSolver, - mazePathfinding, -} from '@lib/algorithms/backtracking' +import { nQueens, sudokuSolver, mazePathfinding } from '@lib/algorithms/backtracking' import { towerOfHanoi } from '@lib/algorithms/divide-and-conquer' import { sieveOfEratosthenes } from '@lib/algorithms/math' +import { huffmanCoding } from '@lib/algorithms/compression' + export const algorithms: Algorithm[] = [ // Concepts bigONotation, @@ -113,11 +101,16 @@ export const algorithms: Algorithm[] = [ towerOfHanoi, // Math sieveOfEratosthenes, + // Compression + huffmanCoding, ] export const categories: Category[] = [ { name: 'Concepts', algorithms: algorithms.filter((a) => a.category === 'Concepts') }, - { name: 'Data Structures', algorithms: algorithms.filter((a) => a.category === 'Data Structures') }, + { + name: 'Data Structures', + algorithms: algorithms.filter((a) => a.category === 'Data Structures'), + }, { name: 'Sorting', algorithms: algorithms.filter((a) => a.category === 'Sorting') }, { name: 'Searching', algorithms: algorithms.filter((a) => a.category === 'Searching') }, { name: 'Graphs', algorithms: algorithms.filter((a) => a.category === 'Graphs') }, @@ -131,4 +124,5 @@ export const categories: Category[] = [ algorithms: algorithms.filter((a) => a.category === 'Divide and Conquer'), }, { name: 'Math', algorithms: algorithms.filter((a) => a.category === 'Math') }, + { name: 'Compression', algorithms: algorithms.filter((a) => a.category === 'Compression') }, ] diff --git a/src/lib/types.ts b/src/lib/types.ts index f9054d1..bd982a6 100644 --- a/src/lib/types.ts +++ b/src/lib/types.ts @@ -194,6 +194,33 @@ export interface BucketsState { operation?: string } +// ── Compression visualization types ── + +export interface HuffmanState { + type: 'huffman' + phase: 'frequency' | 'build' | 'encode' | 'done' + text: string + highlightChar?: string | null + nodes: Record< + number, + { id: number; char: string | null; freq: number; left: number | null; right: number | null } + > + queue: number[] + nodeStates?: Record + freqTable?: { char: string; freq: number; active?: boolean }[] + codes?: { char: string; code: string; freq: number; active?: boolean }[] + activeCode?: string | null + summary?: { + uniqueChars: number + originalBits: number + compressedBits: number + avgBits: number + savingPct: number + encoded: string + } + operation?: string +} + export type ConceptState = | BigOState | CallStackState @@ -206,6 +233,7 @@ export type ConceptState = | MemoTableState | CoinChangeState | BucketsState + | HuffmanState export interface Step { array?: number[]