diff --git a/src/main/java/com/thealgorithms/others/Huffman.java b/src/main/java/com/thealgorithms/others/Huffman.java index 4fdee5d5e70e..22e75da502b5 100644 --- a/src/main/java/com/thealgorithms/others/Huffman.java +++ b/src/main/java/com/thealgorithms/others/Huffman.java @@ -1,125 +1,211 @@ package com.thealgorithms.others; import java.util.Comparator; +import java.util.HashMap; +import java.util.Map; import java.util.PriorityQueue; -import java.util.Scanner; -// node class is the basic structure -// of each node present in the Huffman - tree. +/** + * Node class representing a node in the Huffman tree. + * Each node contains a character, its frequency, and references to left and + * right children. + */ class HuffmanNode { - int data; char c; - HuffmanNode left; HuffmanNode right; -} -// comparator class helps to compare the node -// on the basis of one of its attribute. -// Here we will be compared -// on the basis of data values of the nodes. -class MyComparator implements Comparator { + /** + * Constructor for HuffmanNode. + * + * @param c the character stored in this node + * @param data the frequency of the character + */ + HuffmanNode(char c, int data) { + this.c = c; + this.data = data; + this.left = null; + this.right = null; + } + + /** + * Default constructor for HuffmanNode. + */ + HuffmanNode() { + this.left = null; + this.right = null; + } +} +/** + * Comparator class for comparing HuffmanNode objects based on their frequency + * data. + * Used to maintain min-heap property in the priority queue. + */ +class HuffmanComparator implements Comparator { + @Override public int compare(HuffmanNode x, HuffmanNode y) { - return x.data - y.data; + return Integer.compare(x.data, y.data); } } +/** + * Implementation of Huffman Coding algorithm for data compression. + * Huffman Coding is a greedy algorithm that assigns variable-length codes to + * characters + * based on their frequency of occurrence. Characters with higher frequency get + * shorter codes. + * + *

+ * Time Complexity: O(n log n) where n is the number of unique characters + * Space Complexity: O(n) + * + * @see Huffman + * Coding + */ public final class Huffman { private Huffman() { } - // recursive function to print the - // huffman-code through the tree traversal. - // Here s is the huffman - code generated. - public static void printCode(HuffmanNode root, String s) { - // base case; if the left and right are null - // then its a leaf node and we print - // the code s generated by traversing the tree. - if (root.left == null && root.right == null && Character.isLetter(root.c)) { - // c is the character in the node - System.out.println(root.c + ":" + s); - - return; + /** + * Builds a Huffman tree from the given character array and their frequencies. + * + * @param charArray array of characters + * @param charFreq array of frequencies corresponding to the characters + * @return root node of the Huffman tree + * @throws IllegalArgumentException if arrays are null, empty, or have different + * lengths + */ + public static HuffmanNode buildHuffmanTree(char[] charArray, int[] charFreq) { + if (charArray == null || charFreq == null) { + throw new IllegalArgumentException("Character array and frequency array cannot be null"); + } + if (charArray.length == 0 || charFreq.length == 0) { + throw new IllegalArgumentException("Character array and frequency array cannot be empty"); + } + if (charArray.length != charFreq.length) { + throw new IllegalArgumentException("Character array and frequency array must have the same length"); } - // if we go to left then add "0" to the code. - // if we go to the right add"1" to the code. - // recursive calls for left and - // right sub-tree of the generated tree. - printCode(root.left, s + "0"); - printCode(root.right, s + "1"); - } + int n = charArray.length; + PriorityQueue priorityQueue = new PriorityQueue<>(n, new HuffmanComparator()); - // main function - public static void main(String[] args) { - Scanner s = new Scanner(System.in); + // Create leaf nodes and add to priority queue + for (int i = 0; i < n; i++) { + if (charFreq[i] < 0) { + throw new IllegalArgumentException("Frequencies must be non-negative"); + } + HuffmanNode node = new HuffmanNode(charArray[i], charFreq[i]); + priorityQueue.add(node); + } - // number of characters. - int n = 6; - char[] charArray = {'a', 'b', 'c', 'd', 'e', 'f'}; - int[] charfreq = {5, 9, 12, 13, 16, 45}; + // Build the Huffman tree + while (priorityQueue.size() > 1) { + HuffmanNode left = priorityQueue.poll(); + HuffmanNode right = priorityQueue.poll(); - // creating a priority queue q. - // makes a min-priority queue(min-heap). - PriorityQueue q = new PriorityQueue(n, new MyComparator()); + HuffmanNode parent = new HuffmanNode(); + parent.data = left.data + right.data; + parent.c = '-'; + parent.left = left; + parent.right = right; - for (int i = 0; i < n; i++) { - // creating a Huffman node object - // and add it to the priority queue. - HuffmanNode hn = new HuffmanNode(); + priorityQueue.add(parent); + } - hn.c = charArray[i]; - hn.data = charfreq[i]; + return priorityQueue.poll(); + } - hn.left = null; - hn.right = null; + /** + * Generates Huffman codes for all characters in the tree. + * + * @param root root node of the Huffman tree + * @return map of characters to their Huffman codes + */ + public static Map generateCodes(HuffmanNode root) { + Map huffmanCodes = new HashMap<>(); + if (root != null) { + generateCodesHelper(root, "", huffmanCodes); + } + return huffmanCodes; + } - // add functions adds - // the huffman node to the queue. - q.add(hn); + /** + * Helper method to recursively generate Huffman codes by traversing the tree. + * + * @param node current node in the tree + * @param code current code being built + * @param huffmanCodes map to store character-to-code mappings + */ + private static void generateCodesHelper(HuffmanNode node, String code, Map huffmanCodes) { + if (node == null) { + return; } - // create a root node - HuffmanNode root = null; + // If it's a leaf node, store the code + if (node.left == null && node.right == null && Character.isLetter(node.c)) { + huffmanCodes.put(node.c, code.isEmpty() ? "0" : code); + return; + } - // Here we will extract the two minimum value - // from the heap each time until - // its size reduces to 1, extract until - // all the nodes are extracted. - while (q.size() > 1) { - // first min extract. - HuffmanNode x = q.peek(); - q.poll(); + // Traverse left with '0' and right with '1' + if (node.left != null) { + generateCodesHelper(node.left, code + "0", huffmanCodes); + } + if (node.right != null) { + generateCodesHelper(node.right, code + "1", huffmanCodes); + } + } - // second min extarct. - HuffmanNode y = q.peek(); - q.poll(); + /** + * Prints Huffman codes for all characters in the tree. + * This method is kept for backward compatibility and demonstration purposes. + * + * @param root root node of the Huffman tree + * @param code current code being built (initially empty string) + */ + public static void printCode(HuffmanNode root, String code) { + if (root == null) { + return; + } - // new node f which is equal - HuffmanNode f = new HuffmanNode(); + // If it's a leaf node, print the code + if (root.left == null && root.right == null && Character.isLetter(root.c)) { + System.out.println(root.c + ":" + code); + return; + } - // to the sum of the frequency of the two nodes - // assigning values to the f node. - f.data = x.data + y.data; - f.c = '-'; + // Traverse left with '0' and right with '1' + if (root.left != null) { + printCode(root.left, code + "0"); + } + if (root.right != null) { + printCode(root.right, code + "1"); + } + } - // first extracted node as left child. - f.left = x; + /** + * Demonstrates the Huffman coding algorithm with sample data. + * + * @param args command line arguments (not used) + */ + public static void main(String[] args) { + // Sample characters and their frequencies + char[] charArray = {'a', 'b', 'c', 'd', 'e', 'f'}; + int[] charFreq = {5, 9, 12, 13, 16, 45}; - // second extracted node as the right child. - f.right = y; + System.out.println("Characters: a, b, c, d, e, f"); + System.out.println("Frequencies: 5, 9, 12, 13, 16, 45"); + System.out.println("\nHuffman Codes:"); - // marking the f node as the root node. - root = f; + // Build Huffman tree + HuffmanNode root = buildHuffmanTree(charArray, charFreq); - // add this node to the priority-queue. - q.add(f); + // Generate and print Huffman codes + Map codes = generateCodes(root); + for (Map.Entry entry : codes.entrySet()) { + System.out.println(entry.getKey() + ": " + entry.getValue()); } - - // print the codes by traversing the tree - printCode(root, ""); - s.close(); } } diff --git a/src/test/java/com/thealgorithms/others/HuffmanTest.java b/src/test/java/com/thealgorithms/others/HuffmanTest.java new file mode 100644 index 000000000000..aa16f6493506 --- /dev/null +++ b/src/test/java/com/thealgorithms/others/HuffmanTest.java @@ -0,0 +1,223 @@ +package com.thealgorithms.others; + +import java.util.Map; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +/** + * Test class for Huffman coding algorithm. + * Tests various scenarios including normal cases, edge cases, and error + * conditions. + */ +class HuffmanTest { + + @Test + void testBuildHuffmanTreeWithBasicInput() { + char[] charArray = {'a', 'b', 'c', 'd', 'e', 'f'}; + int[] charFreq = {5, 9, 12, 13, 16, 45}; + + HuffmanNode root = Huffman.buildHuffmanTree(charArray, charFreq); + + Assertions.assertNotNull(root); + Assertions.assertEquals(100, root.data); // Total frequency + } + + @Test + void testGenerateCodesWithBasicInput() { + char[] charArray = {'a', 'b', 'c', 'd', 'e', 'f'}; + int[] charFreq = {5, 9, 12, 13, 16, 45}; + + HuffmanNode root = Huffman.buildHuffmanTree(charArray, charFreq); + Map codes = Huffman.generateCodes(root); + + Assertions.assertNotNull(codes); + Assertions.assertEquals(6, codes.size()); + + // Verify that all characters have codes + for (char c : charArray) { + Assertions.assertTrue(codes.containsKey(c), "Missing code for character: " + c); + Assertions.assertNotNull(codes.get(c), "Null code for character: " + c); + } + + // Verify that higher frequency characters have shorter codes + // 'f' has the highest frequency (45), so it should have one of the shortest + // codes + Assertions.assertTrue(codes.get('f').length() <= codes.get('a').length()); + } + + @Test + void testSingleCharacter() { + char[] charArray = {'a'}; + int[] charFreq = {10}; + + HuffmanNode root = Huffman.buildHuffmanTree(charArray, charFreq); + Map codes = Huffman.generateCodes(root); + + Assertions.assertNotNull(codes); + Assertions.assertEquals(1, codes.size()); + Assertions.assertEquals("0", codes.get('a')); // Single character gets code "0" + } + + @Test + void testTwoCharacters() { + char[] charArray = {'a', 'b'}; + int[] charFreq = {3, 7}; + + HuffmanNode root = Huffman.buildHuffmanTree(charArray, charFreq); + Map codes = Huffman.generateCodes(root); + + Assertions.assertNotNull(codes); + Assertions.assertEquals(2, codes.size()); + + // Verify both characters have codes + Assertions.assertTrue(codes.containsKey('a')); + Assertions.assertTrue(codes.containsKey('b')); + + // Verify codes are different + Assertions.assertNotEquals(codes.get('a'), codes.get('b')); + } + + @Test + void testEqualFrequencies() { + char[] charArray = {'a', 'b', 'c'}; + int[] charFreq = {5, 5, 5}; + + HuffmanNode root = Huffman.buildHuffmanTree(charArray, charFreq); + Map codes = Huffman.generateCodes(root); + + Assertions.assertNotNull(codes); + Assertions.assertEquals(3, codes.size()); + + // Verify all characters have codes + for (char c : charArray) { + Assertions.assertTrue(codes.containsKey(c)); + } + } + + @Test + void testLargeFrequencyDifference() { + char[] charArray = {'a', 'b', 'c'}; + int[] charFreq = {1, 10, 100}; + + HuffmanNode root = Huffman.buildHuffmanTree(charArray, charFreq); + Map codes = Huffman.generateCodes(root); + + Assertions.assertNotNull(codes); + Assertions.assertEquals(3, codes.size()); + + // Character 'c' with highest frequency should have shortest code + Assertions.assertTrue(codes.get('c').length() <= codes.get('b').length()); + Assertions.assertTrue(codes.get('c').length() <= codes.get('a').length()); + } + + @Test + void testNullCharacterArray() { + int[] charFreq = {5, 9, 12}; + + Assertions.assertThrows(IllegalArgumentException.class, () -> { Huffman.buildHuffmanTree(null, charFreq); }); + } + + @Test + void testNullFrequencyArray() { + char[] charArray = {'a', 'b', 'c'}; + + Assertions.assertThrows(IllegalArgumentException.class, () -> { Huffman.buildHuffmanTree(charArray, null); }); + } + + @Test + void testEmptyArrays() { + char[] charArray = {}; + int[] charFreq = {}; + + Assertions.assertThrows(IllegalArgumentException.class, () -> { Huffman.buildHuffmanTree(charArray, charFreq); }); + } + + @Test + void testMismatchedArrayLengths() { + char[] charArray = {'a', 'b', 'c'}; + int[] charFreq = {5, 9}; + + Assertions.assertThrows(IllegalArgumentException.class, () -> { Huffman.buildHuffmanTree(charArray, charFreq); }); + } + + @Test + void testNegativeFrequency() { + char[] charArray = {'a', 'b', 'c'}; + int[] charFreq = {5, -9, 12}; + + Assertions.assertThrows(IllegalArgumentException.class, () -> { Huffman.buildHuffmanTree(charArray, charFreq); }); + } + + @Test + void testZeroFrequency() { + char[] charArray = {'a', 'b', 'c'}; + int[] charFreq = {0, 5, 10}; + + HuffmanNode root = Huffman.buildHuffmanTree(charArray, charFreq); + Map codes = Huffman.generateCodes(root); + + Assertions.assertNotNull(codes); + Assertions.assertEquals(3, codes.size()); + Assertions.assertTrue(codes.containsKey('a')); // Even with 0 frequency, character should have a code + } + + @Test + void testGenerateCodesWithNullRoot() { + Map codes = Huffman.generateCodes(null); + + Assertions.assertNotNull(codes); + Assertions.assertTrue(codes.isEmpty()); + } + + @Test + void testPrefixProperty() { + // Verify that no code is a prefix of another (Huffman property) + char[] charArray = {'a', 'b', 'c', 'd', 'e'}; + int[] charFreq = {5, 9, 12, 13, 16}; + + HuffmanNode root = Huffman.buildHuffmanTree(charArray, charFreq); + Map codes = Huffman.generateCodes(root); + + // Check that no code is a prefix of another + for (Map.Entry entry1 : codes.entrySet()) { + for (Map.Entry entry2 : codes.entrySet()) { + if (!entry1.getKey().equals(entry2.getKey())) { + String code1 = entry1.getValue(); + String code2 = entry2.getValue(); + Assertions.assertTrue(!code1.startsWith(code2) && !code2.startsWith(code1), "Code " + code1 + " is a prefix of " + code2); + } + } + } + } + + @Test + void testBinaryCodesOnly() { + // Verify that all codes contain only '0' and '1' + char[] charArray = {'a', 'b', 'c', 'd'}; + int[] charFreq = {1, 2, 3, 4}; + + HuffmanNode root = Huffman.buildHuffmanTree(charArray, charFreq); + Map codes = Huffman.generateCodes(root); + + for (String code : codes.values()) { + Assertions.assertTrue(code.matches("[01]+"), "Code contains non-binary characters: " + code); + } + } + + @Test + void testMultipleCharactersWithLargeAlphabet() { + char[] charArray = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'}; + int[] charFreq = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29}; + + HuffmanNode root = Huffman.buildHuffmanTree(charArray, charFreq); + Map codes = Huffman.generateCodes(root); + + Assertions.assertNotNull(codes); + Assertions.assertEquals(10, codes.size()); + + // Verify all characters have codes + for (char c : charArray) { + Assertions.assertTrue(codes.containsKey(c)); + } + } +}