Week 10 - Greedy Algorithm 4
Week 10 - Greedy Algorithm 4
Huffman Algorithm
The Huffman algorithm uses a binary tree to compress data.
The resulting code is called the Huffman code, after David Huffman, who
discovered it in 1952.
Data compression is important in many situations. An
example is sending data over the Internet, where,
especially over a dial-up connection, transmission can
take a long time.
Each character in a normal uncompressed text file is
represented in the computer by one byte (for the ASCII code)
or by two bytes (for Unicode). In these schemes, every
character requires the same number of bits.
There are several approaches to compressing data.
For text, the most common approach is to reduce the
number of bits that represent the most-used characters.
In this approach we must be careful that no character is
represented by the same bit combination that appears at
the beginning of a longer code used for some other
character.
For example, if E is 01, and X is 01011000, then anyone
decoding 01011000 would not know if the initial 01
represented an E or the beginning of an X.
This leads to a rule:
No code can be the prefix of any other code.
For each message, we make up a new code
tailored to that particular message.
Let us assume an alphabet containing the
letters "a", "b", "c", "d" and "e" which
are the names of leaves shown in the
following table, with their corresponding
frequencies:
Huffman Algorithm
import java.io.*;
import java.util.Scanner;
/**
 * Console demo of Huffman coding: reads one line of text, counts how often
 * each character occurs, builds a Huffman tree from those counts and prints
 * a table of character code, character, frequency and Huffman code.
 */
public class testBut {
    /** Size of the frequency and code tables; valid character values are 0..255. */
    private static final int TABLE_SIZE = 256;

    /**
     * Reads a line from standard input and prints the Huffman code table for it.
     * When the line is empty only the table header is printed.
     */
    public static void main(String[] args) {
        Scanner input = new Scanner(System.in);
        System.out.print("Enter a text: ");
        String text = input.nextLine();

        int[] counts = getCharacterFrequency(text);
        System.out.printf("%-15s%-15s%-15s%-15s\n",
            "ASCII Code", "Character", "Frequency", "Code");

        Tree tree = getHuffmanTree(counts);
        if (tree == null) {
            // Empty input: no character occurred, so there is no tree and no rows to print.
            return;
        }

        String[] codes = getCode(tree.root);
        for (int i = 0; i < codes.length; i++) {
            if (counts[i] != 0) {
                System.out.printf("%-15d%-15s%-15d%-15s\n",
                    i, (char) i + "", counts[i], codes[i]);
            }
        }
    }

    /**
     * Builds the code table for the tree rooted at {@code root}.
     *
     * @param root root of a Huffman tree, may be {@code null}
     * @return an array indexed by character value holding that character's
     *         code ({@code null} for characters not in the tree), or
     *         {@code null} when {@code root} is {@code null}
     */
    public static String[] getCode(Tree.Node root) {
        if (root == null) {
            return null;
        }
        String[] codes = new String[TABLE_SIZE];
        if (root.left == null) {
            // A tree with a single leaf has no edges, so the walk below would
            // leave that character with an empty code. Give it one bit instead.
            root.code = "0";
        }
        assignCode(root, codes);
        return codes;
    }

    /**
     * Walks the tree, appending "0" for a left edge and "1" for a right edge,
     * and stores each leaf's accumulated code in {@code codes} at the index of
     * the leaf's character.
     */
    private static void assignCode(Tree.Node root, String[] codes) {
        if (root.left != null) {
            // Internal nodes are only created by Tree(Tree, Tree), which always
            // sets both children, so right is non-null whenever left is.
            root.left.code = root.code + "0";
            assignCode(root.left, codes);
            root.right.code = root.code + "1";
            assignCode(root.right, codes);
        } else {
            codes[(int) root.element] = root.code;
        }
    }

    /**
     * Builds a Huffman tree by repeatedly removing two trees from the heap and
     * re-inserting their combination until one tree is left.
     *
     * @param counts frequency of each character, indexed by character value
     * @return the resulting tree, or {@code null} when no entry of
     *         {@code counts} is positive
     */
    public static Tree getHuffmanTree(int[] counts) {
        Heap<Tree> heap = new Heap<Tree>();
        for (int i = 0; i < counts.length; i++) {
            if (counts[i] > 0) {
                heap.add(new Tree(counts[i], (char) i));
            }
        }

        while (heap.getSize() > 1) {
            Tree t1 = heap.remove();
            Tree t2 = heap.remove();
            heap.add(new Tree(t1, t2));
        }

        // Heap.remove() yields null for an empty heap, i.e. when no character occurred.
        return heap.remove();
    }

    /**
     * Counts the occurrences of each character in {@code text}.
     *
     * @param text the text to analyse
     * @return an array of length 256 indexed by character value
     * @throws ArrayIndexOutOfBoundsException if {@code text} contains a
     *         character whose value is 256 or greater
     */
    public static int[] getCharacterFrequency(String text) {
        int[] counts = new int[TABLE_SIZE];
        for (int i = 0; i < text.length(); i++) {
            counts[(int) text.charAt(i)]++;
        }
        return counts;
    }

    /** A Huffman (sub)tree, ordered by the weight stored in its root. */
    public static class Tree implements Comparable<Tree> {
        Node root;

        /** Combines two trees under a new root whose weight is the sum of both. */
        public Tree(Tree t1, Tree t2) {
            root = new Node();
            root.left = t1.root;
            root.right = t2.root;
            root.weight = t1.root.weight + t2.root.weight;
        }

        /** Creates a tree consisting of a single leaf. */
        public Tree(int weight, char element) {
            root = new Node(weight, element);
        }

        /**
         * Orders trees in REVERSE weight order: the lighter tree compares as
         * greater. The ordering is deliberately inverted so that a heap which
         * hands out its greatest element first returns the lightest tree first.
         */
        @Override
        public int compareTo(Tree o) {
            return Integer.compare(o.root.weight, root.weight);
        }

        /**
         * Tree node. Declared static because it never uses the enclosing
         * Tree instance.
         */
        public static class Node {
            char element;     // character stored in a leaf
            int weight;       // weight of the subtree rooted here
            Node left;
            Node right;
            String code = ""; // code accumulated on the path from the root

            public Node() {
            }

            public Node(int weight, char element) {
                this.weight = weight;
                this.element = element;
            }
        }
    }
}
/**
 * Binary heap stored in an array list. The element that is greatest
 * according to {@code compareTo} sits at index 0 and is the one returned by
 * {@link #remove()}.
 *
 * @param <E> element type; must be comparable with itself (or a supertype)
 */
public class Heap<E extends Comparable<? super E>> {
    private final java.util.ArrayList<E> list = new java.util.ArrayList<E>();

    /** Creates an empty heap. */
    public Heap() {
    }

    /** Creates a heap containing every element of {@code objects}, added in array order. */
    public Heap(E[] objects) {
        for (int i = 0; i < objects.length; i++) {
            add(objects[i]);
        }
    }

    /**
     * Adds an element: appends it at the end and moves it up while it is
     * strictly greater than its parent.
     */
    public void add(E newObject) {
        list.add(newObject);
        int currentIndex = list.size() - 1;
        while (currentIndex > 0) {
            int parentIndex = (currentIndex - 1) / 2;
            // Stop on ties as well: only a strictly greater child moves up.
            if (list.get(currentIndex).compareTo(list.get(parentIndex)) <= 0) {
                break;
            }
            swap(currentIndex, parentIndex);
            currentIndex = parentIndex;
        }
    }

    /**
     * Removes and returns the greatest element.
     *
     * @return the element at the root, or {@code null} when the heap is empty
     */
    public E remove() {
        if (list.size() == 0) {
            return null;
        }
        E removedObject = list.get(0);
        // Move the last element to the root, drop the last slot, then sift down.
        list.set(0, list.get(list.size() - 1));
        list.remove(list.size() - 1);

        int currentIndex = 0;
        while (currentIndex < list.size()) {
            int leftChildIndex = 2 * currentIndex + 1;
            int rightChildIndex = 2 * currentIndex + 2;
            if (leftChildIndex >= list.size()) {
                break; // no children: heap property holds
            }

            // Pick the greater child; the left child wins ties.
            int maxIndex = leftChildIndex;
            if (rightChildIndex < list.size()
                    && list.get(maxIndex).compareTo(list.get(rightChildIndex)) < 0) {
                maxIndex = rightChildIndex;
            }

            if (list.get(currentIndex).compareTo(list.get(maxIndex)) >= 0) {
                break; // current element is not smaller than its greater child
            }
            swap(currentIndex, maxIndex);
            currentIndex = maxIndex;
        }
        return removedObject;
    }

    /** Returns the number of elements currently stored. */
    public int getSize() {
        return list.size();
    }

    /** Exchanges the elements at positions {@code i} and {@code j}. */
    private void swap(int i, int j) {
        E temp = list.get(i);
        list.set(i, list.get(j));
        list.set(j, temp);
    }
}
End of Chapter 4
Any questions?