0% found this document useful (0 votes)
11 views13 pages

Message

Uploaded by

fardin.iqbal
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
11 views13 pages

Message

Uploaded by

fardin.iqbal
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
You are on page 1/ 13

#include <stdio.

h>
#include <stdlib.h>

#include "global.h"
#include "huff.h"
#include "debug.h"

//helper method to get a sequence of n bits where 0 = leaf and 1 = internal node
//since we will be recursively calling, we will use a pointer to out and count
//to keep track of the actual value
static void postorder(NODE* root, unsigned char* out, int* count) {
//in postorder traversal, we do left, right, then root
if(root->left != NULL)
postorder(root->left, out, count);

if(root->right != NULL)
postorder(root->right, out, count);

//if leaf, shift char left 1 and add 0, then increment count
if(root->left == NULL && root->right == NULL) {
*out = *out << 1;
*out += 0;
(*count)++;
}
//else, shift char left 1 and add 1, then increment count
else {
*out = *out << 1;
*out += 1;
(*count)++;
}

//if count = 8, out is filled up so we output it and reset


if(*count == 8) {
fputc(*out, stdout);
//fflush(stdout);
*out = 0;
*count = 0;
}
return;
}

//helper method to postorder the tree and output the symbols left to right
static void symboltraversal(NODE* root) {
//recursive call on left and right subtrees
if(root->left != NULL)
symboltraversal(root->left);

if(root->right != NULL)
symboltraversal(root->right);

//if leaf, we output its symbol


if(root->left == NULL && root->right == NULL) {
//if symbol is 256, it is end block so output FF 00
if(root->symbol == 256) {
fputc(0xFF, stdout);
fputc(0x00, stdout);
//fflush(stdout);
}
//if symbol is 0xFF, we out put 0xFF and any byte other than 0x00. I will
//output 0xFF twice
else if(root->symbol == 255) {
fputc(0xFF, stdout);
fputc(0xFF, stdout);
//fflush(stdout);
}
//else we output its symbol normally
else {
fputc(root->symbol, stdout);
//fflush(stdout);
}
}
return;
}

/**
* @brief Emits a description of the Huffman tree used to compress the current
block.
* @details This function emits, to the standard output, a description of the
* Huffman tree used to compress the current block. Refer to the assignment
handout
* for a detailed specification of the format of this description.
*/
void emit_huffman_tree() {
//output the number of nodes, given as a 2 byte sequence in big-endian order
unsigned char msb;
unsigned char lsb;
msb = (num_nodes >> 8) & 0xFF;
lsb = num_nodes & 0xFF;
fputc(msb, stdout);
fputc(lsb, stdout);
//fflush(stdout);

//a sequence of n bits from a postorder traversal (0 = leaf, 1 = internal node)


unsigned char out = 0;
int count = 0;
postorder(nodes, &out, &count);
//after finishing the recursive call, we pad out with 0s if count not 0
if(count != 0) {
out = out << (8 - count);
fputc(out, stdout);
//fflush(stdout);
}

//a sequence of bytes that gives the values of the symbols at the (n+1)/2
//leaves of the tree. each byte is 1 symbol except FF 00 which is end block
symboltraversal((nodes+0));

return;
}

//helper method to traverse tree and set its node's parents


static void set_parents(NODE* root) {
//if left is not null, set left's parent as root and recursive call set_parents
if(root->left != NULL) {
root->left->parent = root;
set_parents(root->left);
}
//if right is not null, set right's parent as root and recursive call
set_parents

if(root->right != NULL) {
root->right->parent = root;
set_parents(root->right);
}
return;
}

//helper method to traverse tree and set node_for_symbol with its leaves in
postorder
static void postordertrackleaf(NODE* root, int* add) {
//postorder traversal goes left, right, then root
if(root->left != NULL)
postordertrackleaf(root->left, add);
if(root->right != NULL)
postordertrackleaf(root->right, add);

//if we are at leaf we put it into the node_for_symbol array


if(root->left == NULL && root->right == NULL) {
*(node_for_symbol+*add) = root;
(*add)++;
}
}

//helper method to find a node for a symbol


static NODE *findnode(int c) {
for(int i = 0; i < num_nodes; i++) {
if((nodes+i)->symbol == c)
return nodes+i;
}
return NULL;
}

/**
* @brief Reads a description of a Huffman tree and reconstructs the tree from
* the description.
* @details This function reads, from the standard input, the description of a
* Huffman tree in the format produced by emit_huffman_tree(), and it reconstructs
* the tree from the description. Refer to the assignment handout for a
specification
* of the format for this description, and a discussion of how the tree can be
* reconstructed from it.
*
* @return 0 if the tree is read and reconstructed without error, 1 if EOF is
* encountered at the start of a block, otherwise -1 if an error occurs.
*/
int read_huffman_tree() {
//if EOF at beginning of block return error
if(feof(stdin))
return -1;
//reading 2 bytes first
unsigned char* currblockptr = current_block;
for(int i = 0; i < 2; i++) {
*currblockptr++ = fgetc(stdin);
//if end of file or error, return -1
if(feof(stdin) || ferror(stdin))
return -1;
}
//the first 2 bytes read will be num_nodes
num_nodes = *current_block << 8;
num_nodes += *(current_block+1);

int n = 0;
//the number of bytes representing the tree
if(num_nodes % 8 == 0)
n = num_nodes/8;
else
n = num_nodes/8 + 1;
//read the bytes representing the tree
for(int i = 0; i < n; i++) {
*currblockptr++ = fgetc(stdin);
//if end of file or error, return -1
if(feof(stdin) || ferror(stdin))
return -1;
}

//the next num_nodes/8 bytes will be the postorder traversal bit sequence
int position = num_nodes - 1;
NODE* stackptr = nodes;
int last = num_nodes % 8;
currblockptr = (current_block+2);
for(int i = 0; i < n; i++) {
unsigned char a = *currblockptr++;
//inner loop to use all 8 bits of the byte
for(int count = 0; count < 8; count++) {
//if this is the last byte and count = remainder of num_nodes%8
//then break as the rest of the byte is padding
if((i == n - 1) && count == last)
break;

//if the bit is a 0 (leaf), create a new node and push onto stack
if((a & 0x80) == 0) {
NODE q;
q.left = NULL;
q.right = NULL;
q.parent = NULL;
*stackptr++ = q;
}
//else the bit is a 1 (internal node), pop 2 nodes and place them at
//the back of the nodes array, and create a new node with those
//nodes as its children
else {
stackptr--;
NODE R = *stackptr;
stackptr--;
NODE L = *stackptr;
//we place them at the back of the nodes array
*(nodes+position) = R;
position--;
*(nodes+position) = L;
position--;

NODE P;
P.left = (nodes+position+1);
P.right = (nodes+position+2);
P.parent = NULL;
*stackptr++ = P;
}
a = a << 1;
}
}

//we traverse the tree post order and put leaf pointers in the node_for_symbol
array
int zzz = 0;
postordertrackleaf(nodes, &zzz);
//we are uncertain of the bytes representing leaves because end block and 0xFF
//both are represented by 2 bytes. Thus we fgetc as we go
for(int i = 0; i < zzz; i++) {
unsigned char asd = fgetc(stdin);
if(feof(stdin) || ferror(stdin))
return -1;
//if next char is 0xFF
if(asd == 0xFF) {
//get another char
unsigned char qwe = fgetc(stdin);
if(feof(stdin) || ferror(stdin))
return -1;
//if 0xFF is followed by 0x00 it is the end block symbol
if(qwe == 0x00)
(*(node_for_symbol+i))->symbol = 256;
//if it is followed by anything else, it is 0xFF
else {
(*(node_for_symbol+i))->symbol = 0xFF;
}
}
//else the next leaf's symbol is the gotten char
else
(*(node_for_symbol+i))->symbol = asd;
}

//then we set the parents in the tree


set_parents((nodes+0));
return 0;
}

//helper method to construct a histogram from the block


//also puts symbol nodes into the nodes_for_symbol array
static void construct_histogram(int blocksize) {
//we will use num_nodes to track symbol count
num_nodes = 0;
for(int i = 0; i < blocksize; i++) {
//used to keep track of whether it is a new symbol
int newsymbol = 1;
//loops through current symbols and if there is, increment its weight
for(int j = 0; j < num_nodes; j++) {
if((nodes+j)->symbol == *(current_block+i)) {
((nodes+j)->weight)++;
newsymbol = 0;
break;
}
}
//if newsymbol = 1 then we create a new node for the symbol
//we also put a pointer to that node in the node_for_symbol array
if(newsymbol == 1) {
NODE a;
a.symbol = *(current_block+i);
a.parent = NULL;
a.left = NULL;
a.right = NULL;
*(nodes+num_nodes) = a;
num_nodes++;
}
}
//we make an node to represent end of file
NODE a;
a.symbol = -1;
a.parent = NULL;
a.left = NULL;
a.right = NULL;
*(nodes+num_nodes) = a;
num_nodes++;

return;
}

//helper method to construct a huffman tree from the histogram


static void construct_huffmantree() {
//gets the total number of nodes using our leaves
int totalnodes = num_nodes * 2 - 1;
//position to place minimum nodes for the huffman tree
int position = totalnodes - 1;
//current number of nodes whose position is not decided in the huffman tree
int currunsorted = num_nodes;
//while there is more than 1 unsorted node
while(currunsorted > 1) {
//minimum weight nodes
int min1 = nodes->weight;
int min2 = (nodes+1)->weight;
int node1 = 0;
int node2 = 1;
//loop through current undecided nodes
for(int i = 2; i < currunsorted; i++) {
//if current node's weight >= both min1 and min2 we continue
if((nodes+i)->weight >= min1 && (nodes+i)->weight >= min2)
continue;
//if the current node is less than both min1 and min2
else if((nodes+i)->weight < min1 && (nodes+i)->weight < min2) {
if(min1 < min2) {
min2 = (nodes+i)->weight;
node2 = i;
}
else {
min1 = (nodes+i)->weight;
node1 = i;
}
}
//if current node < min1 but >= min2, set min1 to current node's weight
else if((nodes+i)->weight < min1 && (nodes+i)->weight >= min2) {
min1 = (nodes+i)->weight;
node1 = i;
}
//else set min2 to current node's weight
else {
min2 = (nodes+i)->weight;
node2 = i;
}
}
//move the minimum nodes the "high end" region of the array
*(nodes+position) = *(nodes+node2);
position--;
*(nodes+position) = *(nodes+node1);
position--;

//create a new internal node with the 2 minimum nodes as its children
NODE a;
a.parent = NULL;
a.symbol = -1;
a.left = (nodes+position+1);
a.right = (nodes+position+2);
//set a's weight to the sum of its children
a.weight = (nodes+position+1)->weight + (nodes+position+2)->weight;

//insert a to the section of unsorted nodes taking the lefter node's


position
int max = 0;
if(node1 < node2) {
*(nodes+node1) = a;
max = node2;
}
else {
*(nodes+node2) = a;
max = node1;
}
//decrement currunsorted
currunsorted--;
//if currunsorted is not node1 or node2, we shift the leftmost node to
node1 or node2
if(currunsorted != node1 && currunsorted != node2)
*(nodes+max) = *(nodes+currunsorted);

}
//set num_nodes to the current total nodes
num_nodes = totalnodes;
return;
}

//helper method to convert the block of data to compressed data and output
static void outputcompressed(int blocksize) {
//out is 1 byte to be printed while count keeps track of when out reaches 8
bits
unsigned char out = 0;
int count = 0;
NODE* a = NULL;
//compression to be repeated for every element in buffer
for(int i = 0; i < blocksize+1; i++) {
//if i == blocksize, we set a as end block pointer
if(i == blocksize)
a = findnode(256);
//else we set a normally
else
a = findnode(*(current_block+i));
//while not at root
while(a != NULL && a->parent != NULL) {
//if a is a left child set its parents weight as 0
if(a->parent->left != NULL && a->parent->left == a) {
a = a->parent;
a->weight = 0;
}
//else if a is a right child set its parents weight as 1
else {
a = a->parent;
a->weight = 1;
}
}
//while not at leaf
while(a->left != NULL && a->right != NULL) {
//we put the bits into char out one by one and increment count
out = out << 1;
out += a->weight;
count++;
//if count = 8 then we filled all 8 bits of out so we output it and
//reset count to 0
if(count == 8) {
fputc(out, stdout);
//fflush(stdout);
count = 0;
}

//depending on whether weight is 0 or 1, set a to its left/right child


if(a->weight == 0)
a = a->left;
else
a = a->right;
}
}

//we pad the last byte with 0s if it hasn't filled all 8 bits
out = out << (8 - count);
fputc(out, stdout);
//fflush(stdout);

return;
}

/**
* @brief Reads one block of data from standard input and emits corresponding
* compressed data to standard output.
* @details This function reads raw binary data bytes from the standard input
* until the specified block size has been read or until EOF is reached.
* It then applies a data compression algorithm to the block and outputs the
* compressed block to the standard output. The block size parameter is
* obtained from the global_options variable.
*
* @return 0 if compression completes without error, -1 if an error occurs.
*/
int compress_block() {
//get blocksize from global_options upper 16 bits
int blocksize = ((unsigned int) global_options >> 16) + 1;
int read = 0;
//if EOF at beginning of block return error
if(feof(stdin))
return -1;
//reading 1 block of data from stdin to array current_block
for(; read < blocksize; read++) {
//get char then put in array block_size
char c = fgetc(stdin);
//if end of file, exit loop
if(feof(stdin)) {
break;
}
//if an error occurs, return -1
else if(ferror(stdin)) {
return -1;
}
*(current_block+read) = c;
}

//read will be the same as block_size or end block


blocksize = read;
//if blocksize <= 0 it is an error so return -1
if(blocksize <= 0)
return -1;

//we use blocksize to keep track of EOF or end of block


//constructs a histogram from the buffer
construct_histogram(blocksize);
//uses histogram to construct huffmantree
construct_huffmantree();
//traverses through huffman tree and sets parents
set_parents((nodes+0));
//sets node_for_symbol array with leaves of huffman tree
for(int i = 0; i < num_nodes; i++) {
//if symbol is not a -1 (internal node)
if((nodes+i)->symbol != -1)
*(node_for_symbol+((nodes+i)->symbol)) = nodes+i;
}
//emits description of tree
emit_huffman_tree();
//compress the data and output it
outputcompressed(blocksize);

return 0;
}

/**
* @brief Reads one block of compressed data from standard input and writes
* the corresponding uncompressed data to standard output.
* @details This function reads one block of compressed data from the standard
* input, it decompresses the block, and it outputs the uncompressed data to
* the standard output. The input data blocks are assumed to be in the format
* produced by compress(). If EOF is encountered before a complete block has
* been read, it is an error.
*
* @return 0 if decompression completes without error, -1 if an error occurs.
*/
int decompress_block() {
//read huffman tree
int q = read_huffman_tree();
//if return is not 0 then it is an error
if(q != 0)
return -1;

unsigned char c = fgetc(stdin);


if(feof(stdin) || ferror(stdin))
return -1;
int count = 0;
//we do loop until break or return error
while(1) {
//NODE pointer to root of tree
NODE* a = nodes;
//while a is not a leaf
while(a->left != NULL && a->right != NULL) {
//c & 10000000 will test if the most significant bit is 1 or 0
int bit = c & 0x80;
//then we shift c left 1 and increment count
c = c << 1;
count++;
//if count = 8 then we have used all 8 bits of the byte
//so we get the next byte and reset count
if(count == 8) {
//since we are unsure of where end block is, we must get the input
one
//by one instead of a whole block at once
c = fgetc(stdin);
if(feof(stdin) || ferror(stdin))
return -1;
count = 0;
}

//if bit = 0, then the msb was 0 so we go left else right for 1
if(bit == 0)
a = a->left;
else
a = a->right;
}
//after the inner loop ends, we are at a leaf
//if the symbol represents EOF, then we break
if(a->symbol == 256)
break;

//we output the symbol


fputc(a->symbol, stdout);
fflush(stdout);
}

return 0;
}

/**
* @brief Reads raw data from standard input, writes compressed data to
* standard output.
* @details This function reads raw binary data bytes from the standard input in
* blocks of up to a specified maximum number of bytes or until EOF is reached,
* it applies a data compression algorithm to each block, and it outputs the
* compressed blocks to standard output. The block size parameter is obtained
* from the global_options variable.
*
* @return 0 if compression completes without error, -1 if an error occurs.
*/
int compress() {
//while not at EOF, call compress_block
int error = 0;
int compressed = 0;
while(feof(stdin) == 0) {
error = compress_block();
//if compress_block returns -1 and we did not compress any blocks return -
1;
if(error && compressed == 0)
return -1;
compressed++;
}
//if error occurs, return -1
if(ferror(stdin))
return -1;

return 0;
}

/**
* @brief Reads compressed data from standard input, writes uncompressed
* data to standard output.
* @details This function reads blocks of compressed data from the standard
* input until EOF is reached, it decompresses each block, and it outputs
* the uncompressed data to the standard output. The input data blocks
* are assumed to be in the format produced by compress().
*
* @return 0 if decompression completes without error, -1 if an error occurs.
*/
int decompress() {
//while not at EOF, call decompress_block
int error = 0;
int decompressed = 0;
while(feof(stdin) == 0) {
error = decompress_block();
//if decompress_block returns -1 and we have no decompressed blocks, return
-1
if(error && decompressed == 0)
return -1;
decompressed++;
}
//if error return -1
if(ferror(stdin))
return -1;

return 0;
}

//helper method strequal to compare if 2 strings are equal


//returns 0 if not equal else 1 if equal
static int strequal(char* first, char* second) {
while(*first != '\0' || *second != '\0') {
if(*first != *second)
return 0;
first++;
second++;
}
return 1;
}

/**
* @brief Validates command line arguments passed to the program.
* @details This function will validate all the arguments passed to the
* program, returning 0 if validation succeeds and -1 if validation fails.
* Upon successful return, the selected program options will be set in the
* global variable "global_options", where they will be accessible
* elsewhere in the program.
*
* @param argc The number of arguments passed to the program from the CLI.
* @param argv The argument strings passed to the program from the CLI.
* @return 0 if validation succeeds and -1 if validation fails.
* Refer to the homework document for the effects of this function on
* global variables.
* @modifies global variable "global_options" to contain a bitmap representing
* the selected options.
*/
int validargs(int argc, char **argv) {
// Case when only program name is passed, return -1
if (argc == 1) {
return -1;
}

// Handle the '-h' flag to display help


if (strequal(*(argv + 1), "-h")) {
global_options = 0x1;
return 0;
}

// Handle cases where the second argument is either '-c' or '-d' without block
size
if (argc == 2) {
if (strequal(*(argv + 1), "-c")) {
global_options = 0xffff0002; // Compression with default block size
return 0;
} else if (strequal(*(argv + 1), "-d")) {
global_options = 0xffff0004; // Decompression with default block size
return 0;
} else {
return -1; // Invalid option
}
}

// Handle '-c -b BLOCKSIZE' format for compression


if (argc == 4 && strequal(*(argv + 1), "-c") && strequal(*(argv + 2), "-b")) {
char *blocksize_str = *(argv + 3);
int blocksize = 0;

// Validate that block size consists only of digits


while (*blocksize_str != '\0') {
if (*blocksize_str < '0' || *blocksize_str > '9') {
return -1; // Invalid character in block size input
}
blocksize = blocksize * 10 + (*blocksize_str - '0');
blocksize_str++;
}

// Validate block size range


if (blocksize < MIN_BLOCK_SIZE || blocksize > MAX_BLOCK_SIZE) {
return -1; // Block size is out of valid range
}

// Set global_options with block size and compression flag


global_options = ((blocksize - 1) << 16) + 2;
return 0;
}

// If none of the conditions are met, return -1 as default


return -1;
}

You might also like