哈夫曼树的C语言实现
哈夫曼树是最优二叉树。定义:给定n个权值作为n个叶子节点,构造一棵二叉树,若树的带树路径长度最小,则这棵树被称为哈夫曼树。如下图。
哈夫曼树涉及的几个概念
路径和路径长度:在一棵树中,从一个结点往下可以到达孩子或孙子结点之间的通路,称为路径。通路中结点的数目称为路径长度。如15的路径长度是1;5,6,7,8的路径长度是3。
结点的权及带权路径长度: 若将树中结点赋给一个有着某种含义的数值,则这个数值称为该结点的权。结点的带权路径长度为:从根结点到该结点之间的路径长度与该结点的权的乘积。如节点5的的路径长度是3,它的带权路径长度为3x5=15。
树的带权路径长度: 树的带树路径长度规定为所有叶子结点的带权路径长度之和。上图中,树的带权路径之和是15x1 + 5x3 + 6x3 + 7x3 + 8x3 = 93。
pairing堆的数据结构一般如下:
typedef struct huffman_tree_s* huffman_tree_pt;
typedef struct huffman_tree_s {
Key data;
huffman_tree_pt left;
huffman_tree_pt right;
huffman_tree_pt parent;
} huffman_tree_t;
哈夫曼树的构建图文说明
- 将所有结点看成是一个森林。
- 从森林中取出根结点的权值最小的两棵树进行合并,取出的两棵树作为一个新生成树的左右子树,且新树的权值是左右两个子树之和。
- 从森林中删除这两棵树,并将新树加入森林。
- 重复2,3步,直到只剩最后一棵树,这棵树就是哈夫曼树。
如下图所示:
哈夫曼树的构建代码
一般使用最小堆来构建森林并选取最小树。源码实现中使用了以前的二叉堆的代码。代码如下:
huffman_tree_pt
huffman_create(Key arr[], int size)
{
int i;
binary_heap_pt heap;
huffman_tree_pt tree, left, right;
heap = binary_heap_init(size);
for (i=0; i<size; i++) {
tree = huffman_new(arr[i]);
if (tree == NULL) {
clean_heap(heap);
return NULL;
}
binary_heap_push(heap, tree);
}
for (i=0; i < size - 1; i++) {
left = binary_heap_pop(heap);
right = binary_heap_pop(heap);
tree = huffman_new(left->data + right->data);
if (tree == NULL) {
clean_heap(heap);
return NULL;
}
tree->left = left;
tree->right = right;
left->parent = tree;
right->parent = tree;
if (binary_heap_push(heap, tree) ==0){
clean_heap(heap);
return NULL;
}
}
tree = binary_heap_pop(heap);
clean_heap(heap);
return tree;
}
完整源码
/* binary_heap.h */
#ifndef BINARY_HEAP_H
#define BINARY_HEAP_H
#include "huffman_tree.h"
typedef struct huffman_tree_s* huffman_tree_pt;
typedef struct binary_heap_s* binary_heap_pt;
typedef huffman_tree_pt Item;
typedef struct binary_heap_s {
int size;
int current;
Item *data;
} binary_heap_t;
binary_heap_pt binary_heap_init(int size);
void binary_heap_free(binary_heap_pt heap);
int binary_heap_push(binary_heap_pt heap, Item element);
Item binary_heap_pop(binary_heap_pt heap);
Item binary_heap_top(binary_heap_pt heap);
int binary_heap_empty(binary_heap_pt heap);
void binary_heap_destroy(binary_heap_pt heap);
#endif
binary_heap.h
/* binary_heap.c */
/*
* =====================================================================================
*
* Filename: binary_heap.c
*
* Description: 二叉堆实现
*
* Version: 1.0
* Created: 2016-09-19 20:08
* Revision: none
* Compiler: gcc
*
* Author: simon
*
* =====================================================================================
*/
#include <stdio.h>
#include <stdlib.h>
#include "binary_heap.h"
static void
swap(Item *data, int a, int b)
{
Item c;
c = data[a];
data[a] = data[b];
data[b] = c;
}
/*
* 最小堆 ((a) > (b))
* 最大堆 ({a} < (b))
*
*/
#define item_cmp(a, b) (((a)->data) > ((b)->data)) /* 最小堆 */
/*
* Name: fixup
* Description: 向上堆化
*
*/
static void
fixup(binary_heap_pt heap, int k)
{
while (k>0 && item_cmp(heap->data[(k - 1) / 2],heap->data[k])) {
swap(heap->data, k, (k - 1) / 2);
k = (k - 1) / 2;
}
}
/*
* Name: fixdown
* Description: 向下堆化
*
*/
static void
fixdown(binary_heap_pt heap, int k, int end)
{
int temp;
while (k*2 + 1 <= end) {
temp = k*2 + 1;
if (temp < end && item_cmp(heap->data[temp], heap->data[temp + 1])) {
temp++;
}
if (!item_cmp(heap->data[k], heap->data[temp])) {
break;
}
swap(heap->data, k, temp);
k = temp;
}
}
binary_heap_pt
binary_heap_init(int size)
{
if (size<=0) {
return NULL;
}
binary_heap_pt heap_pt = malloc(sizeof(binary_heap_t));
heap_pt->data = malloc(sizeof(Item)*size);
heap_pt->size = size;
heap_pt->current = 0;
return heap_pt;
}
void
binary_heap_free(binary_heap_pt heap)
{
if (heap == NULL) {
return;
}
free(heap->data);
free(heap);
}
int
binary_heap_push(binary_heap_pt heap, Item element)
{
if (heap == NULL) {
return 0;
}
if (heap->current == heap->size) {
return 0;
}
heap->data[heap->current] = element;
fixup(heap, heap->current++);
return 1;
}
Item
binary_heap_pop(binary_heap_pt heap)
{
swap(heap->data, 0, heap->current - 1);
fixdown(heap, 0, heap->current - 2);
return heap->data[--heap->current];
}
Item
binary_heap_top(binary_heap_pt heap)
{
return heap->data[0];
}
int
binary_heap_empty(binary_heap_pt heap)
{
return heap->current == 0;
}
void
binary_heap_destroy(binary_heap_pt heap)
{
free(heap);
}
binary_heap.c
/* huffman_tree.h */
#ifndef HUffMAN_TREE_H
#define HUffMAN_TREE_H
#include "binary_heap.h"
typedef int Key;
typedef struct binary_heap_s* binary_heap_pt;
typedef struct huffman_tree_s* huffman_tree_pt;
typedef struct huffman_tree_s {
Key data;
huffman_tree_pt left;
huffman_tree_pt right;
huffman_tree_pt parent;
} huffman_tree_t;
huffman_tree_pt huffman_create(Key arr[], int size);
void huffman_destroy(huffman_tree_pt tree);
void huffman_print(huffman_tree_pt heap);
void huffman_tree_export_dot(huffman_tree_pt tree, char *filename, char *label);
#endif
huffman_tree.h
/* huffman_tree.c */
/*
* =====================================================================================
*
* Filename: huffman_tree.c
*
* Description: 哈夫曼树实现
*
* Version: 1.0
* Created: 2016-10-07 20:09
* Revision: none
* Compiler: gcc
*
* Author: simon
*
* =====================================================================================
*/
#include <stdio.h>
#include <stdlib.h>
#include "binary_heap.h"
#include "huffman_tree.h"
static huffman_tree_pt huffman_new(Key element);
static void clean_heap(binary_heap_pt heap);
static huffman_tree_pt
huffman_new(Key element)
{
huffman_tree_pt tree;
tree = (huffman_tree_pt)malloc(sizeof(huffman_tree_t));
if (tree == NULL) {
printf("Error: make new huffman node failed.\n");
return NULL;
}
tree->data = element;
tree->left = NULL;
tree->right = NULL;
tree->parent = NULL;
return tree;
}
static void
clean_heap(binary_heap_pt heap)
{
while (!binary_heap_empty(heap)) {
huffman_destroy(binary_heap_pop(heap));
}
binary_heap_destroy(heap);
}
huffman_tree_pt
huffman_create(Key arr[], int size)
{
int i;
binary_heap_pt heap;
huffman_tree_pt tree, left, right;
heap = binary_heap_init(size);
for (i=0; i<size; i++) {
tree = huffman_new(arr[i]);
if (tree == NULL) {
clean_heap(heap);
return NULL;
}
binary_heap_push(heap, tree);
}
for (i=0; i < size - 1; i++) {
left = binary_heap_pop(heap);
right = binary_heap_pop(heap);
tree = huffman_new(left->data + right->data);
if (tree == NULL) {
clean_heap(heap);
return NULL;
}
tree->left = left;
tree->right = right;
left->parent = tree;
right->parent = tree;
if (binary_heap_push(heap, tree) ==0){
clean_heap(heap);
return NULL;
}
}
tree = binary_heap_pop(heap);
clean_heap(heap);
return tree;
}
void
huffman_destroy(huffman_tree_pt tree)
{
if (tree == NULL) {
return;
}
huffman_destroy(tree->left);
huffman_destroy(tree->right);
free(tree);
}
static void
huffman_tree_printnode(huffman_tree_pt node, int h)
{
int i;
for (i=0; i < h; i++) {
printf(" ");
}
if (node == NULL) {
printf("*\n");
}
else {
printf("%d\n", node->data);
}
}
static void
huffman_tree_show(huffman_tree_pt tree, int h)
{
if (tree == NULL) {
huffman_tree_printnode(tree, h);
return;
}
huffman_tree_show(tree->left, h+1);
huffman_tree_printnode(tree, h);
huffman_tree_show(tree->right, h+1);
}
void
huffman_print(huffman_tree_pt tree)
{
huffman_tree_show(tree, 0);
}
/* 打印节点计数变量 */
static int node_num = 0;
static void
export_dot(FILE *fp, huffman_tree_pt tree, char *last_label)
{
/* 打印节点 */
char node_name[10] = {0};
sprintf(node_name, "n%d", node_num);
if (tree == NULL) {
fprintf(fp, "%s[style=invis];\n", node_name);
}
else {
fprintf(fp, "%s[label=%d];\n", node_name, tree->data);
}
node_num++;
/* 打印边 */
if (last_label != NULL) {
if (tree == NULL) {
fprintf(fp, "%s->%s[style=invis];\n", last_label, node_name);
}
else {
fprintf(fp, "%s->%s;\n", last_label, node_name);
}
}
if (tree == NULL) {
return;
}
/* 打印子节点 */
if (tree ->left == NULL && tree->right == NULL) {
return;
}
export_dot(fp, tree->left, node_name);
export_dot(fp, NULL, node_name);
export_dot(fp, tree->right, node_name);
}
void
huffman_tree_export_dot(huffman_tree_pt tree, char *filename, char *label)
{
FILE *fp;
fp = fopen(filename, "w");
fprintf(fp, "digraph g{\nnode[shape=circle];\nlabel=\"%s\";\nlabeljust=l;\nlabelloc=t;\n", label);
export_dot(fp, tree, NULL);
fprintf(fp, "}\n");
fclose(fp);
}
huffman_tree.c
/* huffman_tree_test.c */
#include <stdlib.h>
#include <stdio.h>
#include "huffman_tree.h"
#define LENGTH(a) ( (sizeof(a)) / (sizeof(a[0])) )
int
main(int argc, char *argv[])
{
int a[] = {5,6,7,8,15};
int i, len=LENGTH(a);
huffman_tree_pt tree=NULL;
printf("\n== 建立哈夫曼树: \n");
tree = huffman_create(a, LENGTH(a));
huffman_print(tree);
huffman_tree_export_dot(tree, "huffman.dot", "哈夫曼树");
huffman_destroy(tree);
return 0;
}
huffman_tree_test.c
测试程序的运行结果
== 建立哈夫曼树:
*
15
*
41
*
5
*
11
*
6
*
26
*
7
*
15
*
8
*
最后生成dot标记文件,然后使用工具可以生成下图
转载文章请注明出处: http://mingnote.com