哈夫曼树的C语言实现

哈夫曼树是最优二叉树。定义:给定n个权值作为n个叶子节点,构造一棵二叉树,若树的带树路径长度最小,则这棵树被称为哈夫曼树。如下图。

图一

哈夫曼树涉及的几个概念

路径和路径长度:在一棵树中,从一个结点往下可以到达孩子或孙子结点之间的通路,称为路径。通路中结点的数目称为路径长度。如15的路径长度是1;5,6,7,8的路径长度是3。

结点的权及带权路径长度: 若将树中结点赋给一个有着某种含义的数值,则这个数值称为该结点的权。结点的带权路径长度为:从根结点到该结点之间的路径长度与该结点的权的乘积。如节点5的的路径长度是3,它的带权路径长度为3x5=15。

树的带权路径长度: 树的带树路径长度规定为所有叶子结点的带权路径长度之和。上图中,树的带权路径之和是15x1 + 5x3 + 6x3 + 7x3 + 8x3 = 93。

pairing堆的数据结构一般如下:

typedef struct huffman_tree_s* huffman_tree_pt;
typedef struct huffman_tree_s {
    Key data;
    huffman_tree_pt left;
    huffman_tree_pt right;
    huffman_tree_pt parent;
} huffman_tree_t;

哈夫曼树的构建图文说明

  1. 将所有结点看成是一个森林。
  2. 从森林中取出根结点的权值最小的两棵树进行合并,取出的两棵树作为一个新生成树的左右子树,且新树的权值是左右两个子树之和。
  3. 从森林中删除这两棵树,并将新树加入森林。
  4. 重复2,3步,直到只剩最后一棵树,这棵树就是哈夫曼树。

如下图所示:

图一

图二

图三

图四

图五

哈夫曼树的构建代码

一般使用最小堆来构建森林并选取最小树。源码实现中使用了以前的二叉堆的代码。代码如下:

huffman_tree_pt 
huffman_create(Key arr[], int size)
{
    int i;
    binary_heap_pt heap;
    huffman_tree_pt tree, left, right;
    heap = binary_heap_init(size);

    for (i=0; i<size; i++) {
        tree = huffman_new(arr[i]);
        if (tree == NULL) {
            clean_heap(heap);
            return NULL;
        }
        binary_heap_push(heap, tree);
    }

    for (i=0; i < size - 1; i++) {
        left = binary_heap_pop(heap);
        right = binary_heap_pop(heap);
        tree = huffman_new(left->data + right->data);
        if (tree == NULL) {
            clean_heap(heap);
            return NULL;
        }
        tree->left = left;
        tree->right = right;
        left->parent = tree;
        right->parent = tree;

        if (binary_heap_push(heap, tree) ==0){
            clean_heap(heap);
            return NULL;
        }

    }
    tree = binary_heap_pop(heap);
    clean_heap(heap);
    return tree;
}

完整源码

/* binary_heap.h */
#ifndef BINARY_HEAP_H
#define BINARY_HEAP_H

#include "huffman_tree.h"

typedef struct huffman_tree_s* huffman_tree_pt;
typedef struct binary_heap_s* binary_heap_pt;

typedef huffman_tree_pt Item;

typedef struct binary_heap_s {
    int size;
    int current;
    Item *data;
} binary_heap_t;

binary_heap_pt binary_heap_init(int size);
void binary_heap_free(binary_heap_pt heap);
int binary_heap_push(binary_heap_pt heap, Item element);
Item binary_heap_pop(binary_heap_pt heap);
Item binary_heap_top(binary_heap_pt heap);
int binary_heap_empty(binary_heap_pt heap);
void binary_heap_destroy(binary_heap_pt heap);

#endif

binary_heap.h

/* binary_heap.c */
/*
 * =====================================================================================
 *
 *       Filename:  binary_heap.c
 *
 *    Description:  二叉堆实现
 *
 *        Version:  1.0
 *        Created:  2016-09-19 20:08
 *       Revision:  none
 *       Compiler:  gcc
 *
 *         Author:  simon
 *
 * =====================================================================================
 */

#include <stdio.h>
#include <stdlib.h>

#include "binary_heap.h"

static void 
swap(Item *data, int a, int b)
{
    Item c;
    c = data[a];
    data[a] = data[b];
    data[b] = c;
}

/* 
 * 最小堆 ((a) > (b))
 * 最大堆 ({a} < (b))
 * 
 */
#define item_cmp(a, b) (((a)->data) > ((b)->data)) /* 最小堆 */


/* 
 *        Name:  fixup
 * Description:  向上堆化
 * 
 */
static void 
fixup(binary_heap_pt heap, int k)
{
    while (k>0 && item_cmp(heap->data[(k - 1) / 2],heap->data[k])) {
        swap(heap->data, k, (k - 1) / 2);
        k = (k - 1) / 2;
    }
}

/* 
 *        Name:  fixdown
 * Description:  向下堆化
 * 
 */
static void 
fixdown(binary_heap_pt heap, int k, int end)
{
    int temp;
    while (k*2 + 1 <= end) {
        temp = k*2 + 1;
        if (temp < end && item_cmp(heap->data[temp], heap->data[temp + 1])) {
            temp++;
        }
        if (!item_cmp(heap->data[k], heap->data[temp])) {
            break;
        }
        swap(heap->data, k, temp);
        k = temp;
    }
}

binary_heap_pt 
binary_heap_init(int size)
{
    if (size<=0) {
        return NULL;
    }
    binary_heap_pt heap_pt = malloc(sizeof(binary_heap_t));
    heap_pt->data = malloc(sizeof(Item)*size);
    heap_pt->size = size;
    heap_pt->current = 0;
    return heap_pt;
}

void 
binary_heap_free(binary_heap_pt heap)
{
    if (heap == NULL) {
        return;
    }
    free(heap->data);
    free(heap);
}

int 
binary_heap_push(binary_heap_pt heap, Item element)
{
    if (heap == NULL) {
        return 0;
    }
    if (heap->current == heap->size) {
        return 0;
    }
    heap->data[heap->current] = element;
    fixup(heap, heap->current++);
    return 1;
}

Item 
binary_heap_pop(binary_heap_pt heap)
{
    swap(heap->data, 0, heap->current - 1);
    fixdown(heap, 0, heap->current - 2);
    return heap->data[--heap->current];
}

Item 
binary_heap_top(binary_heap_pt heap)
{
    return heap->data[0];
}

int
binary_heap_empty(binary_heap_pt heap)
{
    return heap->current == 0;
}

void 
binary_heap_destroy(binary_heap_pt heap)
{
    free(heap);
}

binary_heap.c

/* huffman_tree.h */
#ifndef HUffMAN_TREE_H
#define HUffMAN_TREE_H

#include "binary_heap.h"

typedef int Key;

typedef struct binary_heap_s* binary_heap_pt;
typedef struct huffman_tree_s* huffman_tree_pt;

typedef struct huffman_tree_s {
    Key data;
    huffman_tree_pt left;
    huffman_tree_pt right;
    huffman_tree_pt parent;
} huffman_tree_t;

huffman_tree_pt huffman_create(Key arr[], int size);
void huffman_destroy(huffman_tree_pt tree);
void huffman_print(huffman_tree_pt heap);
void huffman_tree_export_dot(huffman_tree_pt tree, char *filename, char *label);

#endif

huffman_tree.h

/* huffman_tree.c */
/*
 * =====================================================================================
 *
 *       Filename:  huffman_tree.c
 *
 *    Description:  哈夫曼树实现
 *
 *        Version:  1.0
 *        Created:  2016-10-07 20:09
 *       Revision:  none
 *       Compiler:  gcc
 *
 *         Author:  simon
 *
 * =====================================================================================
 */

#include <stdio.h>
#include <stdlib.h>

#include "binary_heap.h"
#include "huffman_tree.h"

static huffman_tree_pt huffman_new(Key element);
static void clean_heap(binary_heap_pt heap);


static huffman_tree_pt 
huffman_new(Key element)
{
    huffman_tree_pt tree;
    tree = (huffman_tree_pt)malloc(sizeof(huffman_tree_t));
    if (tree == NULL) {
        printf("Error: make new huffman node failed.\n");
        return NULL;
    }
    tree->data = element;
    tree->left = NULL;
    tree->right = NULL;
    tree->parent = NULL;
    return tree;
}

static void 
clean_heap(binary_heap_pt heap)
{
    while (!binary_heap_empty(heap)) {
        huffman_destroy(binary_heap_pop(heap));
    }
    binary_heap_destroy(heap);
}

huffman_tree_pt 
huffman_create(Key arr[], int size)
{
    int i;
    binary_heap_pt heap;
    huffman_tree_pt tree, left, right;
    heap = binary_heap_init(size);

    for (i=0; i<size; i++) {
        tree = huffman_new(arr[i]);
        if (tree == NULL) {
            clean_heap(heap);
            return NULL;
        }
        binary_heap_push(heap, tree);
    }

    for (i=0; i < size - 1; i++) {
        left = binary_heap_pop(heap);
        right = binary_heap_pop(heap);
        tree = huffman_new(left->data + right->data);
        if (tree == NULL) {
            clean_heap(heap);
            return NULL;
        }
        tree->left = left;
        tree->right = right;
        left->parent = tree;
        right->parent = tree;

        if (binary_heap_push(heap, tree) ==0){
            clean_heap(heap);
            return NULL;
        }

    }
    tree = binary_heap_pop(heap);
    clean_heap(heap);
    return tree;
}

void 
huffman_destroy(huffman_tree_pt tree)
{
    if (tree == NULL) {
        return;
    }
    huffman_destroy(tree->left);
    huffman_destroy(tree->right);
    free(tree);
}

static void 
huffman_tree_printnode(huffman_tree_pt node, int h)
{
    int i;
    for (i=0; i < h; i++) {
        printf("    ");
    }
    if (node == NULL) {
        printf("*\n");
    }
    else {
        printf("%d\n", node->data);
    }
}

static void 
huffman_tree_show(huffman_tree_pt tree, int h)
{
    if (tree == NULL) {
        huffman_tree_printnode(tree, h);
        return;
    }
    huffman_tree_show(tree->left, h+1);
    huffman_tree_printnode(tree, h);
    huffman_tree_show(tree->right, h+1);
}

void 
huffman_print(huffman_tree_pt tree)
{
    huffman_tree_show(tree, 0);
}

/* 打印节点计数变量 */
static int node_num = 0;

static void 
export_dot(FILE *fp, huffman_tree_pt tree, char *last_label)
{
    /* 打印节点 */
    char node_name[10] = {0};
    sprintf(node_name, "n%d", node_num);
    if (tree == NULL) {
        fprintf(fp, "%s[style=invis];\n", node_name);
    }
    else {
        fprintf(fp, "%s[label=%d];\n", node_name, tree->data);
    }
    node_num++;

    /* 打印边 */
    if (last_label != NULL) {
        if (tree == NULL) {
            fprintf(fp, "%s->%s[style=invis];\n", last_label, node_name);
        }
        else {
            fprintf(fp, "%s->%s;\n", last_label, node_name);
        }
    }

    if (tree == NULL) {
        return;
    }

    /* 打印子节点 */
    if (tree ->left == NULL && tree->right == NULL) {
        return;
    }
    export_dot(fp, tree->left, node_name);
    export_dot(fp, NULL, node_name);
    export_dot(fp, tree->right, node_name);
}

void 
huffman_tree_export_dot(huffman_tree_pt tree, char *filename, char *label)
{
    FILE *fp;
    fp = fopen(filename, "w");
    fprintf(fp, "digraph g{\nnode[shape=circle];\nlabel=\"%s\";\nlabeljust=l;\nlabelloc=t;\n", label);
    export_dot(fp, tree, NULL);
    fprintf(fp, "}\n");
    fclose(fp);
}

huffman_tree.c

/* huffman_tree_test.c */
#include <stdlib.h>
#include <stdio.h>

#include "huffman_tree.h"

#define LENGTH(a) ( (sizeof(a)) / (sizeof(a[0])) )

int
main(int argc, char *argv[])
{
    int a[] = {5,6,7,8,15};
    int i, len=LENGTH(a);
    huffman_tree_pt tree=NULL;

    printf("\n== 建立哈夫曼树: \n");
    tree = huffman_create(a, LENGTH(a));
    huffman_print(tree);
    huffman_tree_export_dot(tree, "huffman.dot", "哈夫曼树");

    huffman_destroy(tree);
    return 0;
}

huffman_tree_test.c

测试程序的运行结果

== 建立哈夫曼树: 
        *
    15
        *
41
                *
            5
                *
        11
                *
            6
                *
    26
                *
            7
                *
        15
                *
            8
                *

最后生成dot标记文件,然后使用工具可以生成下图

图一

转载文章请注明出处: http://mingnote.com


Date 2016-10-07(星期五) 22:10 By Ming In 算法 Tags 算法,