结点的权值:某种特定含义的数值
结点的带权路径长度:根到结点路径长度×结点的权值
树的带权路径长度:所有叶子结点的带权路径长度之和
哈夫曼树:在给定叶子结点权值的二叉树中带权路径长度最小的二叉树
给定五个结点求它们构成的哈夫曼树
利用贪心算法的思想,每次找到权值最小的两个结点组合在一起然后继续参与选择,直至所有结点都用完。哈夫曼树并不唯一,但在考试时构造过程要注意以下几点:
1.尽量按照一定规则去构造,比如:筛选出的两个权值最小的结点应当将权值较小的结点放在左孩子的位置,较大的放在右孩子的位置;筛选出的两个结点如果权值相同应当将体型小的放在左边,体型大的放在右边,如果体型一样则选择命名靠前的结点。
2.如果只给出了结点权值应当自己给每个结点命名,权值应当写在节点外,尽量不要直接写在结点中以免引起歧义。
3.初始所给的结点最后都将成为哈夫曼树的叶子结点,且叶子结点一定只能是所给出的结点,另外哈夫曼树一定不会出现只有一个孩子的结点,总结点树总是为(叶子结点数×2-1)个。
哈夫曼树的存储结构
采用二叉树的顺序存储方式来存储哈夫曼树,定义一个结构体,哈夫曼树每个节点都保存着父结点的下标,左右孩子结点的下标以及自己的权值。
//哈夫曼树结点
typedef struct HTNode {
int parent; //父结点下标
int lChild; //左孩子下标
int rChild; //右孩子下标
int weight; //结点权值
} HTNode, *HuffmanTree;
初始化哈夫曼树
上面我们说过,哈夫曼树的总结点数始终为(叶子结点×2-1)个,初始化哈夫曼树时,我们需要接收用户键入的叶子结点数和相对应的权值,将最初这几个结点的左右孩子节点下标设为-1,由于目前还没开始构建哈夫曼树,父亲结点下标也要设置为-1。
//初始化哈夫曼树
void initHuffmanTree(HuffmanTree &huffmanTree, int numLeaf) {
huffmanTree = (HTNode *) malloc(sizeof(HTNode) * (2 * numLeaf - 1)); //哈夫曼树总结点数永远是叶子结点数的两倍减一
for (int i = 0; i < numLeaf; ++i) { //依次记录哈每个叶子结点的权值
huffmanTree[i].parent = huffmanTree[i].lChild = huffmanTree[i].rChild = -1;
cout << "请输入第" << i + 1 << "个结点的权值:";
cin >> huffmanTree[i].weight;
}
}
拿前面例图的构造过程举例,在初始化哈夫曼树之后前五个结点数据如下表所示:
结点下标 | 结点权值 | 父结点下标 | 左孩子下标 | 右孩子下标 |
---|---|---|---|---|
0 | 1 | -1 | -1 | -1 |
1 | 3 | -1 | -1 | -1 |
2 | 2 | -1 | -1 | -1 |
3 | 7 | -1 | -1 | -1 |
4 | 2 | -1 | -1 | -1 |
构建哈夫曼树
循环找到最权值最小的两个结点,把它们的下标存进后面结点的左右孩子下标中,再将它们的父结点下标由-1改为对应数值。再下一次筛选权值最小结点时,通过判断该结点父结点下标是否是-1来确定它是否进入候选名单。
//构建哈夫曼树
void createHuffmanTree(HuffmanTree &huffmanTree, int numLeaf) {
int minWeight1, minWeight2, minPos1, minPos2; //用于存储权值最小的两个结点的权值和下标
for (int i = 0; i < numLeaf - 1; ++i) {
minPos1 = minPos2 = -1; //下标默认为-1
minWeight1 = minWeight2 = INT_MAX; //将权值默认设置为最大的整型,叶子结点的权值一定比其小
for (int j = 0; j < numLeaf + i; ++j) { //循环寻找最小且不存在父结点的两个结点
if (huffmanTree[j].weight < minWeight2 && huffmanTree[j].parent == -1) {
minWeight2 = huffmanTree[j].weight;
minPos2 = j;
if (minWeight2 < minWeight1) { //将权值较小的结点放到左边
minWeight2 = minWeight1;
minPos2 = minPos1;
minWeight1 = huffmanTree[j].weight;
minPos1 = j;
}
}
} //以下是更改节点合并后它们的信息
huffmanTree[minPos1].parent = huffmanTree[minPos2].parent = numLeaf + i;
huffmanTree[numLeaf + i].weight = minWeight1 + minWeight2;
huffmanTree[numLeaf + i].parent = -1;
huffmanTree[numLeaf + i].lChild = minPos1;
huffmanTree[numLeaf + i].rChild = minPos2;
}
}
函数执行完后数据结构如下表所示:
结点下标 | 结点权值 | 父结点下标 | 左孩子下标 | 右孩子下标 |
---|---|---|---|---|
0 | 1 | 5 | -1 | -1 |
1 | 3 | 6 | -1 | -1 |
2 | 2 | 5 | -1 | -1 |
3 | 7 | 8 | -1 | -1 |
4 | 2 | 6 | -1 | -1 |
5 | 3 | 7 | 0 | 2 |
6 | 5 | 7 | 4 | 1 |
7 | 8 | 8 | 5 | 6 |
8 | 15 | -1 | 3 | 7 |
将树的左分支编码为0右分支编码为1对上述两棵树用不同编码方式可得:
编码方式 | 结点a | 结点b | 结点c | 结点d | WPL |
---|---|---|---|---|---|
固定长度编码 | 00 | 01 | 10 | 11 | 200 |
哈夫曼编码 | 00 | 011 | 1 | 010 | 130 |
代码实现哈夫曼编码
//构建哈夫曼编码
void createHuffmanCode(HuffmanTree huffmanTree, int numLeaf, char **&huffmanCode) {
huffmanCode = (char **) malloc(sizeof(char *) * numLeaf);
for (int i = 0; i < numLeaf; ++i) {
huffmanCode[i] = (char *) malloc(sizeof(char) * numLeaf);
memset(huffmanCode[i], '\0', sizeof(char *));
}
for (int i = 0; i < numLeaf; ++i) { //从叶子结点开始寻找根结点
HTNode *p = &huffmanTree[i];
while (p->parent != -1) {
char *tempCode = (char *) malloc(sizeof(char) * numLeaf);
memset(tempCode, '\0', sizeof(char *));
if (&huffmanTree[huffmanTree[p->parent].lChild] == p) {
tempCode[0] = '0';
} else {
tempCode[0] = '1';
}
strcat(tempCode, huffmanCode[i]);
free(huffmanCode[i]);
huffmanCode[i] = tempCode;
p = &huffmanTree[p->parent];
}
}
}
带权路径长度等于所有叶子结点的带权路径长度之和比如上图两个二叉树的带权路径长度分别是:
W
P
L
1
=
2
×
(
10
+
8
+
80
+
2
)
=
200
WPL_1=2×(10+8+80+2)=200
WPL1?=2×(10+8+80+2)=200
W
P
L
2
=
1
×
80
+
2
×
10
+
3
×
(
2
+
8
)
=
130
WPL_2=1×80+2×10+3×(2+8)=130
WPL2?=1×80+2×10+3×(2+8)=130
代码实现计算WPL
//计算带权路径长度
int calculateWPL(HuffmanTree huffmanTree, int numLeaf) {
int WPL = 0;
for (int i = 0; i < numLeaf; ++i) { //从叶子结点开始寻找根结点
HTNode *p = &huffmanTree[i];
int weight = p->weight;
int depth = 0; //记录叶子结点到根的深度
while (p->parent != -1) { //循环寻找根结点
depth++; //每向上寻找一次深度就加一
p = &huffmanTree[p->parent];
}
WPL += weight * depth; //将每个叶子结点的带权路径长度累加起来
}
return WPL;
}
压缩率就是由完全二叉树的带权路径长度与之对应的哈夫曼树的带权路径长度的比值,例如上图中的二叉树哈夫曼编码对于固定长度编码:
压缩率 = 130 200 × 100 % = 65 % 压缩率= \frac{130}{200} ×100\%=65\% 压缩率=200130?×100%=65%
#include <iostream>
#include <cstring>
using namespace std;
//哈夫曼树结点
typedef struct HTNode {
int parent; //父结点下标
int lChild; //左孩子下标
int rChild; //右孩子下标
int weight; //结点权值
} HTNode, *HuffmanTree;
//初始化哈夫曼树
void initHuffmanTree(HuffmanTree &huffmanTree, int numLeaf) {
huffmanTree = (HTNode *) malloc(sizeof(HTNode) * (2 * numLeaf - 1)); //哈夫曼树总结点数永远是叶子结点数的两倍减一
for (int i = 0; i < numLeaf; ++i) { //依次记录哈每个叶子结点的权值
huffmanTree[i].parent = huffmanTree[i].lChild = huffmanTree[i].rChild = -1;
cout << "请输入第" << i + 1 << "个结点的权值:";
cin >> huffmanTree[i].weight;
}
}
//构建哈夫曼树
void createHuffmanTree(HuffmanTree &huffmanTree, int numLeaf) {
int minWeight1, minWeight2, minPos1, minPos2; //用于存储权值最小的两个结点的权值和下标
for (int i = 0; i < numLeaf - 1; ++i) {
minPos1 = minPos2 = -1; //下标默认为-1
minWeight1 = minWeight2 = INT_MAX; //将权值默认设置为最大的整型,叶子结点的权值一定比其小
for (int j = 0; j < numLeaf + i; ++j) { //循环寻找最小且不存在父结点的两个结点
if (huffmanTree[j].weight < minWeight2 && huffmanTree[j].parent == -1) {
minWeight2 = huffmanTree[j].weight;
minPos2 = j;
if (minWeight2 < minWeight1) { //将权值较小的结点放到左边
minWeight2 = minWeight1;
minPos2 = minPos1;
minWeight1 = huffmanTree[j].weight;
minPos1 = j;
}
}
} //以下是更改节点合并后它们的信息
huffmanTree[minPos1].parent = huffmanTree[minPos2].parent = numLeaf + i;
huffmanTree[numLeaf + i].weight = minWeight1 + minWeight2;
huffmanTree[numLeaf + i].parent = -1;
huffmanTree[numLeaf + i].lChild = minPos1;
huffmanTree[numLeaf + i].rChild = minPos2;
}
}
//构建哈夫曼编码
void createHuffmanCode(HuffmanTree huffmanTree, int numLeaf, char **&huffmanCode) {
huffmanCode = (char **) malloc(sizeof(char *) * numLeaf);
for (int i = 0; i < numLeaf; ++i) {
huffmanCode[i] = (char *) malloc(sizeof(char) * numLeaf);
memset(huffmanCode[i], '\0', sizeof(char *));
}
for (int i = 0; i < numLeaf; ++i) { //从叶子结点开始寻找根结点
HTNode *p = &huffmanTree[i];
while (p->parent != -1) {
char *tempCode = (char *) malloc(sizeof(char) * numLeaf);
memset(tempCode, '\0', sizeof(char *));
if (&huffmanTree[huffmanTree[p->parent].lChild] == p) {
tempCode[0] = '0';
} else {
tempCode[0] = '1';
}
strcat(tempCode, huffmanCode[i]);
free(huffmanCode[i]);
huffmanCode[i] = tempCode;
p = &huffmanTree[p->parent];
}
}
}
//计算带权路径长度
int calculateWPL(HuffmanTree huffmanTree, int numLeaf) {
int WPL = 0;
for (int i = 0; i < numLeaf; ++i) { //从叶子结点开始寻找根结点
HTNode *p = &huffmanTree[i];
int weight = p->weight;
int depth = 0; //记录叶子结点到根的深度
while (p->parent != -1) { //循环寻找根结点
depth++; //每向上寻找一次深度就加一
p = &huffmanTree[p->parent];
}
WPL += weight * depth; //将每个叶子结点的带权路径长度累加起来
}
return WPL;
}
//打印哈夫曼树信息
void printHuffmanTree(HuffmanTree huffmanTree, int numLeaf, char **huffmanCode) {
cout.setf(ios::left);
cout.width(15);
cout << "结点下标";
cout.width(15);
cout << "结点权值";
cout.width(15);
cout << "父结点下标";
cout.width(15);
cout << "左孩子下标";
cout.width(15);
cout << "右孩子下标";
cout.width(15);
cout << "哈夫曼编码" << endl;
for (int i = 0; i < numLeaf * 2 - 1; ++i) {
cout.width(15);
cout << i;
cout.width(15);
cout << huffmanTree[i].weight;
cout.width(15);
cout << huffmanTree[i].parent;
cout.width(15);
cout << huffmanTree[i].lChild;
cout.width(15);
cout << huffmanTree[i].rChild;
if (i < numLeaf) {
cout.width(15);
cout << huffmanCode[i];
}
cout << endl;
}
cout << "WPL=" << calculateWPL(huffmanTree, numLeaf) << endl;
}
int main() {
HuffmanTree huffmanTree;
char **huffmanCode = nullptr;
int numLeaf;
cout << "请输入叶子结点个数:";
cin >> numLeaf;
initHuffmanTree(huffmanTree, numLeaf);
createHuffmanTree(huffmanTree, numLeaf);
createHuffmanCode(huffmanTree, numLeaf, huffmanCode);
printHuffmanTree(huffmanTree, numLeaf, huffmanCode);
system("pause");
return 0;
}
程序运行截图