Huffman Coding

ํ—ˆํ”„๋งŒ ๋ถ€ํ˜ธํ™”(Huffman coding)๋Š” ๋ฌด์†์‹ค ์••์ถ•์— ์“ฐ์ด๋Š” ์—”ํŠธ๋กœํ”ผ ๋ถ€ํ˜ธํ™”์˜ ์ผ์ข…์œผ๋กœ, ๋ฐ์ดํ„ฐ ๋ฌธ์ž์˜ ๋“ฑ์žฅ ๋นˆ๋„์— ๋”ฐ๋ผ์„œ ๋‹ค๋ฅธ ๊ธธ์ด์˜ ๋ถ€ํ˜ธ๋ฅผ ์‚ฌ์šฉํ•˜๋Š” ์•Œ๊ณ ๋ฆฌ์ฆ˜์ด๋‹ค.

  • ์••์ถ•์ด๋ž€ ์—„์ฒญ๋‚˜๊ฒŒ ํฐ ๋ฌธ์ œ(๋งŽ์€ ๋น„ํŠธ๋ฅผ ํ•„์š”๋กœ ํ•˜๋Š”)์˜ ์ •๋ณด๋ฅผ ๊ณ ์Šค๋ž€ํžˆ ํ‘œํ˜„ํ•˜๋ฉด์„œ๋„ ์ „์ฒด ๋น„ํŠธ๋Ÿ‰์„ ์ค„์ผ ์ˆ˜ ์žˆ๋Š” ํšจ๊ณผ์ ์ธ ๋ถ€ํ˜ธํ™” ๊ธฐ์ˆ ์˜ ๊ณผ์ •์ด๋‹ค.

  • ๋ฌด์†์‹ค ์••์ถ•์€ ๋ฐ์ดํ„ฐ ์••์ถ•์˜ ์ผ์ข…์œผ๋กœ ์†์‹ค ์••์ถ•์˜ ๋ฐ˜๋Œ€๋ง์ด๋‹ค. ์›๋ž˜์˜ ์ •๋ณด๋ฅผ ๊ทธ๋Œ€๋กœ ๋ณด์กดํ•ด์•ผ ํ•˜๊ธฐ ๋•Œ๋ฌธ์—, ์ •๋ณด ์—”ํŠธ๋กœํ”ผ์˜ ํ•œ๊ณ„๊ฐ€ ๊ทธ๋Œ€๋กœ ๋ฐ˜์˜๋œ๋‹ค. ์—ฌ๊ธฐ์—์„œ ์ •๋ณด ์—”ํŠธ๋กœํ”ผ์˜ ํ•œ๊ณ„๋ž€ ๊ฐœ๋ณ„ ์ •๋ณด์˜ ํ™•๋ฅ ๊ฐ’์— ์˜ํ•˜์—ฌ ๊ณ„์‚ฐ๋˜๋Š” ๊ฐ’์ด ์•„๋‹Œ, ์ „์ฒด ์‹ ํ˜ธ์˜ ์ƒ๊ด€๊ด€๊ณ„๋ฅผ ๋ฐ˜์˜ํ•œ ํ•œ๊ณ„๊ฐ’์ด๋‹ค.

  • ์—”ํŠธ๋กœํ”ผ ์ธ์ฝ”๋”ฉ or ์—”ํŠธ๋กœํ”ผ ๋ถ€ํ˜ธํ™”(entropy encoding) : ์‹ฌ๋ณผ์ด ๋‚˜์˜ฌ ํ™•๋ฅ ์— ๋”ฐ๋ผ ์‹ฌ๋ณผ์„ ๋‚˜ํƒ€๋‚ด๋Š” ์ฝ”๋“œ์˜ ๊ธธ์ด๋ฅผ ๋‹ฌ๋ฆฌํ•˜๋Š” ๋ถ€ํ˜ธํ™” ๋ฐฉ๋ฒ•

    • ์—”ํŠธ๋กœํ”ผ๋Š” ๊ฐ ๊ธฐํ˜ธ๊ฐ€ ํฌํ•จํ•˜๋Š” ํ‰๊ท  ์ •๋ณด๋Ÿ‰์„ ์˜๋ฏธ

    • Shannon ๋ถ€ํ˜ธํ™” ์ด๋ก ์—์„œ๋Š” ์—”ํŠธ๋กœํ”ผ๋ฅผ ์šฐ๋ฆฌ๊ฐ€ ํ•  ์ˆ˜ ์žˆ๋Š” ์ตœ์„ ์ด๋ผ ๋งํ•œ๋‹ค.

    • ์ž์ฃผ ๋ฐœ์ƒํ•˜๋Š” ๊ธฐํ˜ธ๋ฅผ ์ฐพ์•„๋‚ด๋Š” ๋ฐ ๋ชฉ์ ์ด์žˆ๋‹ค.

  • ๊ฐ€๋ณ€ ๊ธธ์ด ๋ถ€ํ˜ธํ™”(VLC) : ๋” ์ž์ฃผ ๋ฐœ์ƒํ•˜๋Š” ๊ธฐํ˜ธ๋Š” ๋” ์ ์€ ๋น„ํŠธ๋กœ ๋ถ€ํ˜ธํ™”

    • ์ฆ‰, ์ž์ฃผ ๋ฐœ์ƒํ•˜๋Š” ๊ธฐํ˜ธ๋Š” ๋นจ๋ฆฌ ์ „์†ก๋  ์ˆ˜ ์žˆ๋Š” ์ฝ”๋“œ(์ ์€ ๋น„ํŠธ), ์ž์ฃผ ๋ฐœ์ƒํ•˜์ง€ ์•Š๋Š” ๊ฒƒ์€ ๊ธด ์ฝ”๋“œ๊ฐ€ ๋ถ€์—ฌ๋œ๋‹ค.

ํ—ˆํ”„๋งŒ ์•Œ๊ณ ๋ฆฌ์ฆ˜์˜ ๋ถ€ํ˜ธํ™” ์ˆœ์„œ๋Š” ์•„๋ž˜์—์„œ ์œ„๋กœ ์ง„ํ–‰ํ•˜๋Š” ๋ฐฉ๋ฒ•์ด๋‹ค.

์•Œ๊ณ ๋ฆฌ์ฆ˜

  1. ์ดˆ๊ธฐํ™” : ๋ชจ๋“  ๊ธฐํ˜ธ๋ฅผ ์ถœํ˜„ ๋นˆ๋„์ˆ˜์— ๋”ฐ๋ผ ๋‚˜์—ดํ•œ๋‹ค.

  2. ๋‹จ ํ•œ ๊ฐ€์ง€ ๊ธฐํ˜ธ๊ฐ€ ๋‚จ์„ ๋•Œ๊นŒ์ง€ ์•„๋ž˜ ๋‹จ๊ณ„๋ฅผ ๋ฐ˜๋ณตํ•œ๋‹ค.

    1. ๋ชฉ๋ก์œผ๋กœ๋ถ€ํ„ฐ ๊ฐ€์žฅ ๋นˆ๋„๊ฐ€ ๋‚ฎ์€ ๊ฒƒ์„ 2๊ฐœ ๊ณ ๋ฅธ๋‹ค.

    2. ํ—ˆํ”„๋งŒ์ด ๋‘ ๊ฐ€์ง€ ๊ธฐํ˜ธ๋ฅผ ๋ถ€๋ชจ ๋…ธ๋“œ๋ฅผ ๊ฐ€์ง€๋Š” ๋ถ€ํŠธ๋ฆฌ๋ฅผ ๊ตฌ์„ฑํ•˜๊ณ  ์ž์‹ ๋…ธ๋“œ๋ฅด ์ƒ์„ฑํ•œ๋‹ค.

    3. ๋ถ€๋ชจ ๋…ธ๋“œ ๋‹จ ๊ธฐํ˜ธ๋“ค์˜ ๋นˆ๋„์ˆ˜๋ฅผ ๋”ํ•˜์—ฌ ์ฃผ ๋…ธ๋“œ์— ํ• ๋‹นํ•˜๊ณ  ๋ชฉ๋ก์˜ ์ˆœ์„œ์— ๋งž๋„๋ก ๋ชฉ๋ก์— ์‚ฝ์ž…ํ•œ๋‹ค.

    4. ๋ชฉ๋ก์—์„œ ๋ถ€๋ชจ๋…ธ๋“œ์— ํฌํ•จ๋œ ๊ธฐํ˜ธ๋ฅผ ์ œ๊ฑฐํ•œ๋‹ค.

  3. ๋ฃจํŠธ๋…ธ๋“œ๋กœ๋ถ€ํ„ฐ์˜ ๊ฒฝ๋กœ์—์„œ ๊ฐ ๊ฐ€์ง€์— ์ฝ”๋“œ์›Œ๋“œ๋ฅผ ๋ถ€์—ฌํ•œ๋‹ค.

ํ—ˆํ”„๋งŒ ์•Œ๊ณ ๋ฆฌ์ฆ˜์€ ์ž…๋ ฅ ๊ธฐํ˜ธ๋ฅผ ๋ฆฌํ”„ ๋…ธ๋“œ๋กœ ํ•˜๋Š” ์ด์ง„ ํŠธ๋ฆฌ๋ฅผ ๋งŒ๋“ค์–ด์„œ ์ ‘๋‘ ๋ถ€ํ˜ธ๋ฅผ ๋งŒ๋“ค์–ด ๋‚ด๋Š” ์•Œ๊ณ ๋ฆฌ์ฆ˜์ด๋‹ค.

๊ตฌํ˜„

Huffman ์•Œ๊ณ ๋ฆฌ์ฆ˜์€ ์ตœ์†Œ๋นˆ๋„ ์ˆ˜๋ฅผ ์ถœ๋ ฅํ•˜๊ธฐ ์œ„ํ•ด์„œ ์šฐ์„  ์ˆœ์œ„ ํ๋ฅผ ํ•„์š”๋กœํ•œ๋‹ค. ์ตœ์†Œํž™์„ ์‚ฌ์šฉํ•ด์„œ ๊ตฌํ˜„ํ•ด๋ณด์•˜๋‹ค.

๊ตฌ์กฐ์ฒด

// ๊ธฐ๋ณธ์ ์œผ๋กœ ๋…ธ๋“œ๋Š” data์ •๋ณด์™€ ๋นˆ๋„์ˆ˜๋Š” ์Œ์ˆ˜๊ฐ€์•„๋‹Œ int๊ฐ’์„ ๊ฐ–๋Š”๋‹ค.
// ํ—ˆํ”„๋งŒ์€ ์ด์ง„ํŠธ๋ฆฌ ๊ตฌ์กฐ์ด๋ฏ€๋กœ left์™€ right์˜ ํฌ์ธํ„ฐ๋ฅผ ๊ฐ–๊ณ ์žˆ๋‹ค.
typedef struct node{
    char data;
    unsigned int frequency;
    struct node * left, *right;
}Node;

// heap๋Š” heap์˜ ํฌ๊ธฐ, node array๋ฅผ ๊ฐ–๊ณ ์žˆ๋‹ค.
// capacity๋Š” ๋ฐฐ์—ด์˜ ํฌ๊ธฐ๋ฅผ ๋งํ•œ๋‹ค.
typedef struct heap{
    unsigned int size;
    unsigned int capacity;
    Node ** array;
}Heap;

new_node

Node * new_node(char data, unsigned freq){
    Node * new = (Node*)malloc(sizeof(Node));

    new->data=data;
    new->frequency=freq;
    new->left=new->right=NULL;

    return new;
}

Heap ์ดˆ๊ธฐํ™”

Heap * init(unsigned capacity){
    Heap * heap = (Heap*)malloc(sizeof(Heap));

    // ํ˜„์žฌ heap์˜ ํฌ๊ธฐ๋Š” 0์ด๋‹ค.
    heap->size=0;
    heap->capacity=capacity;
    // ๋ฐฐ์—ด์˜ ํฌ๊ธฐ ๋งŒํผ ๊ณต๊ฐ„์„ ํ• ๋‹นํ•ด์ค€๋‹ค.
    heap->array = (Node**)malloc(heap->capacity*sizeof(Node*));

    return heap;
}

heapify

๋‘ ๊ฐœ์˜ subtree๊ฐ€ min heap์ผ๋•Œ root์— ์ถ”๊ฐ€๋œ ๋…ธ๋“œ๋ฅผ ํฌํ•จํ•œ ์ „์ฒด๊ฐ€ heap์˜ ์กฐ๊ฑด์„ ๋งŒ์กฑํ•˜๋„๋ก ๊ฐ ๋…ธ๋“œ์˜ ์œ„์น˜๋ฅผ ์กฐ์ •

void swap_node(Node ** a, Node ** b){
    Node * tmp = *a;
    *a = *b;
    *b = tmp;
}

// heapify : ๋‘ ๊ฐœ์˜ subtree๊ฐ€ min heap์ผ๋•Œ root์— ์ถ”๊ฐ€๋œ ๋…ธ๋“œ๋ฅผ ํฌํ•จํ•œ ์ „์ฒด๊ฐ€
// heap์„ ๋งŒ์กฑํ•˜๋„๋ก ๊ฐ ๋…ธ๋“œ์˜ ์œ„์น˜๋ฅผ ์กฐ์ •
void heapify(Heap * h, int i){
    int min = i;
    int left = 2*i +1;    // ์™ผ์ชฝ ์ž์‹๋…ธ๋“œ
    int right = 2*i+2;    // ์˜ค๋ฅธ์ชฝ ์ž์‹๋…ธ๋“œ

    // ์™ผ์ชฝ ์ž์‹ ๋…ธ๋“œ์˜ ์ธ๋ฑ์Šค๊ฐ€ heap์˜ ํฌ๊ธฐ๋ณด๋‹ค ์ž‘๊ณ 
    // frequency๊ฐ’์ด ์ตœ์†Œ๊ฐ’์˜ frequency๋ณด๋‹ค ์ž‘์œผ๋ฉด ์ตœ์†Œ๊ฐ’์„ left๋กœ ๋ณ€๊ฒฝ
    if(left < h->size && h->array[left]->frequency < h->array[min]->frequency)min=left;
    // ์˜ค๋ฅธ์ชฝ ์ž์‹๋…ธ๋“œ๋„ ๋งˆ์ฐฌ๊ฐ€์ง€
    if(right < h->size && h->array[right]->frequency < h->array[min]->frequency)min=right;
    // ๋งŒ์•ฝ ์ตœ์†Œ๊ฐ’์ด i์™€ ๋‹ค๋ฅด๋‹ค๋ฉด ๋…ธ๋“œ์œ„์น˜๋ฅผ ๋ณ€๊ฒฝํ•ด์ค€๋‹ค.
    if(min!=i){
        swap_node(&h->array[min], &h->array[i]);
        heapify(h,min);
    }
}

delete

Node * delete(Heap * h){
    Node * tmp = h->array[0];
    h->array[0]=h->array[--h->size];
    heapify(h, 0);
    return tmp;
}

insert

void insert(Heap * h, Node * node){
    int p; //ํ˜„์žฌ ์œ„์น˜ ์ €์žฅ

    // ๋งˆ์ง€๋ง‰ ์œ„์น˜์— ์ถ”๊ฐ€
    p=++h->size-1;;

    // ์œ„์น˜ ์กฐ์ •
    while(node->frequency < h->array[(p-1)/2]->frequency){
        h->array[p] = h->array[(p-1)/2];
        p=(p-1)/2;
    }
    h->array[p]=node;
}

heap ์ƒ์„ฑ

void build_heap(Heap * h){
    int n = h->size-1;
    int i;
    for(i=(n-1)/2;i>=0;i--)
        heapify(h, i);
}

Heap * create_heap(char data[], int freq[],int size){
    //์ดˆ๊ธฐํ™”ํ•ด์„œ ์ƒ์„ฑํ•œ๋‹ค.
    Heap * heap = init(size);

    // ๋นˆ๋„์ˆ˜์™€ ๋ฐ์ดํ„ฐ๊ฐ’์„ ๋„ฃ์€ ๋…ธ๋“œ๋ฅผ ์ƒ์„ฑํ•ด์ค€๋‹ค.
    for(int i=0;i<size;i++)
        heap->array[i]=new_node(data[i], freq[i]);

    heap->size = size;
    build_heap(heap);
    return heap;
}

์ถœ๋ ฅ

//๋ฐฐ์—ด ์›์†Œ ๊ฐ’ ์ถœ๋ ฅ
void print(int arr[],int n){
    for(int i=0;i<n;i++)
        printf("%d",arr[i]);
    printf("\n");
}
int is_leaf(Node * root){
    return !(root->left) && !(root->right);   
}
// ์ตœ์ข… ์ฝ”๋“œ ์ถœ๋ ฅ
void print_code(Node * root, int arr[], int top){
    // ์™ผ์ชฝ ์ž์‹๋…ธ๋“œ(๊ฐ€์ง€)๋Š” 0์ €์žฅ
    if(root->left){
        arr[top]=0;
        print_code(root->left, arr,top+1);
    }
    // ์˜ค๋ฅธ์ชฝ ์ž์‹๋…ธ๋“œ(๊ฐ€์ง€)๋Š” 1์ €์žฅ
    if(root->right){
        arr[top]=1;
        print_code(root->right, arr,top+1);
    }
    // ๋‹จ๋ง๋…ธ๋“œ๋ฉด ์ฝ”๋“œ ์ถœ๋ ฅ
    if(is_leaf(root)){
        printf("%c: ",root->data);
        print(arr, top);
    }
}

Huffman ์ƒ์„ฑ

// heap์˜ ํฌ๊ธฐ๊ฐ€ 1์ธ์ง€ ์•„๋‹Œ์ง€ ๊ฒ€์‚ฌ
int is_one(Heap * h){
    return (h->size==1);
}
Node * build_huffman(char data[],int freq[], int size){
    Node * left, *right, *top;

    Heap * heap = create_heap(data, freq, size);

    while(!is_one(heap)){
        left = delete(heap);
        right = delete(heap);

        top = new_node('$',left->frequency+right->frequency);
        top->left=left;
        top->right= right;

        insert(heap, top);
    }
    return delete(heap);
}


void huffman(char data[],int freq[],int size){
    Node * root = build_huffman(data, freq, size);

    int arr[MAX],top=0;

    print_code(root, arr,top);
}

Main

int main(){
    char arr[] = { 'a', 'b', 'c', 'd', 'e', 'f' };
    int freq[] = { 5, 9, 12, 13, 16, 45 };

    int size = sizeof(arr) / sizeof(arr[0]);

    huffman(arr, freq, size);   
}

๊ฒฐ๊ณผ

f: 0
c: 100
d: 101
a: 1100
b: 1101
e: 111

์‘์šฉ

  • ํŒฉ์Šค

  • JPEG

  • MPEG

Last updated