Source for file HuffmanCompress.php
Documentation is available at HuffmanCompress.php
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 foldmethod=marker: */
// +----------------------------------------------------------------------+
// +----------------------------------------------------------------------+
// | Copyright (c) 1997-2002 The PHP Group |
// +----------------------------------------------------------------------+
// | This source file is subject to version 2.0 of the PHP license, |
// | that is bundled with this package in the file LICENSE, and is |
// | available at through the world-wide-web at |
// | http://www.php.net/license/2_02.txt. |
// | If you did not receive a copy of the PHP license and are unable to |
// | obtain it through the world-wide-web, please send a note to |
// | license@php.net so we can mail you a copy immediately. |
// +----------------------------------------------------------------------+
// | Authors: Markus Nix <mnix@docuverse.de> |
// | David Holmes <exaton@free.fr> (original version) |
// +----------------------------------------------------------------------+
require_once 'Text/Huffman.php';
* Huffman Compression Class
* Size of the input file, in bytes
* Array of letter occurrences
* Index of the root of the Huffman tree
* Array of character codes
* Array of character code lengths
// Initializing compression-specific variables
throw new Exception ('Files not provided.');
// Counting letter occurrences in input file
$this->_countOccurrences ();
// Converting occurrences into basic nodes
// The nodes array has been initialized, as it will be filled with dynamic incrementation
$this->_occurrencesToNodes ();
// Construction of the Huffman tree
$this->_makeHuffmanTree ();
// Constructing character codes
// !! No need for 8 bits of nb of chars in alphabet ?? still use $this->nbchars ? NO
// !! No need for 8+5+codelen bits of chars & codes ?? still use $this->_codelens array ? YES
// Header : passing the Huffman tree with an automatically stopping algorithm
// End of header : number of chars actually encoded, over 3 bytes
// Contents: compressed data
// Finalising output, closing file handles
* setFiles() is called to specify the paths to the input and output files.
* It calls a parent function for its role, then sets some compression-
* specific variables concerning files.
public function setFiles($ifile = '', $ofile = '')
// Calling the parent function for this role
// Setting compression-specific variables concerning files
* Show info on characters codes created from the Huffman tree.
// Preparing informative $scodes array
foreach ($this->_occ as $char => $nbocc)
$tmp = ' (ASCII : ' . ord($char) . ')';
for ($i = 0; $i < 6 - strlen($nbocc); $i++ ) {
$scodes[$schar] = '(' . $nboccprefix . $nbocc . ' occurences, or ' . $occpercent . '%) ' . $this->_codes[$char] . ' (code on ' . $this->_codelens[$char] . ' bits)' . $tmp;
* Calculate compression ration.
// Simulating output file size
foreach ($this->_occ as $char => $nbocc)
$csize += 16 * ($nbchars - 1 ); // For Huffman tree in header
$csize += 24; // For nb. chars to read
$csize = ceil($csize / 8 );
* Count character occurrences in the file, to identify information
* quantities and later construct the Huffman tree.
private function _countOccurrences ()
if (!isset ($this->_occ[$char])) {
* Convert the character occurrences to basic Nodes of according weight.
private function _occurrencesToNodes ()
foreach ($this->_occ as $char => $nboccs)
* Get the index of the first node of lightest weight in the nodes array.
private function _findLightestNode ()
foreach ($this->_nodes as $nodenum => $node)
if (!$node['_lndone'] && ($minw == -1 || $node['_w'] < $minw)) {
$minw_nodenum = $nodenum;
* Create the Huffman tree, after the following algorithm :
* - Find the two nodes of least weight (least info value)
* - Set each one's parent to the index a new node which has a weight equal to the sum of weights of the two
* - At the same time, specify the new nodes children as being the two lightest nodes
* - Eliminate the two lightest nodes from further searches for lightest nodes
* This carries on until there is only one node difference between nodes
* constructed and nodes done : the root of the tree.
* By following the tree from root down to leaf, by successive children 0 or
* 1, we can thereafter establish the code for the character.
private function _makeHuffmanTree ()
while ($nbnodesdone < $nbnodes - 1 ) {
// Find two lightest nodes and consider them done
for ($i = 0; $i < 2; $i++ ) {
$ln[$i] = $this->_findLightestNode ();
$this->_nodes[$ln[$i]]['_lndone'] = true;
// Link them with a parent node of sum weight
// (whose parent is as yet unknown ; in the case of root, it will stay with -1)
'_w' => $this->_nodes[$ln[0 ]]['_w'] + $this->_nodes[$ln[1 ]]['_w'],
$this->_nodes[$ln[0 ]]['_par'] = $nbnodes; // The number of nodes before incrementation is the index
$this->_nodes[$ln[1 ]]['_par'] = $nbnodes; // of the node which has just been created
// Note that the last node is the root of the tree
* Read the Huffman tree to determine character codes.
private function _makeCharCodes ()
// Note : original alphabet is the keys of $occ
foreach ($this->_occ as $char => $nbocc) {
// Following tree back up to root
// (therefore _pre_positionning each new bit in the code)
// $this->nodes[$i] is the original Node of $char
$parnode = $this->_nodes[$curnode]['_par'];
$code = (($this->_nodes[$parnode]['_child0'] == $curnode)? '0' : '1') . $code;
} while ($curnode != $this->_hroot);
private function _transmitTree ()
// Launching the business, specifying that we are starting at root
$this->_transmitTreePart ($this->_hroot, true );
* Transmit the Huffman tree in header.
private function _transmitTreePart ($nodenum, $isroot)
// Transmitting current node representation, if we are not working with root (that's only the first time).
// Then looking at children if appropriate (gee that sounds bad).
$curnode = $this->_nodes[$nodenum];
$char = $curnode['_char'];
// Being root can only be in this case
$this->_transmitTreePart ($curnode['_child0'], false );
$this->_transmitTreePart ($curnode['_child1'], false );
// Just transmitting the char
Documentation generated on Mon, 11 Mar 2019 13:54:22 -0400 by phpDocumentor 1.4.4. PEAR Logo Copyright © PHP Group 2004.
|