Source for file Stats.php
Documentation is available at Stats.php
// +----------------------------------------------------------------------+
// +----------------------------------------------------------------------+
// | Copyright (c) 1997-2003 The PHP Group |
// +----------------------------------------------------------------------+
// | This source file is subject to version 2.0 of the PHP license, |
// | that is bundled with this package in the file LICENSE, and is |
// | available at through the world-wide-web at |
// | http://www.php.net/license/2_02.txt. |
// | If you did not receive a copy of the PHP license and are unable to |
// | obtain it through the world-wide-web, please send a note to |
// | license@php.net so we can mail you a copy immediately. |
// +----------------------------------------------------------------------+
// | Authors: Jesus M. Castagnetto <jmcastagnetto@php.net> |
// +----------------------------------------------------------------------+
// $Id: Stats.php 303981 2010-10-04 12:07:56Z clockwerx $
// Constants for defining the statistics to calculate /*{{{*/
* STATS_BASIC to generate the basic descriptive statistics
* STATS_FULL to generate also higher moments, mode, median, etc.
// Constants describing the data set format /*{{{*/
* STATS_DATA_SIMPLE for an array of numeric values. This is the default.
* e.g. $data = array(2,3,4,5,1,1,6);
// Data-format flag; this is the default value of setData()'s $opt parameter.
define('STATS_DATA_SIMPLE', 0 );
* STATS_DATA_CUMMULATIVE for an associative array of frequency values,
* where in each array entry, the index is the data point and the
* value the count (frequency):
* e.g. $data = array(3=>4, 2.3=>5, 1.25=>6, 0.5=>3)
// Data-format flag; pass as setData()'s $opt when array keys are the data
// points and array values are their frequencies.
// NOTE(review): the identifier is spelled "CUMMULATIVE"; presumably callers
// depend on that spelling, so confirm before renaming.
define('STATS_DATA_CUMMULATIVE', 1 );
// Constants defining how to handle nulls /*{{{*/
* STATS_REJECT_NULL, reject data sets with null values. This is the default.
* Any non-numeric value is considered a null in this context.
// Null-handling flag; this is the default $nullOption of the Math_Stats constructor.
define('STATS_REJECT_NULL', -1 );
* STATS_IGNORE_NULL, ignore null values and prune them from the data.
* Any non-numeric value is considered a null in this context.
// Null-handling flag; passed to setNullOption() (see the class usage example).
define('STATS_IGNORE_NULL', -2 );
* STATS_USE_NULL_AS_ZERO, assign the value of 0 (zero) to null values.
* Any non-numeric value is considered a null in this context.
// Null-handling flag; passed to setNullOption() (see the class usage example).
define('STATS_USE_NULL_AS_ZERO', -3 );
* A class to calculate descriptive statistics from a data set.
* Data sets can be simple arrays of data, or a cumulative hash.
* The second form is useful when passing a large data set,
* for example the data set:
* $data1 = array (1,2,1,1,1,1,3,3,4.1,3,2,2,4.1,1,1,2,3,3,2,2,1,1,2,2);
* can be expressed more compactly as:
* $data2 = array('1'=>9, '2'=>8, '3'=>5, '4.1'=>2);
* include_once 'Math/Stats.php';
* // $s->setData($data2, STATS_DATA_CUMMULATIVE);
* $stats = $s->calcBasic();
* echo 'Mean: '.$stats['mean'].' StDev: '.$stats['stdev'].' <br />\n';
* // using data with nulls
* // first ignoring them:
* $data3 = array(1.2, 'foo', 2.4, 3.1, 4.2, 3.2, null, 5.1, 6.2);
* $s->setNullOption(STATS_IGNORE_NULL);
* $stats3 = $s->calcFull();
* // and then assuming nulls == 0
* $s->setNullOption(STATS_USE_NULL_AS_ZERO);
* $stats3 = $s->calcFull();
* Originally this class was part of NumPHP (Numeric PHP package)
* @author Jesus M. Castagnetto <jmcastagnetto@php.net>
* The simple or cumulative data set.
* Expanded data set. Only set when cumulative data
* is being used. Null by default.
var $_dataExpanded = null;
* Flag for data type, one of STATS_DATA_SIMPLE or
* STATS_DATA_CUMMULATIVE. Null by default.
* Flag for null handling options. One of STATS_REJECT_NULL,
* STATS_IGNORE_NULL or STATS_USE_NULL_AS_ZERO
* Array for caching result values, should be reset
var $_calculatedValues = array ();
* Constructor for the class
* @param optional int $nullOption how to handle null values
* @return object Math_Stats
function Math_Stats($nullOption=STATS_REJECT_NULL ) {/*{{{*/
$this->_nullOption = $nullOption;
* Sets and verifies the data, checking for nulls and using
* the current null handling option
* @param array $arr the data set
* @param optional int $opt data format: STATS_DATA_CUMMULATIVE or STATS_DATA_SIMPLE (default)
* @return mixed true on success, a PEAR_Error object otherwise
function setData($arr, $opt=STATS_DATA_SIMPLE ) {/*{{{*/
return PEAR ::raiseError ('invalid data, an array of numeric data was expected');
$this->_dataExpanded = null;
$this->_dataOption = null;
$this->_calculatedValues = array ();
$this->_dataOption = $opt;
$this->_dataOption = $opt;
$this->_dataExpanded = array ();
return $this->_validate ();
* Returns the data which might have been modified
* according to the current null handling options.
* @param boolean $expanded whether to return an expanded list, default is false
* @return mixed array of data on success, a PEAR_Error object otherwise
function getData($expanded=false ) {/*{{{*/
if ($this->_data == null ) {
return PEAR ::raiseError ('data has not been set');
return $this->_dataExpanded;
* Sets the null handling option.
* Must be called before assigning a new data set containing null values
* @return mixed true on success, a PEAR_Error object otherwise
$this->_nullOption = $nullOption;
return PEAR ::raiseError ('invalid null handling option expecting: '.
'STATS_REJECT_NULL, STATS_IGNORE_NULL or STATS_USE_NULL_AS_ZERO');
* Transforms the data by subtracting the mean from each entry and
* dividing by the standard deviation. This will reset all pre-calculated
* values to their original (unset) defaults.
* @return mixed true on success, a PEAR_Error object otherwise
if (PEAR ::isError ($mean)) {
if (PEAR ::isError ($std)) {
return PEAR ::raiseError ('cannot studentize data, standard deviation is zero.');
foreach ($this->_data as $val=> $freq) {
$newval = ($val - $mean) / $std;
foreach ($this->_data as $val) {
$newval = ($val - $mean) / $std;
return $this->setData($arr, $this->_dataOption);
* Transforms the data by subtracting the mean from each entry.
* This will reset all pre-calculated values to their original (unset) defaults.
* @return mixed true on success, a PEAR_Error object otherwise
if (PEAR ::isError ($mean)) {
foreach ($this->_data as $val=> $freq) {
foreach ($this->_data as $val) {
return $this->setData($arr, $this->_dataOption);
* Calculates the basic or full statistics for the data set
* @param int $mode one of STATS_BASIC or STATS_FULL
* @param boolean $returnErrorObject whether the raw PEAR_Error (when true, default),
* or only the error message will be returned (when false), if an error happens.
* @return mixed an associative array of statistics on success, a PEAR_Error object otherwise
function calc($mode, $returnErrorObject=true ) {/*{{{*/
if ($this->_data == null ) {
return PEAR ::raiseError ('data has not been set');
return $this->calcFull($returnErrorObject);
return PEAR ::raiseError ('incorrect mode, expected STATS_BASIC or STATS_FULL');
* Calculates a basic set of statistics
* @param boolean $returnErrorObject whether the raw PEAR_Error (when true, default),
* or only the error message will be returned (when false), if an error happens.
* @return mixed an associative array of statistics on success, a PEAR_Error object otherwise
function calcBasic($returnErrorObject=true ) {/*{{{*/
'min' => $this->__format ($this->min(), $returnErrorObject),
'max' => $this->__format ($this->max(), $returnErrorObject),
'sum' => $this->__format ($this->sum(), $returnErrorObject),
'sum2' => $this->__format ($this->sum2(), $returnErrorObject),
'count' => $this->__format ($this->count(), $returnErrorObject),
'mean' => $this->__format ($this->mean(), $returnErrorObject),
'stdev' => $this->__format ($this->stDev(), $returnErrorObject),
'variance' => $this->__format ($this->variance(), $returnErrorObject),
'range' => $this->__format ($this->range(), $returnErrorObject)
* Calculates a full set of statistics
* @param boolean $returnErrorObject whether the raw PEAR_Error (when true, default),
* or only the error message will be returned (when false), if an error happens.
* @return mixed an associative array of statistics on success, a PEAR_Error object otherwise
function calcFull($returnErrorObject=true ) {/*{{{*/
'min' => $this->__format ($this->min(), $returnErrorObject),
'max' => $this->__format ($this->max(), $returnErrorObject),
'sum' => $this->__format ($this->sum(), $returnErrorObject),
'sum2' => $this->__format ($this->sum2(), $returnErrorObject),
'count' => $this->__format ($this->count(), $returnErrorObject),
'mean' => $this->__format ($this->mean(), $returnErrorObject),
'median' => $this->__format ($this->median(), $returnErrorObject),
'mode' => $this->__format ($this->mode(), $returnErrorObject),
'midrange' => $this->__format ($this->midrange(), $returnErrorObject),
'geometric_mean' => $this->__format ($this->geometricMean(), $returnErrorObject),
'harmonic_mean' => $this->__format ($this->harmonicMean(), $returnErrorObject),
'stdev' => $this->__format ($this->stDev(), $returnErrorObject),
'absdev' => $this->__format ($this->absDev(), $returnErrorObject),
'variance' => $this->__format ($this->variance(), $returnErrorObject),
'range' => $this->__format ($this->range(), $returnErrorObject),
'std_error_of_mean' => $this->__format ($this->stdErrorOfMean(), $returnErrorObject),
'skewness' => $this->__format ($this->skewness(), $returnErrorObject),
'kurtosis' => $this->__format ($this->kurtosis(), $returnErrorObject),
'coeff_of_variation' => $this->__format ($this->coeffOfVariation(), $returnErrorObject),
'sample_central_moments' => array (
'sample_raw_moments' => array (
'frequency' => $this->__format ($this->frequency(), $returnErrorObject),
'quartiles' => $this->__format ($this->quartiles(), $returnErrorObject),
'interquartile_range' => $this->__format ($this->interquartileRange(), $returnErrorObject),
'interquartile_mean' => $this->__format ($this->interquartileMean(), $returnErrorObject),
'quartile_deviation' => $this->__format ($this->quartileDeviation(), $returnErrorObject),
* Calculates the minimum of a data set.
* Handles cumulative data sets correctly
* @return mixed the minimum value on success, a PEAR_Error object otherwise
if ($this->_data == null ) {
return PEAR ::raiseError ('data has not been set');
$min = min($this->_data);
$this->_calculatedValues['min'] = $min;
return $this->_calculatedValues['min'];
* Calculates the maximum of a data set.
* Handles cumulative data sets correctly
* @return mixed the maximum value on success, a PEAR_Error object otherwise
if ($this->_data == null ) {
return PEAR ::raiseError ('data has not been set');
$max = max($this->_data);
$this->_calculatedValues['max'] = $max;
return $this->_calculatedValues['max'];
* Handles cumulative data sets correctly
* @return mixed the sum on success, a PEAR_Error object otherwise
if (PEAR ::isError ($sum)) {
$this->_calculatedValues['sum'] = $sum;
return $this->_calculatedValues['sum'];
* Calculates SUM { (xi)^2 }
* Handles cumulative data sets correctly
* @return mixed the sum on success, a PEAR_Error object otherwise
if (PEAR ::isError ($sum2)) {
$this->_calculatedValues['sum2'] = $sum2;
return $this->_calculatedValues['sum2'];
* Calculates SUM { (xi)^n }
* Handles cumulative data sets correctly
* @param numeric $n the exponent
* @return mixed the sum on success, a PEAR_Error object otherwise
function sumN($n) {/*{{{*/
if ($this->_data == null ) {
return PEAR ::raiseError ('data has not been set');
foreach($this->_data as $val=> $freq) {
$sumN += $freq * pow((double) $val, (double) $n);
foreach($this->_data as $val) {
$sumN += pow((double) $val, (double) $n);
* Calculates PROD { (xi) }, (the product of all observations)
* Handles cumulative data sets correctly
* @return numeric|array|PEAR_Error the product as a number or an array of numbers
* (if there is numeric overflow) on success,
* a PEAR_Error object otherwise
if (PEAR ::isError ($product)) {
$this->_calculatedValues['product'] = $product;
return $this->_calculatedValues['product'];
* Calculates PROD { (xi)^n }, which is the product of all observations,
* each raised to the power n
* Handles cumulative data sets correctly
* @param numeric $n the exponent
* @return numeric|array|PEAR_Error the product as a number or an array of numbers
* (if there is numeric overflow) on success,
* a PEAR_Error object otherwise
if ($this->_data == null ) {
return PEAR ::raiseError ('data has not been set');
foreach($this->_data as $val=> $freq) {
$prodN *= $freq * pow((double) $val, (double) $n);
foreach($this->_data as $val) {
$prodN *= pow((double) $val, (double) $n);
// try to reduce to a single value
foreach ($partial as $val) {
// cannot reduce, return an array
* Calculates the number of data points in the set
* Handles cumulative data sets correctly
* @return mixed the count on success, a PEAR_Error object otherwise
function count() {/*{{{*/
if ($this->_data == null ) {
return PEAR ::raiseError ('data has not been set');
$count = count($this->_dataExpanded);
$count = count($this->_data);
$this->_calculatedValues['count'] = $count;
return $this->_calculatedValues['count'];
* Calculates the mean (average) of the data points in the set
* Handles cumulative data sets correctly
* @return mixed the mean value on success, a PEAR_Error object otherwise
if (PEAR ::isError ($sum)) {
if (PEAR ::isError ($count)) {
$this->_calculatedValues['mean'] = $sum / $count;
return $this->_calculatedValues['mean'];
* Calculates the range of the data set = max - min
* @return mixed the value of the range on success, a PEAR_Error object otherwise.
function range() {/*{{{*/
if (PEAR ::isError ($min)) {
if (PEAR ::isError ($max)) {
$this->_calculatedValues['range'] = $max - $min;
return $this->_calculatedValues['range'];
* Calculates the variance (unbiased) of the data points in the set
* Handles cumulative data sets correctly
* @return mixed the variance value on success, a PEAR_Error object otherwise
$variance = $this->__calcVariance ();
if (PEAR ::isError ($variance)) {
$this->_calculatedValues['variance'] = $variance;
return $this->_calculatedValues['variance'];
* Calculates the standard deviation (unbiased) of the data points in the set
* Handles cumulative data sets correctly
* @return mixed the standard deviation on success, a PEAR_Error object otherwise
function stDev() {/*{{{*/
if (PEAR ::isError ($variance)) {
$this->_calculatedValues['stDev'] = sqrt($variance);
return $this->_calculatedValues['stDev'];
* Calculates the variance (unbiased) of the data points in the set
* given a fixed mean (average) value. Not used in calcBasic(), calcFull()
* Handles cumulative data sets correctly
* @param numeric $mean the fixed mean value
* @return mixed the variance on success, a PEAR_Error object otherwise
return $this->__calcVariance ($mean);
* Calculates the standard deviation (unbiased) of the data points in the set
* given a fixed mean (average) value. Not used in calcBasic(), calcFull()
* Handles cumulative data sets correctly
* @param numeric $mean the fixed mean value
* @return mixed the standard deviation on success, a PEAR_Error object otherwise
* @see varianceWithMean()
if (PEAR ::isError ($varianceWM)) {
return sqrt($varianceWM);
* Calculates the absolute deviation of the data points in the set
* Handles cumulative data sets correctly
* @return mixed the absolute deviation on success, a PEAR_Error object otherwise
$absDev = $this->__calcAbsoluteDeviation ();
if (PEAR ::isError ($absDev)) {
$this->_calculatedValues['absDev'] = $absDev;
return $this->_calculatedValues['absDev'];
* Calculates the absolute deviation of the data points in the set
* given a fixed mean (average) value. Not used in calcBasic(), calcFull()
* Handles cumulative data sets correctly
* @param numeric $mean the fixed mean value
* @return mixed the absolute deviation on success, a PEAR_Error object otherwise
return $this->__calcAbsoluteDeviation ($mean);
* Calculates the skewness of the data distribution in the set
* The skewness measures the degree of asymmetry of a distribution,
* and is related to the third central moment of a distribution.
* A normal distribution has a skewness = 0
* A distribution with a tail off towards the high end of the scale
* (positive skew) has a skewness > 0
* A distribution with a tail off towards the low end of the scale
* (negative skew) has a skewness < 0
* Handles cumulative data sets correctly
* @return mixed the skewness value on success, a PEAR_Error object otherwise
if (PEAR ::isError ($count)) {
if (PEAR ::isError ($stDev)) {
$sumdiff3 = $this->__sumdiff (3 );
if (PEAR ::isError ($sumdiff3)) {
$this->_calculatedValues['skewness'] = ($sumdiff3 / ($count * pow($stDev, 3 )));
return $this->_calculatedValues['skewness'];
* Calculates the kurtosis of the data distribution in the set
* The kurtosis measures the degree of peakedness of a distribution.
* It is also called the "excess" or "excess coefficient", and is
* a normalized form of the fourth central moment of a distribution.
* A normal distribution has a kurtosis = 0
* A narrow and peaked (leptokurtic) distribution has a kurtosis > 0
* A flat and wide (platykurtic) distribution has a kurtosis < 0
* Handles cumulative data sets correctly
* @return mixed the kurtosis value on success, a PEAR_Error object otherwise
if (PEAR ::isError ($count)) {
if (PEAR ::isError ($stDev)) {
$sumdiff4 = $this->__sumdiff (4 );
if (PEAR ::isError ($sumdiff4)) {
$this->_calculatedValues['kurtosis'] = ($sumdiff4 / ($count * pow($stDev, 4 ))) - 3;
return $this->_calculatedValues['kurtosis'];
* Calculates the median of a data set.
* The median is the value such that half of the points are below it
* and half are above it.
* If the number of values is odd, it is the middle item.
* If the number of values is even, it is the average of the two middle items.
* Handles cumulative data sets correctly
* @return mixed the median value on success, a PEAR_Error object otherwise
if ($this->_data == null ) {
return PEAR ::raiseError ('data has not been set');
$arr = & $this->_dataExpanded;
$median = ($arr[$h] + $arr[$h - 1 ]) / 2;
$this->_calculatedValues['median'] = $median;
return $this->_calculatedValues['median'];
* Calculates the mode of a data set.
* The mode is the value with the highest frequency in the data set.
* There can be more than one mode.
* Handles cumulative data sets correctly
* @return mixed an array of mode values on success, a PEAR_Error object otherwise
if ($this->_data == null ) {
return PEAR ::raiseError ('data has not been set');
foreach ($arr as $val=> $freq) {
$this->_calculatedValues['mode'] = $mode;
return $this->_calculatedValues['mode'];
* Calculates the midrange of a data set.
* The midrange is the average of the minimum and maximum of the data set.
* Handles cumulative data sets correctly
* @return mixed the midrange value on success, a PEAR_Error object otherwise
if (PEAR ::isError ($min)) {
if (PEAR ::isError ($max)) {
$this->_calculatedValues['midrange'] = (($max + $min) / 2 );
return $this->_calculatedValues['midrange'];
* Calculates the geometrical mean of the data points in the set
* Handles cumulative data sets correctly
* @return mixed the geometrical mean value on success, a PEAR_Error object otherwise
if (PEAR ::isError ($count)) {
if (PEAR ::isError ($prod)) {
$geomMean *= pow($val, 1/ $count);
$this->_calculatedValues['geometricMean'] = $geomMean;
return PEAR ::raiseError ('The product of the data set is negative, geometric mean undefined.');
$this->_calculatedValues['geometricMean'] = pow($prod , 1 / $count);
return $this->_calculatedValues['geometricMean'];
* Calculates the harmonic mean of the data points in the set
* Handles cumulative data sets correctly
* @return mixed the harmonic mean value on success, a PEAR_Error object otherwise
if ($this->_data == null ) {
return PEAR ::raiseError ('data has not been set');
if (PEAR ::isError ($count)) {
foreach($this->_data as $val=> $freq) {
return PEAR ::raiseError ('cannot calculate a '.
'harmonic mean with data values of zero.');
foreach($this->_data as $val) {
return PEAR ::raiseError ('cannot calculate a '.
'harmonic mean with data values of zero.');
$this->_calculatedValues['harmonicMean'] = $count / $invsum;
return $this->_calculatedValues['harmonicMean'];
* Calculates the nth central moment (m{n}) of a data set.
* The definition of a sample central moment is:
* m{n} = 1/N * SUM { (xi - avg)^n }
* where: N = sample size, avg = sample mean.
* @param integer $n moment to calculate
* @return mixed the numeric value of the moment on success, PEAR_Error otherwise
return PEAR ::raiseError ('moment must be a positive integer >= 1.');
if (PEAR ::isError ($count)) {
return PEAR ::raiseError (" Cannot calculate {$n}th sample moment, ".
'there are zero data entries');
$sum = $this->__sumdiff ($n);
if (PEAR ::isError ($sum)) {
* Calculates the nth raw moment (m{n}) of a data set.
* The definition of a sample raw moment is:
* m{n} = 1/N * SUM { xi^n }
* where: N = sample size.
* @param integer $n moment to calculate
* @return mixed the numeric value of the moment on success, PEAR_Error otherwise
return PEAR ::raiseError ('moment must be a positive integer >= 1.');
if (PEAR ::isError ($count)) {
return PEAR ::raiseError (" Cannot calculate {$n}th raw moment, ".
'there are zero data entries.');
if (PEAR ::isError ($sum)) {
* Calculates the coefficient of variation of a data set.
* The coefficient of variation measures the spread of a set of data
* as a proportion of its mean. It is often expressed as a percentage.
* Handles cumulative data sets correctly
* @return mixed the coefficient of variation on success, a PEAR_Error object otherwise
if (PEAR ::isError ($mean)) {
return PEAR ::raiseError ('cannot calculate the coefficient '.
'of variation, mean of sample is zero');
if (PEAR ::isError ($stDev)) {
$this->_calculatedValues['coeffOfVariation'] = $stDev / $mean;
return $this->_calculatedValues['coeffOfVariation'];
* Calculates the standard error of the mean.
* It is the standard deviation of the sampling distribution of
* the mean. The formula is:
* S.E. Mean = SD / (N)^(1/2)
* This formula does not assume a normal distribution, and shows
* that the size of the standard error of the mean is inversely
* proportional to the square root of the sample size.
* @return mixed the standard error of the mean on success, a PEAR_Error object otherwise
if (PEAR ::isError ($count)) {
if (PEAR ::isError ($stDev)) {
$this->_calculatedValues['stdErrorOfMean'] = $stDev / sqrt($count);
return $this->_calculatedValues['stdErrorOfMean'];
* Calculates the value frequency table of a data set.
* Handles cumulative data sets correctly
* @return mixed an associative array of value=>frequency items on success, a PEAR_Error object otherwise
if ($this->_data == null ) {
return PEAR ::raiseError ('data has not been set');
foreach ($this->_data as $val) {
if (!isset ($freq[" $val" ])) {
$this->_calculatedValues['frequency'] = $freq;
return $this->_calculatedValues['frequency'];
* The quartiles are defined as the values that divide a sorted
* data set into four equal-sized subsets, and correspond to the
* 25th, 50th, and 75th percentiles.
* @return mixed an associative array of quartiles on success, a PEAR_Error otherwise
if (PEAR ::isError ($q1)) {
if (PEAR ::isError ($q2)) {
if (PEAR ::isError ($q3)) {
$this->_calculatedValues['quartiles'] = array (
return $this->_calculatedValues['quartiles'];
* The interquartile mean is defined as the mean of the values left
* after discarding the lower 25% and top 25% ranked values, i.e.:
* interquart mean = mean(<P(25),P(75)>)
* @todo need to double check the equation
* @return mixed a numeric value on success, a PEAR_Error otherwise
if (PEAR ::isError ($quart)) {
foreach ($this->getData(true ) as $val) {
if ($val >= $q1 && $val <= $q3) {
return PEAR ::raiseError ('error calculating interquartile mean, '.
'empty interquartile range of values.');
$this->_calculatedValues['interquartileMean'] = $sum / $n;
return $this->_calculatedValues['interquartileMean'];
* The interquartile range is the distance between the 75th and 25th
* percentiles. Basically the range of the middle 50% of the data set,
* and thus is not affected by outliers or extreme values.
* interquart range = P(75) - P(25)
* @return mixed a numeric value on success, a PEAR_Error otherwise
if (PEAR ::isError ($quart)) {
$this->_calculatedValues['interquartileRange'] = $q3 - $q1;
return $this->_calculatedValues['interquartileRange'];
* The quartile deviation is half of the interquartile range value
* quart dev = (P(75) - P(25)) / 2
* @return mixed a numeric value on success, a PEAR_Error otherwise
* @see interquartileRange()
if (PEAR ::isError ($iqr)) {
$this->_calculatedValues['quartileDeviation'] = $iqr / 2;
return $this->_calculatedValues['quartileDeviation'];
* The quartile variation coefficient is defined as follows:
* quart var coeff = 100 * (P(75) - P(25)) / (P(75) + P(25))
* @todo need to double check the equation
* @return mixed a numeric value on success, a PEAR_Error otherwise
if (!array_key_exists('quartileVariationCoefficient', $this->_calculatedValues)) {
if (PEAR ::isError ($quart)) {
$this->_calculatedValues['quartileVariationCoefficient'] = 100 * $d / $s;
return $this->_calculatedValues['quartileVariationCoefficient'];
* The quartile skewness coefficient (also known as Bowley Skewness),
* quart skewness coeff = (P(25) - 2*P(50) + P(75)) / (P(75) - P(25))
* @todo need to double check the equation
* @return mixed a numeric value on success, a PEAR_Error otherwise
if (!array_key_exists('quartileSkewnessCoefficient', $this->_calculatedValues)) {
if (PEAR ::isError ($quart)) {
$this->_calculatedValues['quartileSkewnessCoefficient'] = $d / $s;
return $this->_calculatedValues['quartileSkewnessCoefficient'];
* The pth percentile is the value such that p% of a sorted data set
* is smaller than it, and (100 - p)% of the data is larger.
* A quick algorithm to pick the appropriate value from a sorted data set:
* - Count the number of values: n
* - Calculate the position of the value in the data list: i = p * (n + 1) / 100
* - if i is an integer, return the data at that position
* - if i < 1, return the minimum of the data set
* - if i > n, return the maximum of the data set
* - otherwise, average the entries at adjacent positions to i
* The median is the 50th percentile value.
* @todo need to double check generality of the algorithm
* @param numeric $p the percentile to estimate, e.g. 25 for 25th percentile
* @return mixed a numeric value on success, a PEAR_Error otherwise
if (PEAR ::isError ($count)) {
$data = & $this->_dataExpanded;
$obsidx = $p * ($count + 1 ) / 100;
if (intval($obsidx) == $obsidx) {
return $data[($obsidx - 1 )];
} elseif ($obsidx > $count) {
return $data[($count - 1 )];
$left = floor($obsidx - 1 );
$right = ceil($obsidx - 1 );
return ($data[$left] + $data[$right]) / 2;
* Utility function to calculate: SUM { (xi - mean)^n }
* @param numeric $power the exponent
* @param optional double $mean the data set mean value
* @return mixed the sum on success, a PEAR_Error object otherwise
* @see varianceWithMean()
function __sumdiff ($power, $mean=null ) {/*{{{*/
if ($this->_data == null ) {
return PEAR ::raiseError ('data has not been set');
if (PEAR ::isError ($mean)) {
foreach ($this->_data as $val=> $freq) {
$sdiff += $freq * pow((double) ($val - $mean), (double) $power);
foreach ($this->_data as $val)
$sdiff += pow((double) ($val - $mean), (double) $power);
* Utility function to calculate the variance with or without a fixed mean
* @param $mean the fixed mean to use, null as default
* @return mixed a numeric value on success, a PEAR_Error otherwise
* @see varianceWithMean()
function __calcVariance ($mean = null ) {/*{{{*/
if ($this->_data == null ) {
return PEAR ::raiseError ('data has not been set');
$sumdiff2 = $this->__sumdiff (2 , $mean);
if (PEAR ::isError ($sumdiff2)) {
if (PEAR ::isError ($count)) {
return PEAR ::raiseError ('cannot calculate variance of a singe data point');
return ($sumdiff2 / ($count - 1 ));
* Utility function to calculate the absolute deviation with or without a fixed mean
* @param $mean the fixed mean to use, null as default
* @return mixed a numeric value on success, a PEAR_Error otherwise
function __calcAbsoluteDeviation ($mean = null ) {/*{{{*/
if ($this->_data == null ) {
return PEAR ::raiseError ('data has not been set');
if (PEAR ::isError ($count)) {
$sumabsdev = $this->__sumabsdev ($mean);
if (PEAR ::isError ($sumabsdev)) {
return $sumabsdev / $count;
* Utility function to calculate: SUM { | xi - mean | }
* @param optional double $mean the mean value for the set or population
* @return mixed the sum on success, a PEAR_Error object otherwise
function __sumabsdev ($mean=null ) {/*{{{*/
if ($this->_data == null ) {
return PEAR ::raiseError ('data has not been set');
foreach ($this->_data as $val=> $freq) {
$sdev += $freq * abs($val - $mean);
foreach ($this->_data as $val) {
$sdev += abs($val - $mean);
* Utility function to format a PEAR_Error to be used by calc(),
* calcBasic() and calcFull()
* @param mixed $v value to be formatted
* @param boolean $useErrorObject whether the raw PEAR_Error (when true, default),
* or only the error message will be returned (when false)
* @return mixed if the value is a PEAR_Error object, and $useErrorObject
* is false, then a string with the error message will be returned,
* otherwise the value will not be modified and returned as passed.
function __format ($v, $useErrorObject=true ) {/*{{{*/
if (PEAR ::isError ($v) && $useErrorObject == false ) {
* Utility function to validate the data and modify it
* according to the current null handling option
* @return mixed true on success, a PEAR_Error object otherwise
function _validate () {/*{{{*/
foreach ($this->_data as $key=> $value) {
$d = ($cummulativeData) ? $key : $value;
$v = ($cummulativeData) ? $value : $key;
switch ($this->_nullOption) {
unset ($this->_data[" $key" ]);
unset ($this->_data[" $key" ]);
if (!isset ($this->_data[0 ])) {
return PEAR ::raiseError ('data rejected, contains NULL values');
// expand cummulative data
$this->_dataExpanded = array ();
// code below avoids using array_pad, because in PHP 4 that
// function has a hard-coded limit of 1048576 array items
// see php-src/ext/standard/array.c)
//$array_pad_magic_limit = 1048576;
foreach ($this->_data as $val=> $freq) {
for ($k=0; $k < $freq; $k++ ) {
$this->_dataExpanded[] = $val;
/* the code below causes a core dump
$valArr = array_fill(0, $freq, $val);
$this->_dataExpanded = array_merge($this->_dataExpanded, $valArr);
/* the code below gives incorrect values
// kludge to cover for array_pad's *features*
$newcount = count($this->_dataExpanded) + $freq;
while ($newcount > $array_pad_magic_limit) {
$this->_dataExpanded = array_pad($this->_dataExpanded, $array_pad_magic_limit, $val);
$newcount -= $array_pad_magic_limit;
$this->_dataExpanded = array_pad($this->_dataExpanded, $newcount, $val);
//sort($this->_dataExpanded);
// vim6: fdl=1: fdm=marker:
Documentation generated on Mon, 11 Mar 2019 15:39:18 -0400 by phpDocumentor 1.4.4. PEAR Logo Copyright © PHP Group 2004.
|