/* Test Jacob Navia's lcc-Win32 statistical functions.
In the original test, 6 of the 11 functions
returned incorrect results:
kurtosis (broken)
variance_mle (broken)
skewness (indirectly calls broken variance_mle)
standard_deviation_mle (calls broken variance_mle)
geometric_mean (see corrected trivial error)
central_moment (see corrected trivial error)
kurtosis needs to be implemented with a different
algorithm. I think skewness will be OK if it gets
the correct value for std dev. I can't think of a
fix for variance_mle except rewriting it as a
separate function for sample variance.
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <errno.h>
/* this returns the correct value for
population variance */
double variance( double* Items, int Count )
{
double ItemCount;
int i;
double a;
double Sum;
double SumOfSquared;
double Mean;
double MeanSquared;
double Var;
Sum = 0;
SumOfSquared = 0;
for( i = 0; i < Count; i++ )
{
a = *Items++;
Sum += a;
SumOfSquared += a * a;
}
ItemCount = (double) Count;
Mean = Sum / ItemCount;
MeanSquared = Mean * Mean;
Var = ( SumOfSquared - ( MeanSquared * ItemCount ) ) /
ItemCount;
return(Var);
}
/* this returns the correct value for
population standard deviation */
double standard_deviation( double* Items, int Count )
{
return( sqrt( variance( Items, Count ) ) );
}
double geometric_mean(double *data,int n)
{
long double result=1;
double ni = 1.0/n;
int i;
for (i=0; i<n;i++) {
//result *= pow(*data,ni); /* error */
result *= pow(*data++, ni); /* pointer incremented */
}
return result;
}
double arithmetic_mean(double *data,int n)
{
long double result = 0;
int i;
for (i=0; i<n;i++) {
result += *data++;
}
return result/n;
}
double harmonic_mean(double *data,int n)
{
double result=0;
int i;
for (i=0; i<n;i++) {
result += 1.0/ *data++;
}
return n/result;
}
double rms(double *data,int n)
{
long double result = 0;
int i;
for (i=0; i<n;i++) {
result += (*data) * (*data);
data++;
}
return sqrt(result/n);
}
int compare_double( const void * A, const void * B )
{
double a,b;
int r;
a = *((double*) A);
b = *((double*) B);
if( a > b )
{
r = 1;
}
else
{
if( a < b )
{
r = -1;
}
else
{
r = 0;
}
}
return( r );
}
double median(double *data,int n)
{
double result;
if (n < 1 || data == NULL) {
errno = ERANGE;
return 0;
}
double *pdata = malloc(n*sizeof(double));
if (pdata == 0) {
errno = ENOMEM;
return 0;
}
memcpy(pdata,data,n*sizeof(double));
qsort(pdata,n,sizeof(double),compare_double);
if (n&1)
result = pdata[(n+1)/2];
else
result = (pdata[n/2 - 1] + pdata[n/2])/2.0;
free(pdata);
return result;
}
double percentile(double *data,int n,double K)
{
double result;
if (K< 0.0 || K > 1.0 || data == NULL) {
errno = ERANGE;
return 0;
}
double *copy = malloc(n*sizeof(double));
if (copy == NULL) {
errno = ENOMEM;
return 0;
}
qsort(copy,n,sizeof(double),compare_double);
double index = n * K;
if (index != floor(index)) {
result = copy[(int)index];
}
else {
int i = ((int) index) -1;
result = (copy[i]+copy[i+1])/2.0;
}
free(copy);
return result;
}
double central_moment(double *data,int n,double K)
{
double am = arithmetic_mean(data,n);
double sum = 0;
int i;
for (i=0; i<n;i++) {
//sum += pow(*data - am,K); /* error */
sum += pow(*data++ - am,K); /* pointer incremented */
}
return sum / n;
}
double variance_mle(double *data,int n)
{
double v = variance(data,n);
return (v * (n-1))/n; /* ??? */
}
double standard_deviation_mle(double *data, int n)
{
double v = variance_mle(data,n);
return sqrt(v);
}
/* Definition is OK, but calls standard_deviation_mle
which calls broken variance_mle. */
double skewness(double *data, int n)
{
double sigma = standard_deviation_mle(data,n);
return central_moment(data,n,3.0) / sigma * sigma * sigma;
}
/* This is apparently an obsolete definition of kurtosis
and implemented incorrectly here anyway. It should be:
u4/sigma^4. See Wikipedia article on kurtosis. */
double kurtosis(double *data,int n)
{
double sigma = variance_mle(data,n);
return central_moment(data,n,4.0) / (sigma*sigma);
}
int main(int argc, char *argv[])
{
int idx, n;
double var, varmle, sdev, sdevmle, gmean, amean, hmean,
rmsv, med, ptile30, cmoment3, skew, kurt, datum;
double *data;
FILE *datafile; /* read data from a file */
if(argc != 2)
{
fprintf(stderr, "usage: >pgm <input file>\n");
exit(EXIT_FAILURE);
}
datafile = fopen(argv[1], "r");
if(datafile == NULL)
{
fprintf(stderr, "can't open %s\n", argv[1]);
exit(EXIT_FAILURE);
}
n = 0;
while(fscanf(datafile, "%lf", &datum) == 1)
{
n++;
}
rewind(datafile);
data = malloc(n * sizeof(*data));
if (data == NULL)
{
fprintf(stderr, "malloc failed\n");
exit(EXIT_FAILURE);
}
idx = 0;
while(fscanf(datafile, "%lf", &datum) == 1)
{
data[idx++] = datum;
}
fclose(datafile);
varmle = variance_mle(data, n);
var = variance(data, n);
sdevmle = standard_deviation_mle(data, n);
sdev = standard_deviation(data, n);
amean = arithmetic_mean(data, n);
gmean = geometric_mean(data, n);
hmean = harmonic_mean(data, n);
rmsv = rms(data, n);
med = median(data, n);
ptile30 = percentile(data, n, .3);
cmoment3 = central_moment(data, n, 3.);
skew = skewness(data, n);
kurt = kurtosis(data, n);
printf("\ndataset: %s\n", argv[1]);
printf("--------------------------------\n\n");
printf("observations: %d\n", n);
printf("arithmetic mean: %.3f\n", amean);
printf("geometric mean: %.3f\n", gmean);
printf("harmonic mean: %.3f\n", hmean);
printf("quadratic mean(rms): %.3f\n", rmsv);
printf("median: %.3f\n", med);
printf("variance_mle: %.3f\n", varmle);
printf("variance: %.3f\n", var);
printf("standard_deviation: %.3f\n", sdev);
printf("standard_deviation_mle: %.3f\n", sdevmle);
printf("30th percentile: %.3f\n", ptile30);
printf("3rd central moment: %.3f\n", cmoment3);
printf("skewness: %.3f\n", skew);
printf("kurtosis: %.3f\n", kurt);
return 0;
}


|