Source for org.jfree.data.statistics.Statistics

   1: /* ===========================================================
   2:  * JFreeChart : a free chart library for the Java(tm) platform
   3:  * ===========================================================
   4:  *
   5:  * (C) Copyright 2000-2005, by Object Refinery Limited and Contributors.
   6:  *
   7:  * Project Info:  http://www.jfree.org/jfreechart/index.html
   8:  *
   9:  * This library is free software; you can redistribute it and/or modify it 
  10:  * under the terms of the GNU Lesser General Public License as published by 
  11:  * the Free Software Foundation; either version 2.1 of the License, or 
  12:  * (at your option) any later version.
  13:  *
  14:  * This library is distributed in the hope that it will be useful, but 
  15:  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
  16:  * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 
  17:  * License for more details.
  18:  *
  19:  * You should have received a copy of the GNU Lesser General Public
  20:  * License along with this library; if not, write to the Free Software
  21:  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, 
  22:  * USA.  
  23:  *
  24:  * [Java is a trademark or registered trademark of Sun Microsystems, Inc. 
  25:  * in the United States and other countries.]
  26:  *
  27:  * ---------------
  28:  * Statistics.java
  29:  * ---------------
  30:  * (C) Copyright 2000-2005, by Matthew Wright and Contributors.
  31:  *
  32:  * Original Author:  Matthew Wright;
  33:  * Contributor(s):   David Gilbert (for Object Refinery Limited);
  34:  *
  35:  * $Id: Statistics.java,v 1.5.2.1 2005/10/25 21:34:46 mungady Exp $
  36:  *
  37:  * Changes (from 08-Nov-2001)
  38:  * --------------------------
  39:  * 08-Nov-2001 : Added standard header and tidied Javadoc comments (DG);
  40:  *               Moved from JFreeChart to package com.jrefinery.data.* in 
  41:  *               JCommon class library (DG);
  42:  * 24-Jun-2002 : Removed unnecessary local variable (DG);
  43:  * 07-Oct-2002 : Fixed errors reported by Checkstyle (DG);
  44:  * 26-May-2004 : Moved calculateMean() method from BoxAndWhiskerCalculator (DG);
  45:  * 02-Jun-2004 : Fixed bug in calculateMedian() method (DG);
  46:  * 11-Jan-2005 : Removed deprecated code in preparation for the 1.0.0 
  47:  *               release (DG);
  48:  *
  49:  */
  50: 
  51: package org.jfree.data.statistics;
  52: 
  53: import java.util.ArrayList;
  54: import java.util.Collection;
  55: import java.util.Collections;
  56: import java.util.Iterator;
  57: import java.util.List;
  58: 
  59: /**
  60:  * A utility class that provides some simple statistical functions.
  61:  */
  62: public abstract class Statistics {
  63: 
  64:     /**
  65:      * Returns the mean of an array of numbers.
  66:      *
  67:      * @param values  the values (<code>null</code> permitted, returns 
  68:      *                <code>Double.NaN</code>).
  69:      *
  70:      * @return The mean.
  71:      */
  72:     public static double calculateMean(Number[] values) {
  73:         double result = Double.NaN;
  74:         if (values != null && values.length > 0) {
  75:             double sum = 0.0;
  76:             int counter = 0;
  77:             for (; counter < values.length; counter++) {
  78:                 sum = sum + values[counter].doubleValue();
  79:             }
  80:             result = (sum / counter);
  81:         }
  82:         return result;
  83:     }
  84: 
  85:     /**
  86:      * Returns the mean of a collection of <code>Number</code> objects.
  87:      * 
  88:      * @param values  the values (<code>null</code> permitted, returns 
  89:      *                <code>Double.NaN</code>).
  90:      * 
  91:      * @return The mean.
  92:      */
  93:     public static double calculateMean(Collection values) {
  94:         
  95:         double result = Double.NaN;
  96:         int count = 0;
  97:         double total = 0.0;
  98:         Iterator iterator = values.iterator();
  99:         while (iterator.hasNext()) {
 100:             Object object = iterator.next();
 101:             if (object != null && object instanceof Number) {
 102:                 Number number = (Number) object;
 103:                 total = total + number.doubleValue();
 104:                 count = count + 1;
 105:             }
 106:         }
 107:         if (count > 0) {
 108:             result = total / count;
 109:         }        
 110:         return result;
 111:         
 112:     }
 113:     
 114:     /**
 115:      * Calculates the median for a list of values (<code>Number</code> objects).
 116:      * The list of values will be sorted first.
 117:      * 
 118:      * @param values  the values.
 119:      * 
 120:      * @return The median.
 121:      */
 122:     public static double calculateMedian(List values) {
 123:         return calculateMedian(values, true);
 124:     }
 125:     
 126:     /**
 127:      * Calculates the median for a list of values (<code>Number</code> objects)
 128:      * that are assumed to be in ascending order.
 129:      * 
 130:      * @param values  the values.
 131:      * @param copyAndSort  a flag that controls whether the list of values is
 132:      *                     copied and sorted.
 133:      * 
 134:      * @return The median.
 135:      */
 136:     public static double calculateMedian(List values, boolean copyAndSort) {
 137:         
 138:         double result = Double.NaN;
 139:         if (values != null) {
 140:             if (copyAndSort) {
 141:                 int itemCount = values.size();
 142:                 List copy = new ArrayList(itemCount);
 143:                 for (int i = 0; i < itemCount; i++) {
 144:                     copy.add(i, values.get(i));   
 145:                 }
 146:                 Collections.sort(copy);
 147:                 values = copy;
 148:             }
 149:             int count = values.size();
 150:             if (count > 0) {
 151:                 if (count % 2 == 1) {
 152:                     if (count > 1) {
 153:                         Number value = (Number) values.get((count - 1) / 2);
 154:                         result = value.doubleValue();
 155:                     }
 156:                     else {
 157:                         Number value = (Number) values.get(0);
 158:                         result = value.doubleValue();
 159:                     }
 160:                 }
 161:                 else {
 162:                     Number value1 = (Number) values.get(count / 2 - 1);
 163:                     Number value2 = (Number) values.get(count / 2);
 164:                     result = (value1.doubleValue() + value2.doubleValue()) 
 165:                              / 2.0;
 166:                 }
 167:             }
 168:         }
 169:         return result;
 170:     }
 171:     
 172:     /**
 173:      * Calculates the median for a sublist within a list of values 
 174:      * (<code>Number</code> objects).
 175:      * 
 176:      * @param values  the values (in any order).
 177:      * @param start  the start index.
 178:      * @param end  the end index.
 179:      * 
 180:      * @return The median.
 181:      */
 182:     public static double calculateMedian(List values, int start, int end) {
 183:         return calculateMedian(values, start, end, true);
 184:     }
 185: 
 186:     /**
 187:      * Calculates the median for a sublist within a list of values 
 188:      * (<code>Number</code> objects).  The entire list will be sorted if the 
 189:      * <code>ascending</code< argument is <code>false</code>.
 190:      * 
 191:      * @param values  the values.
 192:      * @param start  the start index.
 193:      * @param end  the end index.
 194:      * @param copyAndSort  a flag that that controls whether the list of values 
 195:      *                     is copied and sorted.
 196:      * 
 197:      * @return The median.
 198:      */
 199:     public static double calculateMedian(List values, int start, int end,
 200:                                          boolean copyAndSort) {
 201:         
 202:         double result = Double.NaN;
 203:         if (copyAndSort) {
 204:             List working = new ArrayList(end - start + 1);
 205:             for (int i = start; i <= end; i++) {
 206:                 working.add(values.get(i));  
 207:             }
 208:             Collections.sort(working); 
 209:             result = calculateMedian(working, false);
 210:         }
 211:         else {
 212:             int count = end - start + 1;
 213:             if (count > 0) {
 214:                 if (count % 2 == 1) {
 215:                     if (count > 1) {
 216:                         Number value 
 217:                             = (Number) values.get(start + (count - 1) / 2);
 218:                         result = value.doubleValue();
 219:                     }
 220:                     else {
 221:                         Number value = (Number) values.get(start);
 222:                         result = value.doubleValue();
 223:                     }
 224:                 }
 225:                 else {
 226:                     Number value1 = (Number) values.get(start + count / 2 - 1);
 227:                     Number value2 = (Number) values.get(start + count / 2);
 228:                     result 
 229:                         = (value1.doubleValue() + value2.doubleValue()) / 2.0;
 230:                 }
 231:             }
 232:         }
 233:         return result;    
 234:         
 235:     }
 236:     
 237:     /**
 238:      * Returns the standard deviation of a set of numbers.
 239:      *
 240:      * @param data  the data.
 241:      *
 242:      * @return The standard deviation of a set of numbers.
 243:      */
 244:     public static double getStdDev(Number[] data) {
 245:         double avg = calculateMean(data);
 246:         double sum = 0.0;
 247: 
 248:         for (int counter = 0; counter < data.length; counter++) {
 249:             double diff = data[counter].doubleValue() - avg;
 250:             sum = sum + diff * diff;
 251:         }
 252:         return Math.sqrt(sum / (data.length - 1));
 253:     }
 254: 
 255:     /**
 256:      * Fits a straight line to a set of (x, y) data, returning the slope and
 257:      * intercept.
 258:      *
 259:      * @param xData  the x-data.
 260:      * @param yData  the y-data.
 261:      *
 262:      * @return A double array with the intercept in [0] and the slope in [1].
 263:      */
 264:     public static double[] getLinearFit(Number[] xData, Number[] yData) {
 265: 
 266:         // check arguments...
 267:         if (xData.length != yData.length) {
 268:             throw new IllegalArgumentException(
 269:                 "Statistics.getLinearFit(): array lengths must be equal.");
 270:         }
 271: 
 272:         double[] result = new double[2];
 273:         // slope
 274:         result[1] = getSlope(xData, yData);
 275:         // intercept
 276:         result[0] = calculateMean(yData) - result[1] * calculateMean(xData);
 277: 
 278:         return result;
 279: 
 280:     }
 281: 
 282:     /**
 283:      * Finds the slope of a regression line using least squares.
 284:      *
 285:      * @param xData  an array of Numbers (the x values).
 286:      * @param yData  an array of Numbers (the y values).
 287:      *
 288:      * @return The slope.
 289:      */
 290:     public static double getSlope(Number[] xData, Number[] yData) {
 291: 
 292:         // check arguments...
 293:         if (xData.length != yData.length) {
 294:             throw new IllegalArgumentException("Array lengths must be equal.");
 295:         }
 296: 
 297:         // ********* stat function for linear slope ********
 298:         // y = a + bx
 299:         // a = ybar - b * xbar
 300:         //     sum(x * y) - (sum (x) * sum(y)) / n
 301:         // b = ------------------------------------
 302:         //     sum (x^2) - (sum(x)^2 / n
 303:         // *************************************************
 304: 
 305:         // sum of x, x^2, x * y, y
 306:         double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0;
 307:         int counter;
 308:         for (counter = 0; counter < xData.length; counter++) {
 309:             sx = sx + xData[counter].doubleValue();
 310:             sxx = sxx + Math.pow(xData[counter].doubleValue(), 2);
 311:             sxy = sxy + yData[counter].doubleValue() 
 312:                       * xData[counter].doubleValue();
 313:             sy = sy + yData[counter].doubleValue();
 314:         }
 315:         return (sxy - (sx * sy) / counter) / (sxx - (sx * sx) / counter);
 316: 
 317:     }
 318: 
 319:     /**
 320:      * Calculates the correlation between two datasets.  Both arrays should 
 321:      * contain the same number of items.  Null values are treated as zero.
 322:      * <P>
 323:      * Information about the correlation calculation was obtained from:
 324:      * 
 325:      * http://trochim.human.cornell.edu/kb/statcorr.htm
 326:      * 
 327:      * @param data1  the first dataset.
 328:      * @param data2  the second dataset.
 329:      * 
 330:      * @return The correlation.
 331:      */
 332:     public static double getCorrelation(Number[] data1, Number[] data2) {
 333:         if (data1 == null) {
 334:             throw new IllegalArgumentException("Null 'data1' argument.");
 335:         }
 336:         if (data2 == null) {
 337:             throw new IllegalArgumentException("Null 'data2' argument.");
 338:         }
 339:         if (data1.length != data2.length) {
 340:             throw new IllegalArgumentException(
 341:                 "'data1' and 'data2' arrays must have same length."
 342:             );   
 343:         }
 344:         int n = data1.length;
 345:         double sumX = 0.0;
 346:         double sumY = 0.0;
 347:         double sumX2 = 0.0;
 348:         double sumY2 = 0.0;
 349:         double sumXY = 0.0;
 350:         for (int i = 0; i < n; i++) {
 351:             double x = 0.0;
 352:             if (data1[i] != null) {
 353:                 x = data1[i].doubleValue();   
 354:             }
 355:             double y = 0.0;
 356:             if (data2[i] != null) {
 357:                 y = data2[i].doubleValue();   
 358:             }
 359:             sumX = sumX + x;
 360:             sumY = sumY + y;
 361:             sumXY = sumXY + (x * y);
 362:             sumX2 = sumX2 + (x * x);
 363:             sumY2 = sumY2 + (y * y);
 364:         }
 365:         return (n * sumXY - sumX * sumY) / Math.pow((n * sumX2 - sumX * sumX) 
 366:                 * (n * sumY2 - sumY * sumY), 0.5);      
 367:     }
 368: 
 369:     /**
 370:      * Returns a data set for a moving average on the data set passed in.
 371:      *
 372:      * @param xData  an array of the x data.
 373:      * @param yData  an array of the y data.
 374:      * @param period  the number of data points to average
 375:      *
 376:      * @return A double[][] the length of the data set in the first dimension,
 377:      *         with two doubles for x and y in the second dimension
 378:      */
 379:     public static double[][] getMovingAverage(Number[] xData, 
 380:                                               Number[] yData, 
 381:                                               int period) {
 382: 
 383:         // check arguments...
 384:         if (xData.length != yData.length) {
 385:             throw new IllegalArgumentException("Array lengths must be equal.");
 386:         }
 387: 
 388:         if (period > xData.length) {
 389:             throw new IllegalArgumentException(
 390:                 "Period can't be longer than dataset."
 391:             );
 392:         }
 393: 
 394:         double[][] result = new double[xData.length - period][2];
 395:         for (int i = 0; i < result.length; i++) {
 396:             result[i][0] = xData[i + period].doubleValue();
 397:             // holds the moving average sum
 398:             double sum = 0.0;
 399:             for (int j = 0; j < period; j++) {
 400:                 sum += yData[i + j].doubleValue();
 401:             }
 402:             sum = sum / period;
 403:             result[i][1] = sum;
 404:         }
 405:         return result;
 406: 
 407:     }
 408: 
 409: }