001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math.stat.descriptive.moment; 018 019 import java.io.Serializable; 020 021 import org.apache.commons.math.MathRuntimeException; 022 import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic; 023 024 /** 025 * Computes the variance of the available values. By default, the unbiased 026 * "sample variance" definitional formula is used: 027 * <p> 028 * variance = sum((x_i - mean)^2) / (n - 1) </p> 029 * <p> 030 * where mean is the {@link Mean} and <code>n</code> is the number 031 * of sample observations.</p> 032 * <p> 033 * The definitional formula does not have good numerical properties, so 034 * this implementation does not compute the statistic using the definitional 035 * formula. <ul> 036 * <li> The <code>getResult</code> method computes the variance using 037 * updating formulas based on West's algorithm, as described in 038 * <a href="http://doi.acm.org/10.1145/359146.359152"> Chan, T. F. and 039 * J. G. Lewis 1979, <i>Communications of the ACM</i>, 040 * vol. 22 no. 9, pp. 526-531.</a></li> 041 * <li> The <code>evaluate</code> methods leverage the fact that they have the 042 * full array of values in memory to execute a two-pass algorithm. 043 * Specifically, these methods use the "corrected two-pass algorithm" from 044 * Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>, 045 * American Statistician, August 1983.</li></ul> 046 * Note that adding values using <code>increment</code> or 047 * <code>incrementAll</code> and then executing <code>getResult</code> will 048 * sometimes give a different, less accurate, result than executing 049 * <code>evaluate</code> with the full array of values. The former approach 050 * should only be used when the full array of values is not available.</p> 051 * <p> 052 * The "population variance" ( sum((x_i - mean)^2) / n ) can also 053 * be computed using this statistic. The <code>isBiasCorrected</code> 054 * property determines whether the "population" or "sample" value is 055 * returned by the <code>evaluate</code> and <code>getResult</code> methods. 056 * To compute population variances, set this property to <code>false.</code> 057 * </p> 058 * <p> 059 * <strong>Note that this implementation is not synchronized.</strong> If 060 * multiple threads access an instance of this class concurrently, and at least 061 * one of the threads invokes the <code>increment()</code> or 062 * <code>clear()</code> method, it must be synchronized externally.</p> 063 * 064 * @version $Revision: 772119 $ $Date: 2009-05-06 05:43:28 -0400 (Wed, 06 May 2009) $ 065 */ 066 public class Variance extends AbstractStorelessUnivariateStatistic implements Serializable { 067 068 /** Serializable version identifier */ 069 private static final long serialVersionUID = -9111962718267217978L; 070 071 /** SecondMoment is used in incremental calculation of Variance*/ 072 protected SecondMoment moment = null; 073 074 /** 075 * Boolean test to determine if this Variance should also increment 076 * the second moment, this evaluates to false when this Variance is 077 * constructed with an external SecondMoment as a parameter. 078 */ 079 protected boolean incMoment = true; 080 081 /** 082 * Determines whether or not bias correction is applied when computing the 083 * value of the statisic. True means that bias is corrected. See 084 * {@link Variance} for details on the formula. 085 */ 086 private boolean isBiasCorrected = true; 087 088 /** 089 * Constructs a Variance with default (true) <code>isBiasCorrected</code> 090 * property. 091 */ 092 public Variance() { 093 moment = new SecondMoment(); 094 } 095 096 /** 097 * Constructs a Variance based on an external second moment. 098 * 099 * @param m2 the SecondMoment (Third or Fourth moments work 100 * here as well.) 101 */ 102 public Variance(final SecondMoment m2) { 103 incMoment = false; 104 this.moment = m2; 105 } 106 107 /** 108 * Constructs a Variance with the specified <code>isBiasCorrected</code> 109 * property 110 * 111 * @param isBiasCorrected setting for bias correction - true means 112 * bias will be corrected and is equivalent to using the argumentless 113 * constructor 114 */ 115 public Variance(boolean isBiasCorrected) { 116 moment = new SecondMoment(); 117 this.isBiasCorrected = isBiasCorrected; 118 } 119 120 /** 121 * Constructs a Variance with the specified <code>isBiasCorrected</code> 122 * property and the supplied external second moment. 123 * 124 * @param isBiasCorrected setting for bias correction - true means 125 * bias will be corrected 126 * @param m2 the SecondMoment (Third or Fourth moments work 127 * here as well.) 128 */ 129 public Variance(boolean isBiasCorrected, SecondMoment m2) { 130 incMoment = false; 131 this.moment = m2; 132 this.isBiasCorrected = isBiasCorrected; 133 } 134 135 /** 136 * Copy constructor, creates a new {@code Variance} identical 137 * to the {@code original} 138 * 139 * @param original the {@code Variance} instance to copy 140 */ 141 public Variance(Variance original) { 142 copy(original, this); 143 } 144 145 /** 146 * {@inheritDoc} 147 * <p>If all values are available, it is more accurate to use 148 * {@link #evaluate(double[])} rather than adding values one at a time 149 * using this method and then executing {@link #getResult}, since 150 * <code>evaluate</code> leverages the fact that is has the full 151 * list of values together to execute a two-pass algorithm. 152 * See {@link Variance}.</p> 153 */ 154 @Override 155 public void increment(final double d) { 156 if (incMoment) { 157 moment.increment(d); 158 } 159 } 160 161 /** 162 * {@inheritDoc} 163 */ 164 @Override 165 public double getResult() { 166 if (moment.n == 0) { 167 return Double.NaN; 168 } else if (moment.n == 1) { 169 return 0d; 170 } else { 171 if (isBiasCorrected) { 172 return moment.m2 / (moment.n - 1d); 173 } else { 174 return moment.m2 / (moment.n); 175 } 176 } 177 } 178 179 /** 180 * {@inheritDoc} 181 */ 182 public long getN() { 183 return moment.getN(); 184 } 185 186 /** 187 * {@inheritDoc} 188 */ 189 @Override 190 public void clear() { 191 if (incMoment) { 192 moment.clear(); 193 } 194 } 195 196 /** 197 * Returns the variance of the entries in the input array, or 198 * <code>Double.NaN</code> if the array is empty. 199 * <p> 200 * See {@link Variance} for details on the computing algorithm.</p> 201 * <p> 202 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 203 * <p> 204 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 205 * <p> 206 * Does not change the internal state of the statistic.</p> 207 * 208 * @param values the input array 209 * @return the variance of the values or Double.NaN if length = 0 210 * @throws IllegalArgumentException if the array is null 211 */ 212 @Override 213 public double evaluate(final double[] values) { 214 if (values == null) { 215 throw MathRuntimeException.createIllegalArgumentException("input values array is null"); 216 } 217 return evaluate(values, 0, values.length); 218 } 219 220 /** 221 * Returns the variance of the entries in the specified portion of 222 * the input array, or <code>Double.NaN</code> if the designated subarray 223 * is empty. 224 * <p> 225 * See {@link Variance} for details on the computing algorithm.</p> 226 * <p> 227 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 228 * <p> 229 * Does not change the internal state of the statistic.</p> 230 * <p> 231 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 232 * 233 * @param values the input array 234 * @param begin index of the first array element to include 235 * @param length the number of elements to include 236 * @return the variance of the values or Double.NaN if length = 0 237 * @throws IllegalArgumentException if the array is null or the array index 238 * parameters are not valid 239 */ 240 @Override 241 public double evaluate(final double[] values, final int begin, final int length) { 242 243 double var = Double.NaN; 244 245 if (test(values, begin, length)) { 246 clear(); 247 if (length == 1) { 248 var = 0.0; 249 } else if (length > 1) { 250 Mean mean = new Mean(); 251 double m = mean.evaluate(values, begin, length); 252 var = evaluate(values, m, begin, length); 253 } 254 } 255 return var; 256 } 257 258 /** 259 * Returns the variance of the entries in the specified portion of 260 * the input array, using the precomputed mean value. Returns 261 * <code>Double.NaN</code> if the designated subarray is empty. 262 * <p> 263 * See {@link Variance} for details on the computing algorithm.</p> 264 * <p> 265 * The formula used assumes that the supplied mean value is the arithmetic 266 * mean of the sample data, not a known population parameter. This method 267 * is supplied only to save computation when the mean has already been 268 * computed.</p> 269 * <p> 270 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 271 * <p> 272 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 273 * <p> 274 * Does not change the internal state of the statistic.</p> 275 * 276 * @param values the input array 277 * @param mean the precomputed mean value 278 * @param begin index of the first array element to include 279 * @param length the number of elements to include 280 * @return the variance of the values or Double.NaN if length = 0 281 * @throws IllegalArgumentException if the array is null or the array index 282 * parameters are not valid 283 */ 284 public double evaluate(final double[] values, final double mean, 285 final int begin, final int length) { 286 287 double var = Double.NaN; 288 289 if (test(values, begin, length)) { 290 if (length == 1) { 291 var = 0.0; 292 } else if (length > 1) { 293 double accum = 0.0; 294 double dev = 0.0; 295 double accum2 = 0.0; 296 for (int i = begin; i < begin + length; i++) { 297 dev = values[i] - mean; 298 accum += dev * dev; 299 accum2 += dev; 300 } 301 double len = length; 302 if (isBiasCorrected) { 303 var = (accum - (accum2 * accum2 / len)) / (len - 1.0); 304 } else { 305 var = (accum - (accum2 * accum2 / len)) / len; 306 } 307 } 308 } 309 return var; 310 } 311 312 /** 313 * Returns the variance of the entries in the input array, using the 314 * precomputed mean value. Returns <code>Double.NaN</code> if the array 315 * is empty. 316 * <p> 317 * See {@link Variance} for details on the computing algorithm.</p> 318 * <p> 319 * If <code>isBiasCorrected</code> is <code>true</code> the formula used 320 * assumes that the supplied mean value is the arithmetic mean of the 321 * sample data, not a known population parameter. If the mean is a known 322 * population parameter, or if the "population" version of the variance is 323 * desired, set <code>isBiasCorrected</code> to <code>false</code> before 324 * invoking this method.</p> 325 * <p> 326 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 327 * <p> 328 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 329 * <p> 330 * Does not change the internal state of the statistic.</p> 331 * 332 * @param values the input array 333 * @param mean the precomputed mean value 334 * @return the variance of the values or Double.NaN if the array is empty 335 * @throws IllegalArgumentException if the array is null 336 */ 337 public double evaluate(final double[] values, final double mean) { 338 return evaluate(values, mean, 0, values.length); 339 } 340 341 /** 342 * @return Returns the isBiasCorrected. 343 */ 344 public boolean isBiasCorrected() { 345 return isBiasCorrected; 346 } 347 348 /** 349 * @param isBiasCorrected The isBiasCorrected to set. 350 */ 351 public void setBiasCorrected(boolean isBiasCorrected) { 352 this.isBiasCorrected = isBiasCorrected; 353 } 354 355 /** 356 * {@inheritDoc} 357 */ 358 @Override 359 public Variance copy() { 360 Variance result = new Variance(); 361 copy(this, result); 362 return result; 363 } 364 365 366 /** 367 * Copies source to dest. 368 * <p>Neither source nor dest can be null.</p> 369 * 370 * @param source Variance to copy 371 * @param dest Variance to copy to 372 * @throws NullPointerException if either source or dest is null 373 */ 374 public static void copy(Variance source, Variance dest) { 375 dest.moment = source.moment.copy(); 376 dest.isBiasCorrected = source.isBiasCorrected; 377 dest.incMoment = source.incMoment; 378 } 379 380 }