Arduino Playground is read-only starting December 31st, 2018. For more info please look at this Forum Post

QuickStats Library - For Descriptive Statistics on Float Arrays

QuickStats is a library that provides simple descriptive statistics and linear regression (slope and intercept) for elements in float arrays, in Arduino.

Link to library: https://github.com/dndubins/QuickStats

I developed this library to help quickly accomplish median and mode filtering when collecting sensor data. Functions in this library operate on an array of float variables, of dimension "m", and return the corresponding statistic. This library was originally created for a data smoothing strategy for float variables. Using a median or mode filtering strategy (opposed to mean filtering) is better at removing spikes from aberrant readings. The other functions (stdev, CV, etc.) were included for fun.

A bubble sort algorithm is also contained in this library which was necessary to calculate median and mode.

The functions available in the library include:

    average(samples[],m); // the average of elements in samples[m]
    g_average(samples[],m); // the geometric mean of elements in samples[m]
    minimum(samples[],m); // the minimum value in samples[m]
    maximum(samples[],m); // the maximum value in samples[m]
    stdev(samples[],m); // the sample standard deviation of elements in samples[m]
    stderror(samples[],m); // the standard error of elements in samples[m] 
    CV(samples[],m); // the coefficient of variation in samples[m] in percent
    bubbleSort(samples[],m); // sorting algorithm to arrange the elements in samples[m]
    fabs(sample); // absolute value of a float, used in mode()
    median(samples[],m); // the median of elements in samples[m]
    mode(samples[],m,epsilon); // the mode of elements in samples[m] (returns 0 if no mode)
    slope(x[],samples[],m); // slope of linear regression dsamples/dx
    intercept(x[],samples[],m);// intercept of linear regression dsamples/dx
    filternan(float samples[],int &m); // filter nan and inf values out of a 1-dimensional array (destructive void function)
    f_round(float samples[],int m,int p); // overwrites samples[m] with values rounded to p decimals

  1. /* // Example program for use with QuickStats.h
  2.  
  3. #include "QuickStats.h"
  4.  
  5. int numreadings = 9;
  6. float readings[]={1.0,2.2,4.8,3.3,6.1,2.2,3.8,7.0,2.2};
  7.  
  8. QuickStats stats; //initialize an instance of this class
  9. Serial.begin(9600);
  10. Serial.println("Descriptive Statistics");
  11. Serial.print("Average: ");
  12. Serial.println(stats.average(readings,numreadings));
  13. Serial.print("Geometric mean: ");
  14. Serial.println(stats.g_average(readings,numreadings));
  15. Serial.print("Minimum: ");
  16. Serial.println(stats.minimum(readings,numreadings));
  17. Serial.print("Maximum: ");
  18. Serial.println(stats.maximum(readings,numreadings));
  19. Serial.print("Standard Deviation: ");
  20. Serial.println(stats.stdev(readings,numreadings));
  21. Serial.print("Standard Error: ");
  22. Serial.println(stats.stderror(readings,numreadings));
  23. Serial.print("Coefficient of Variation (%): ");
  24. Serial.println(stats.CV(readings,numreadings));
  25. Serial.print("Median: ");
  26. Serial.println(stats.median(readings,numreadings));
  27. Serial.print("Mode: ");
  28. Serial.println(stats.mode(readings,numreadings,0.00001));
  29. }
  30. //END OF FILE

Notes

To use the library, make a folder in your SKETCHBOOKPATH\libraries with the name QuickStats and put the .h and .cpp there.

To do

Looking at creating a version for integers.

QuickStats.h file

QuickStats.h:

  1. /* QuickStats.h - Library for quick descriptive statistics of an array samples[] of size m,
  2. * assuming a normal distribution.
  3. * Created by David Dubins, January 10th, 2016.
  4. * Released into the public domain.
  5. */
  6.  
  7. #ifndef QuickStats_h
  8. #define QuickStats_h
  9.  
  10. #include <Arduino.h>
  11.  
  12. class QuickStats {
  13. public:
  14. QuickStats();
  15. ~QuickStats();
  16. float average(float samples[],int m);
  17. float g_average(float samples[],int m);
  18. float minimum(float samples[],int m);
  19. float maximum(float samples[],int m);
  20. float stdev(float samples[],int m);
  21. float stderror(float samples[],int m);
  22. float CV(float samples[],int m);
  23. void bubbleSort(float A[],int len);
  24. float median(float samples[],int m);
  25. float mode(float samples[],int m,float epsilon);
  26. float slope(float x[],float samples[],int m);
  27. float intercept(float x[],float samples[],int m);
  28. void filternan(float samples[],int &m);
  29. };
  30.  
  31. #endif
  32. //END OF FILE

QuickStats.cpp

  1. /* QuickStats.cpp - Library for quick descriptive statistics of an array samples[] of size m
  2. * Created by David Dubins, January 10th, 2016.
  3. * Released into the public domain.
  4. * Requires Arduino 1.6.6 or greater.
  5. * https://pb860.pbworks.com
  6. */
  7.  
  8. #include "Arduino.h"
  9. #include "QuickStats.h"
  10. #include <math.h>
  11.  
  12. QuickStats::QuickStats(){/*nothing to construct*/}
  13. QuickStats::~QuickStats(){/*nothing to destruct*/}
  14.  
  15. float QuickStats::average(float samples[],int m)
  16. {
  17. float total1=0.0;
  18. for(int i=0;i<m;i++){
  19. total1=total1+samples[i];
  20. }
  21. return total1/(float)m;
  22. }
  23.  
  24. float QuickStats::g_average(float samples[],int m)
  25. {
  26. float total1=0.0;
  27. for(int i=0;i<m;i++){
  28. total1=total1+log(samples[i]);
  29. }
  30. return exp(total1/(float)m);
  31. }
  32.  
  33. float QuickStats::minimum(float samples[],int m)
  34. {
  35. float sorted[m]; //Define and initialize sorted array
  36. for(int i=0;i<m;i++){
  37. sorted[i]=samples[i];
  38. }
  39. bubbleSort(sorted,m); // Sort the values
  40. return(sorted[0]); // first element is the minimum
  41. }
  42.  
  43. float QuickStats::maximum(float samples[],int m)
  44. {
  45. float sorted[m]; //Define and initialize sorted array
  46. for(int i=0;i<m;i++){
  47. sorted[i]=samples[i];
  48. }
  49. bubbleSort(sorted,m); // Sort the values
  50. return(sorted[m-1]); // last element is the maximum
  51. }
  52.  
  53. float QuickStats::stdev(float samples[],int m)
  54. {
  55. float avg=0.0;
  56. float total2=0.0;
  57. avg=average(samples,m);
  58. for(int i=0;i<m;i++){
  59. total2 = total2 + pow(samples[i] - avg,2);
  60. }
  61. return sqrt(total2/((float)m-1.0));
  62. }
  63.  
  64. float QuickStats::stderror(float samples[],int m)
  65. {
  66. float temp1=0.0;
  67. temp1=stdev(samples,m);
  68. return (temp1/sqrt((float)m));
  69. }
  70.  
  71. float QuickStats::CV(float samples[],int m) //Coefficient of variation (%RSD, or relative stdev)
  72. {
  73. float avg=0.0;
  74. float sd=0.0;
  75. avg=average(samples,m);
  76. sd=stdev(samples,m);
  77. return 100.0*sd/avg;
  78. }
  79.  
  80. void QuickStats::bubbleSort(float A[],int len)
  81. {
  82. unsigned long newn;
  83. unsigned long n=len;
  84. float temp=0.0;
  85. do {
  86. newn=1;
  87. for(int p=1;p<len;p++){
  88. if(A[p-1]>A[p]){
  89. temp=A[p]; //swap places in array
  90. A[p]=A[p-1];
  91. A[p-1]=temp;
  92. newn=p;
  93. } //end if
  94. } //end for
  95. n=newn;
  96. } while(n>1);
  97. }
  98.  
  99. float QuickStats::fabs(float sample) // calculate absolute value
  100. {
  101. if(sample<0.f){
  102. return -sample;
  103. }else{
  104. return sample;
  105. }
  106. }
  107.  
  108. float QuickStats::median(float samples[],int m) //calculate the median
  109. {
  110. //First bubble sort the values: https://en.wikipedia.org/wiki/Bubble_sort
  111. float sorted[m]; //Define and initialize sorted array.
  112. float temp=0.0; //Temporary float for swapping elements
  113. /*Serial.println("Before:");
  114. for(int j=0;j<m;j++){
  115. Serial.println(samples[j]);
  116. }*/
  117. for(int i=0;i<m;i++){
  118. sorted[i]=samples[i];
  119. }
  120. bubbleSort(sorted,m); // Sort the values
  121. /*Serial.println("After:");
  122. for(int i=0;i<m;i++){
  123. Serial.println(sorted[i]);
  124. }*/
  125. if (bitRead(m,0)==1) { //If the last bit of a number is 1, it's odd. This is equivalent to "TRUE". Also use if m%2!=0.
  126. return sorted[m/2]; //If the number of data points is odd, return middle number.
  127. } else {
  128. return (sorted[(m/2)-1]+sorted[m/2])/2; //If the number of data points is even, return avg of the middle two numbers.
  129. }
  130. }
  131.  
  132. float QuickStats::mode(float samples[],int m,float epsilon) //calculate the mode.
  133. //epsilon is the tolerance for two measurements to be equivalent.
  134. {
  135. //First bubble sort the values: https://en.wikipedia.org/wiki/Bubble_sort
  136. float sorted[m]; //Temporary array to sort values.
  137. float temp=0; //Temporary float for swapping elements
  138. float unique[m]; //Temporary array to store unique values
  139. int uniquect[m]; //Temporary array to store unique counts
  140. /*Serial.println("Before:");
  141. for(int i=0;i<m;i++){
  142. Serial.println(samples[i]);
  143. }*/
  144. for(int i=0;i<m;i++){
  145. sorted[i]=samples[i];
  146. }
  147. bubbleSort(sorted,m); // Sort the values
  148. /*Serial.println("Sorted:");
  149. for(int i=0;i<m;i++){
  150. Serial.println(sorted[i]);
  151. }*/
  152. // Now count the number of times each unique number appears in the sorted array.
  153. unique[0]=sorted[0];
  154. uniquect[0]=1;
  155. int p=0; // counter for # unique numbers
  156. int maxp=0;
  157. int maxidx=0;
  158. for(int i=1;i<m;i++){
  159. if(fabs(sorted[i]-sorted[p])<epsilon){
  160. uniquect[p]++; //if same number again, add to count
  161. if(uniquect[p]>maxp){
  162. maxp=uniquect[p];
  163. maxidx=p;
  164. }
  165. } else {
  166. p++;
  167. unique[p]=sorted[i];
  168. uniquect[p]=1;
  169. }
  170. }
  171. /*for(int i=0;i<p+1;i++){
  172. Serial.println("Num: " + (String)unique[i] +" Count: " + (String)uniquect[i]);
  173. }*/
  174. if (maxp>1) {
  175. return unique[maxidx]; //If there is more than one mode, return the lowest one.
  176. } else {
  177. return 0.0; //If there is no mode, return a zero.
  178. }
  179. }
  180.  
  181. float QuickStats::slope(float x[],float samples[],int m) //calculate the slope (dsamples/dx)
  182. {
  183. float xavg=average(x,m);
  184. float yavg=average(samples,m);
  185. float numerator = 0.0;
  186. float denominator = 0.0;
  187. for(int i=0;i<m;i++){
  188. if(x[i]-xavg!=0.0){ // protect against dividing by zero
  189. numerator = numerator + (x[i]-xavg)*(samples[i]-yavg);
  190. denominator = denominator + ((x[i]-xavg)*(x[i]-xavg));
  191. }
  192. }
  193. return numerator/denominator;
  194. }
  195.  
  196. float QuickStats::intercept(float x[],float samples[],int m) //calculate the intercept (dsamples/dx)
  197. {
  198. float xavg=average(x,m);
  199. float yavg=average(samples,m);
  200. float beta=slope(x,samples,m);
  201. return yavg-(beta*xavg);
  202. }
  203.  
  204. void QuickStats::filternan(float samples[],int &m) //removes nan values and returns size of filtered matrix (destructive)
  205. {
  206. int duds=0; //keep track of #nans
  207. int nums=0; //keep track of numbers
  208. float filtered[m];
  209. for(int i=0;i<m;i++){
  210. if(isnan(samples[i])||isinf(samples[i])){
  211. duds++; // found a nan
  212. }else{
  213. filtered[nums]=samples[i];
  214. nums++; // found a number
  215. }
  216. }
  217. for(int i=0;i<nums;i++){
  218. samples[i]=filtered[i]; //overwrite sample matrix with filtered matrix
  219. }
  220. m=nums; //overwrite matrix size
  221. }
  222.  
  223. void QuickStats::f_round(float samples[], int m, int p) //round float variable to a given # decimals, p
  224. {
  225. float precision=pow(10.0,p);
  226. for(int i=0;i<m;i++){
  227. samples[i]=round(samples[i]*precision)/precision;
  228. }
  229. }
  230.  
  231. //END OF FILE