1 /* 2 ------------------------------------------------------------------- 3 4 Copyright (C) 2014, Edwin van Leeuwen 5 6 This file is part of plotd plotting library. 7 8 Plotd is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or 11 (at your option) any later version. 12 13 Plotd is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with Plotd. If not, see <http://www.gnu.org/licenses/>. 20 21 ------------------------------------------------------------------- 22 */ 23 24 module plotd.binning; 25 26 import std.array; 27 import std.conv : to; 28 import std.range; 29 30 import plotd.primitives : Bounds; 31 32 version( unittest ) { 33 import std.algorithm; 34 import std.stdio; 35 } 36 37 /** 38 The struct Bins is a container holding binned data 39 */ 40 struct Bins(T) { 41 double min; 42 double width; 43 44 @property double max() { 45 return min + width*(mybins.length); 46 } 47 48 /// How many bins does the container have 49 @property size_t length() { 50 return mybins.length; 51 } 52 53 /// Set length/number of bins 54 @property void length( size_t noBins ) { 55 mybins.length = noBins; 56 } 57 58 /// save the container position 59 @property Bins!T save() { return this; } 60 61 /// Access bin by index 62 ref T opIndex(size_t index) 63 { 64 return mybins[index]; 65 } 66 67 int opApply(int delegate( ref T ) dg) 68 { 69 int result; 70 71 double x = min; 72 foreach ( el; mybins ) 73 { 74 result = dg( el ); 75 if (result) 76 break; 77 78 x += width; 79 } 80 return result; 81 } 82 83 int opApply(int delegate( double, ref T ) dg) 84 { 85 int result; 86 87 double x = min; 88 foreach ( el; mybins ) 89 { 90 result = dg( x, el ); 91 if (result) 92 break; 93 94 x += width; 95 } 96 return result; 97 } 98 99 private: 100 T[] mybins; 101 } 102 103 /// For loop over Bins 104 unittest { 105 Bins!size_t bins; 106 bins.min = -1; 107 bins.width = 0.5; 108 bins.mybins = [1,2,3,4]; 109 110 size_t correct_el = 1; 111 foreach ( el; bins ) { 112 assert( correct_el == el ); 113 correct_el++; 114 } 115 116 double correct_x = bins.min; 117 correct_el = 1; 118 foreach ( x, el; bins ) { 119 assert( correct_x == x ); 120 assert( correct_el == el ); 121 correct_x += bins.width; 122 correct_el++; 123 } 124 } 125 126 /// Number of Bins 127 unittest { 128 Bins!size_t bins; 129 bins.min = -1; 130 bins.width = 0.5; 131 bins.length = 3; 132 assert( bins.length == 3 ); 133 assert( equal( bins.mybins, [0,0,0] ) ); 134 } 135 136 /** 137 Calculate bin id based on data value and Bins 138 139 TODO implement this for multidimensional bins 140 */ 141 size_t binId(T)( in Bins!T bins, double data ) { 142 assert( data >= bins.min ); 143 return cast(size_t)( (data-bins.min)/bins.width ); 144 } 145 146 unittest { 147 Bins!size_t bins; 148 bins.min = -1; 149 bins.width = 0.5; 150 assert( binId!size_t( bins, -1.0 ) == 0 ); 151 assert( binId( bins, -0.5 ) == 1 ); 152 assert( bins.binId( -0.25 ) == 1 ); 153 } 154 155 /** 156 Add data to the given bin id 157 158 bin_ids is an array in case of multidimensional bins 159 160 Ignore values that fall outside of the existing bin range 161 */ 162 Bins!T addDataToBin( T )( Bins!T bins, const size_t[] binIds ) 163 { 164 static if (__traits( compiles, 165 bins[binIds[0]].addDataToBin( binIds[1..$] ) ) ) { 166 if (binIds[0] < bins.length) { 167 bins[binIds[0]].addDataToBin( binIds[1..$] ); 168 } 169 } 170 else { 171 if (binIds[0] < bins.length) { 172 bins[binIds[0]]++; 173 } 174 } 175 return bins; 176 } 177 178 unittest { 179 Bins!size_t bins; 180 bins.min = -1; 181 bins.width = 0.5; 182 bins.mybins = [1,2,3,4]; 183 184 bins.addDataToBin( [1] ); 185 assert( bins.mybins[1] == 3 ); 186 bins.addDataToBin( [3] ); 187 assert( bins.mybins[3] == 5 ); 188 189 Bins!(Bins!size_t) mbins; 190 mbins.min = -1; 191 mbins.width = 0.5; 192 mbins.mybins = [bins, bins]; 193 mbins.addDataToBin( [1,2] ); 194 assert( mbins.mybins[1].mybins[2] == 4 ); 195 mbins.addDataToBin( [1,3] ); 196 assert( mbins.mybins[1].mybins[3] == 6 ); 197 } 198 199 /** 200 Calculate bounds that will at least cover the given percentage of data 201 */ 202 Bounds optimalBounds( T )( Bins!T bins, double coverage = 0.95 ) { 203 Bounds bounds; 204 bounds.min_y = 0; 205 size_t maxCount = 0; 206 size_t maxID = 0; 207 double[2] sides = [0,0]; 208 double sum = 0; 209 foreach( i; 0..bins.length ) { 210 auto count = bins.mybins[i]; 211 sum += count; 212 if (count > maxCount) { 213 maxCount = count; 214 maxID = i; 215 sides[0] += sides[1]; 216 sides[1] = maxCount; 217 } else if (sides[1] > 0) 218 sides [1] += count; 219 else 220 sides[0] += count; 221 } 222 223 size_t[2] sideIDs = [maxID,maxID+1]; 224 sides[1] -= maxCount; 225 double covered = (maxCount.to!double)/sum; 226 while (covered < coverage) { 227 if (sides[0] > sides[1]) { 228 --sideIDs[0]; 229 sides[0] -= bins.mybins[sideIDs[0]]; 230 covered += bins.mybins[sideIDs[0]].to!double/sum; 231 } else { 232 sides[1] -= bins.mybins[sideIDs[1]]; 233 covered += bins.mybins[sideIDs[1]].to!double/sum; 234 ++sideIDs[1]; 235 } 236 } 237 bounds.min_x = bins.min + sideIDs[0]*bins.width; 238 bounds.max_x = bins.min + sideIDs[1]*bins.width; 239 bounds.max_y = 1.5*maxCount; 240 return bounds; 241 } 242 243 unittest { 244 Bins!size_t bins; 245 bins.min = -1; 246 bins.width = 0.5; 247 248 bins.mybins = [1,3,4,1]; 249 auto bounds = bins.optimalBounds; 250 assert( bounds == Bounds( -1, 1, 0, 6 ) ); 251 252 bins.mybins = [0,0,4,0]; 253 bounds = bins.optimalBounds; 254 assert( bounds == Bounds( 0, 0.5, 0, 6 ) ); 255 256 bins.mybins = [2,0,4,0]; 257 bounds = bins.optimalBounds; 258 assert( bounds == Bounds( -1, 0.5, 0, 6 ) ); 259 } 260 261 /** 262 Resize the bins 263 264 Again an array in case of resizing multidmensional arrays 265 */ 266 /*Bins!T resize( T )( Bins!T bins, const size_t[] new_length ) { 267 T default_value; 268 assert( bins.mybins.length > 0, 269 "Multidimensional need to have at least one bin to correctly resize" ); 270 default_value = new T; 271 default_value.min = bins.mybins[0].min; 272 default_value.width = bins.mybins[0].width; 273 while ( bins.mybins.length < new_length[0] ) 274 bins.mybins ~= [default_value]; 275 276 if ( new_length.length > 1 ) 277 foreach ( ref T bin; bins.mybins ) 278 bin.resize( new_length[1..$] ); 279 280 return bins; 281 } 282 283 Bins!T resize( T : size_t )( Bins!T bins, const size_t[] new_length ) { 284 T default_value = 0; 285 286 while ( bins.mybins.length < new_length[0] ) 287 bins.mybins ~= [default_value]; 288 return bins; 289 } 290 291 unittest { 292 auto bins = new Bins!size_t; 293 bins.min = -1; 294 bins.width = 0.5; 295 bins.mybins = [1,2,3,4]; 296 297 bins.resize( [3] ); 298 assert( bins.length == 4 ); 299 bins.resize( [6] ); 300 assert( bins.length == 6 ); 301 302 auto mbins = new Bins!(Bins!size_t); 303 mbins.min = -1; 304 mbins.width = 0.5; 305 mbins.mybins = [bins, bins.dup]; 306 mbins.resize( [6] ); 307 assert( mbins.length == 6 ); 308 assert( mbins.mybins[5].min == bins.min ); 309 assert( mbins.mybins[5].width == bins.width ); 310 mbins.resize( [7,8] ); 311 assert( mbins.length == 7 ); 312 313 foreach( x, bin; mbins ) 314 assert( bin.length == 8 ); 315 }*/ 316 317 318 /** 319 Calculate bin id based on data value and Bins 320 */ 321 /*size_t bin_id(T)( const Bins!T bins, double data ) { 322 assert( data >= bins.min ); 323 return cast(size_t)( (data-bins.min)/bins.width ); 324 } 325 unittest { 326 auto bins = new Bins!size_t; 327 bins.min = -1; 328 bins.width = 0.5; 329 assert( bin_id( bins, -1 ) == 0 ); 330 assert( bin_id( bins, -0.5 ) == 1 ); 331 assert( bin_id( bins, -0.25 ) == 1 ); 332 }*/ 333 /** 334 Add data to existing Bins 335 */ 336 /*Bins!T addData( T )( Bins!T bins, const double[] data ) { 337 size_t[] ids; 338 auto bins_step = bins; 339 for ( size_t i = 0; i < data.length; i++ ) { 340 ids ~= bin_id( bins_step, data[i] ); 341 if (i < data.length -1) 342 bins_step = bins_step.mybins[0]; 343 } 344 return addDataToBin( bins, ids.reverse ); 345 } 346 347 Bins!T addData( T : size_t )( Bins!T bins, const double[] data ) { 348 return addDataToBin( bins, [bin_id( bins, data[0] )] ); 349 } 350 351 unittest { 352 auto bins = new Bins!size_t; 353 bins.min = -1; 354 bins.width = 0.5; 355 bins.max_size = 4; 356 bins.mybins = [1,2,3,4]; 357 358 bins = bins.addData( [-0.25] ); 359 assert( bins.mybins[1] == 3 ); 360 bins = addData( bins, [0.75] ); 361 assert( bins.mybins[3] == 5 ); 362 assert( bins.max_size == 5 ); 363 364 auto mbins = new Bins!(Bins!size_t); 365 mbins.min = -1; 366 mbins.width = 0.5; 367 mbins.mybins = [bins, bins.dup]; 368 mbins.max_size = 5; 369 mbins.addDataToBin( [1,2] ); 370 assert( mbins.mybins[1].mybins[2] == 4 ); 371 mbins.addDataToBin( [1,3] ); 372 assert( mbins.mybins[1].mybins[3] == 6 ); 373 assert( mbins.max_size == 6 ); 374 }*/