1 /*
2 	 -------------------------------------------------------------------
3 
4 	 Copyright (C) 2014, Edwin van Leeuwen
5 
6 	 This file is part of plotd plotting library.
7 
8 	 Plotd is free software; you can redistribute it and/or modify
9 	 it under the terms of the GNU General Public License as published by
10 	 the Free Software Foundation; either version 3 of the License, or
11 	 (at your option) any later version.
12 
13 	 Plotd is distributed in the hope that it will be useful,
14 	 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 	 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 	 GNU General Public License for more details.
17 
18 	 You should have received a copy of the GNU General Public License
19 	 along with Plotd. If not, see <http://www.gnu.org/licenses/>.
20 
21 	 -------------------------------------------------------------------
22 	 */
23 
24 module plotd.binning;
25 
26 import std.array;
27 import std.conv : to;
28 import std.range;
29 
30 import plotd.primitives : Bounds;
31 
32 version( unittest ) {
33 	import std.algorithm;
34 	import std.stdio;
35 }
36 
37 /**
38   The struct Bins is a container holding binned data
39  */
40 struct Bins(T) {
41     double min;
42     double width;
43 
44 		 @property double max() {
45 			return min + width*(mybins.length);
46 		}
47 
48     /// How many bins does the container have
49     @property size_t length() {
50         return mybins.length;
51     }
52 	
53 		/// Set length/number of bins
54     @property void length( size_t noBins ) {
55  			mybins.length = noBins;
56     }
57 
58     /// save the container position
59     @property Bins!T save() { return this; }
60 
61     /// Access bin by index
62     ref T opIndex(size_t index)
63     {
64         return mybins[index];
65     }
66 
67     int opApply(int delegate( ref T ) dg)
68     {
69         int result;
70 
71         double x = min;
72         foreach ( el; mybins )
73         {
74             result = dg( el );
75             if (result)
76                 break;
77 
78             x += width;
79         }
80         return result;
81     }
82     
83     int opApply(int delegate( double, ref T ) dg)
84     {
85         int result;
86 
87         double x = min;
88         foreach ( el; mybins )
89         {
90             result = dg( x, el );
91             if (result)
92                 break;
93 
94             x += width;
95         }
96         return result;
97     }
98 
99     private:
100         T[] mybins;
101 }
102 
103 /// For loop over Bins
104 unittest {
105     Bins!size_t bins;
106     bins.min = -1;
107     bins.width = 0.5;
108     bins.mybins = [1,2,3,4];
109 
110     size_t correct_el = 1;
111     foreach ( el; bins ) {
112         assert( correct_el == el );
113         correct_el++;
114     }
115 
116     double correct_x = bins.min;
117     correct_el = 1;
118     foreach ( x, el; bins ) {
119         assert( correct_x == x );
120         assert( correct_el == el );
121         correct_x += bins.width;
122         correct_el++;
123     }
124 }
125 
126 /// Number of Bins
127 unittest {
128     Bins!size_t bins;
129     bins.min = -1;
130     bins.width = 0.5;
131 		bins.length = 3;
132 		assert( bins.length == 3 );
133 		assert( equal( bins.mybins, [0,0,0] ) );
134 }
135  
136 /**
137   Calculate bin id based on data value and Bins
138 
139   TODO implement this for multidimensional bins
140   */
141 size_t binId(T)( in Bins!T bins, double data ) {
142     assert( data >= bins.min );
143     return cast(size_t)( (data-bins.min)/bins.width );
144 }
145 
146 unittest {
147     Bins!size_t bins;
148     bins.min = -1;
149     bins.width = 0.5;
150     assert( binId!size_t( bins, -1.0 ) == 0 );
151     assert( binId( bins, -0.5 ) == 1 );
152     assert( bins.binId( -0.25 ) == 1 );
153 }
154 
155 /**
156   Add data to the given bin id
157 
158   bin_ids is an array in case of multidimensional bins
159 
160   Ignore values that fall outside of the existing bin range
161  */
162 Bins!T addDataToBin( T )( Bins!T bins, const size_t[] binIds ) 
163 {
164     static if (__traits( compiles, 
165                 bins[binIds[0]].addDataToBin( binIds[1..$] ) ) ) {
166         if (binIds[0] < bins.length) {
167             bins[binIds[0]].addDataToBin( binIds[1..$] );
168         }
169     }
170     else {
171         if (binIds[0] < bins.length) {
172             bins[binIds[0]]++;
173         }
174     }
175     return bins;
176 }
177 
178 unittest {
179     Bins!size_t bins;
180     bins.min = -1;
181     bins.width = 0.5;
182     bins.mybins = [1,2,3,4];
183 
184     bins.addDataToBin( [1] );
185     assert( bins.mybins[1] == 3 );
186     bins.addDataToBin( [3] );
187     assert( bins.mybins[3] == 5 );
188 
189     Bins!(Bins!size_t) mbins;
190     mbins.min = -1;
191     mbins.width = 0.5;
192     mbins.mybins = [bins, bins];
193     mbins.addDataToBin( [1,2] );
194     assert( mbins.mybins[1].mybins[2] == 4 );
195     mbins.addDataToBin( [1,3] );
196     assert( mbins.mybins[1].mybins[3] == 6 );
197 }
198 
199 /**
200 	Calculate bounds that will at least cover the given percentage of data
201 	*/
202 Bounds optimalBounds( T )( Bins!T bins, double coverage = 0.95 ) {
203 	Bounds bounds;
204 	bounds.min_y = 0;
205 	size_t maxCount = 0;
206 	size_t maxID = 0;
207 	double[2] sides = [0,0];
208 	double sum = 0;
209 	foreach( i; 0..bins.length ) {
210 		auto count = bins.mybins[i];
211 		sum += count;
212 		if (count > maxCount) {
213 			maxCount = count;
214 			maxID = i;
215 			sides[0] += sides[1];
216 			sides[1] = maxCount;
217 		} else if (sides[1] > 0)
218 			sides [1] += count;
219 		else
220 			sides[0] += count;
221 	}
222 
223 	size_t[2] sideIDs = [maxID,maxID+1];
224 	sides[1] -= maxCount;
225 	double covered = (maxCount.to!double)/sum;
226 	while (covered < coverage) {
227 		if (sides[0] > sides[1]) {
228 			--sideIDs[0];
229 			sides[0] -= bins.mybins[sideIDs[0]];
230 			covered += bins.mybins[sideIDs[0]].to!double/sum;
231 		} else {
232 			sides[1] -= bins.mybins[sideIDs[1]];
233 			covered += bins.mybins[sideIDs[1]].to!double/sum;
234 			++sideIDs[1];
235 		}
236 	}
237 	bounds.min_x = bins.min + sideIDs[0]*bins.width;
238 	bounds.max_x = bins.min + sideIDs[1]*bins.width;
239 	bounds.max_y = 1.5*maxCount;
240 	return bounds;
241 }
242 
243 unittest {
244 	Bins!size_t bins;
245 	bins.min = -1;
246 	bins.width = 0.5;
247 	
248 	bins.mybins = [1,3,4,1];
249 	auto bounds = bins.optimalBounds;
250 	assert( bounds == Bounds( -1, 1, 0, 6 ) );
251 
252 	bins.mybins = [0,0,4,0];
253 	bounds = bins.optimalBounds;
254 	assert( bounds == Bounds( 0, 0.5, 0, 6 ) );
255 
256 	bins.mybins = [2,0,4,0];
257 	bounds = bins.optimalBounds;
258 	assert( bounds == Bounds( -1, 0.5, 0, 6 ) );
259 }
260 
261 /**
262   Resize the bins
263 
264   Again an array in case of resizing multidmensional arrays
265  */
266 /*Bins!T resize( T )( Bins!T bins, const size_t[] new_length ) {
267     T default_value;
268     assert( bins.mybins.length > 0, 
269             "Multidimensional need to have at least one bin to correctly resize" );
270     default_value = new T;
271     default_value.min = bins.mybins[0].min;
272     default_value.width = bins.mybins[0].width;
273     while ( bins.mybins.length < new_length[0] )
274         bins.mybins ~= [default_value];
275 
276     if ( new_length.length > 1 )
277         foreach ( ref T bin; bins.mybins )
278             bin.resize( new_length[1..$] );
279 
280     return bins;
281 }
282 
283 Bins!T resize( T : size_t )( Bins!T bins, const size_t[] new_length ) {
284     T default_value = 0;
285 
286     while ( bins.mybins.length < new_length[0] )
287         bins.mybins ~= [default_value];
288     return bins;
289 }
290 
291 unittest {
292     auto bins = new Bins!size_t;
293     bins.min = -1;
294     bins.width = 0.5;
295     bins.mybins = [1,2,3,4];
296 
297     bins.resize( [3] );
298     assert( bins.length == 4 );
299     bins.resize( [6] );
300     assert( bins.length == 6 );
301 
302     auto mbins = new Bins!(Bins!size_t);
303     mbins.min = -1;
304     mbins.width = 0.5;
305     mbins.mybins = [bins, bins.dup];
306     mbins.resize( [6] );
307     assert( mbins.length == 6 );
308     assert( mbins.mybins[5].min == bins.min );
309     assert( mbins.mybins[5].width == bins.width );
310     mbins.resize( [7,8] );
311     assert( mbins.length == 7 );
312 
313     foreach( x, bin; mbins )
314         assert( bin.length == 8 );
315 }*/
316 
317  
318 /**
319   Calculate bin id based on data value and Bins
320   */
321 /*size_t bin_id(T)( const Bins!T bins, double data ) {
322     assert( data >= bins.min );
323     return cast(size_t)( (data-bins.min)/bins.width );
324 }
325 unittest {
326     auto bins = new Bins!size_t;
327     bins.min = -1;
328     bins.width = 0.5;
329     assert( bin_id( bins, -1 ) == 0 );
330     assert( bin_id( bins, -0.5 ) == 1 );
331     assert( bin_id( bins, -0.25 ) == 1 );
332 }*/
333 /**
334   Add data to existing Bins
335   */
336 /*Bins!T addData( T )( Bins!T bins, const double[] data ) {
337     size_t[] ids;
338     auto bins_step = bins;
339     for ( size_t i = 0; i < data.length; i++ ) {
340         ids ~= bin_id( bins_step, data[i] );
341         if (i < data.length -1)
342             bins_step = bins_step.mybins[0];
343     }
344     return addDataToBin( bins, ids.reverse );
345 }
346 
347 Bins!T addData( T : size_t )( Bins!T bins, const double[] data ) {
348     return addDataToBin( bins, [bin_id( bins, data[0] )] );
349 }
350 
351 unittest {
352     auto bins = new Bins!size_t;
353     bins.min = -1;
354     bins.width = 0.5;
355     bins.max_size = 4;
356     bins.mybins = [1,2,3,4];
357 
358     bins = bins.addData( [-0.25] );
359     assert( bins.mybins[1] == 3 );
360     bins = addData( bins, [0.75] );
361     assert( bins.mybins[3] == 5 );
362     assert( bins.max_size == 5 );
363 
364     auto mbins = new Bins!(Bins!size_t);
365     mbins.min = -1;
366     mbins.width = 0.5;
367     mbins.mybins = [bins, bins.dup];
368     mbins.max_size = 5;
369     mbins.addDataToBin( [1,2] );
370     assert( mbins.mybins[1].mybins[2] == 4 );
371     mbins.addDataToBin( [1,3] );
372     assert( mbins.mybins[1].mybins[3] == 6 );
373     assert( mbins.max_size == 6 );
374 }*/