1 /*
2 	 -------------------------------------------------------------------
3 
4 	 Copyright (C) 2014, Edwin van Leeuwen
5 
6 	 This file is part of plotd plotting library.
7 
8 	 Plotd is free software; you can redistribute it and/or modify
9 	 it under the terms of the GNU General Public License as published by
10 	 the Free Software Foundation; either version 3 of the License, or
11 	 (at your option) any later version.
12 
13 	 Plotd is distributed in the hope that it will be useful,
14 	 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 	 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 	 GNU General Public License for more details.
17 
18 	 You should have received a copy of the GNU General Public License
19 	 along with Plotd. If not, see <http://www.gnu.org/licenses/>.
20 
21 	 -------------------------------------------------------------------
22 	 */
23 
24 module plotd.binning;
25 
26 import std.array;
27 import std.range;
28 
29 version( unittest ) {
30 	import std.algorithm;
31 }
32 
33 /**
34   The struct Bins is a container holding binned data
35  */
36 struct Bins(T) {
37     double min;
38     double width;
39 
40 		 @property double max() {
41 			return min + width*(mybins.length);
42 		}
43 
44     /// How many bins does the container have
45     @property size_t length() {
46         return mybins.length;
47     }
48 	
49 		/// Set length/number of bins
50     @property void length( size_t noBins ) {
51  			mybins.length = noBins;
52     }
53 
54     /// save the container position
55     @property Bins!T save() { return this; }
56 
57     /// Access bin by index
58     ref T opIndex(size_t index)
59     {
60         return mybins[index];
61     }
62 
63     int opApply(int delegate( ref T ) dg)
64     {
65         int result;
66 
67         double x = min;
68         foreach ( el; mybins )
69         {
70             result = dg( el );
71             if (result)
72                 break;
73 
74             x += width;
75         }
76         return result;
77     }
78     
79     int opApply(int delegate( double, ref T ) dg)
80     {
81         int result;
82 
83         double x = min;
84         foreach ( el; mybins )
85         {
86             result = dg( x, el );
87             if (result)
88                 break;
89 
90             x += width;
91         }
92         return result;
93     }
94 
95     private:
96         T[] mybins;
97 }
98 
99 /// For loop over Bins
100 unittest {
101     Bins!size_t bins;
102     bins.min = -1;
103     bins.width = 0.5;
104     bins.mybins = [1,2,3,4];
105 
106     size_t correct_el = 1;
107     foreach ( el; bins ) {
108         assert( correct_el == el );
109         correct_el++;
110     }
111 
112     double correct_x = bins.min;
113     correct_el = 1;
114     foreach ( x, el; bins ) {
115         assert( correct_x == x );
116         assert( correct_el == el );
117         correct_x += bins.width;
118         correct_el++;
119     }
120 }
121 
122 /// Number of Bins
123 unittest {
124     Bins!size_t bins;
125     bins.min = -1;
126     bins.width = 0.5;
127 		bins.length = 3;
128 		assert( bins.length == 3 );
129 		assert( equal( bins.mybins, [0,0,0] ) );
130 }
131  
132 /**
133   Calculate bin id based on data value and Bins
134 
135   TODO implement this for multidimensional bins
136   */
137 size_t binId(T)( in Bins!T bins, double data ) {
138     assert( data >= bins.min );
139     return cast(size_t)( (data-bins.min)/bins.width );
140 }
141 
142 unittest {
143     Bins!size_t bins;
144     bins.min = -1;
145     bins.width = 0.5;
146     assert( binId!size_t( bins, -1.0 ) == 0 );
147     assert( binId( bins, -0.5 ) == 1 );
148     assert( bins.binId( -0.25 ) == 1 );
149 }
150 
151 /**
152   Add data to the given bin id
153 
154   bin_ids is an array in case of multidimensional bins
155 
156   Ignore values that fall outside of the existing bin range
157  */
158 Bins!T addDataToBin( T )( Bins!T bins, const size_t[] binIds ) 
159 {
160     static if (__traits( compiles, 
161                 bins[binIds[0]].addDataToBin( binIds[1..$] ) ) ) {
162         if (binIds[0] < bins.length) {
163             bins[binIds[0]].addDataToBin( binIds[1..$] );
164         }
165     }
166     else {
167         if (binIds[0] < bins.length) {
168             bins[binIds[0]]++;
169         }
170     }
171     return bins;
172 }
173 
174 unittest {
175     Bins!size_t bins;
176     bins.min = -1;
177     bins.width = 0.5;
178     bins.mybins = [1,2,3,4];
179 
180     bins.addDataToBin( [1] );
181     assert( bins.mybins[1] == 3 );
182     bins.addDataToBin( [3] );
183     assert( bins.mybins[3] == 5 );
184 
185     Bins!(Bins!size_t) mbins;
186     mbins.min = -1;
187     mbins.width = 0.5;
188     mbins.mybins = [bins, bins];
189     mbins.addDataToBin( [1,2] );
190     assert( mbins.mybins[1].mybins[2] == 4 );
191     mbins.addDataToBin( [1,3] );
192     assert( mbins.mybins[1].mybins[3] == 6 );
193 }
194 /**
195   Resize the bins
196 
197   Again an array in case of resizing multidmensional arrays
198  */
199 /*Bins!T resize( T )( Bins!T bins, const size_t[] new_length ) {
200     T default_value;
201     assert( bins.mybins.length > 0, 
202             "Multidimensional need to have at least one bin to correctly resize" );
203     default_value = new T;
204     default_value.min = bins.mybins[0].min;
205     default_value.width = bins.mybins[0].width;
206     while ( bins.mybins.length < new_length[0] )
207         bins.mybins ~= [default_value];
208 
209     if ( new_length.length > 1 )
210         foreach ( ref T bin; bins.mybins )
211             bin.resize( new_length[1..$] );
212 
213     return bins;
214 }
215 
216 Bins!T resize( T : size_t )( Bins!T bins, const size_t[] new_length ) {
217     T default_value = 0;
218 
219     while ( bins.mybins.length < new_length[0] )
220         bins.mybins ~= [default_value];
221     return bins;
222 }
223 
224 unittest {
225     auto bins = new Bins!size_t;
226     bins.min = -1;
227     bins.width = 0.5;
228     bins.mybins = [1,2,3,4];
229 
230     bins.resize( [3] );
231     assert( bins.length == 4 );
232     bins.resize( [6] );
233     assert( bins.length == 6 );
234 
235     auto mbins = new Bins!(Bins!size_t);
236     mbins.min = -1;
237     mbins.width = 0.5;
238     mbins.mybins = [bins, bins.dup];
239     mbins.resize( [6] );
240     assert( mbins.length == 6 );
241     assert( mbins.mybins[5].min == bins.min );
242     assert( mbins.mybins[5].width == bins.width );
243     mbins.resize( [7,8] );
244     assert( mbins.length == 7 );
245 
246     foreach( x, bin; mbins )
247         assert( bin.length == 8 );
248 }*/
249 
250  
251 /**
252   Calculate bin id based on data value and Bins
253   */
254 /*size_t bin_id(T)( const Bins!T bins, double data ) {
255     assert( data >= bins.min );
256     return cast(size_t)( (data-bins.min)/bins.width );
257 }
258 unittest {
259     auto bins = new Bins!size_t;
260     bins.min = -1;
261     bins.width = 0.5;
262     assert( bin_id( bins, -1 ) == 0 );
263     assert( bin_id( bins, -0.5 ) == 1 );
264     assert( bin_id( bins, -0.25 ) == 1 );
265 }*/
266 /**
267   Add data to existing Bins
268   */
269 /*Bins!T addData( T )( Bins!T bins, const double[] data ) {
270     size_t[] ids;
271     auto bins_step = bins;
272     for ( size_t i = 0; i < data.length; i++ ) {
273         ids ~= bin_id( bins_step, data[i] );
274         if (i < data.length -1)
275             bins_step = bins_step.mybins[0];
276     }
277     return addDataToBin( bins, ids.reverse );
278 }
279 
280 Bins!T addData( T : size_t )( Bins!T bins, const double[] data ) {
281     return addDataToBin( bins, [bin_id( bins, data[0] )] );
282 }
283 
284 unittest {
285     auto bins = new Bins!size_t;
286     bins.min = -1;
287     bins.width = 0.5;
288     bins.max_size = 4;
289     bins.mybins = [1,2,3,4];
290 
291     bins = bins.addData( [-0.25] );
292     assert( bins.mybins[1] == 3 );
293     bins = addData( bins, [0.75] );
294     assert( bins.mybins[3] == 5 );
295     assert( bins.max_size == 5 );
296 
297     auto mbins = new Bins!(Bins!size_t);
298     mbins.min = -1;
299     mbins.width = 0.5;
300     mbins.mybins = [bins, bins.dup];
301     mbins.max_size = 5;
302     mbins.addDataToBin( [1,2] );
303     assert( mbins.mybins[1].mybins[2] == 4 );
304     mbins.addDataToBin( [1,3] );
305     assert( mbins.mybins[1].mybins[3] == 6 );
306     assert( mbins.max_size == 6 );
307 }*/