1 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
---|
2 | /** |
---|
3 | * Contains source code from the article "Radix Sort Revisited". |
---|
4 | * \file IceRevisitedRadix.cpp |
---|
5 | * \author Pierre Terdiman |
---|
6 | * \date April, 4, 2000 |
---|
7 | */ |
---|
8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
---|
9 | |
---|
10 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
---|
11 | /** |
---|
12 | * Revisited Radix Sort. |
---|
13 | * This is my new radix routine: |
---|
14 | * - it uses indices and doesn't recopy the values anymore, hence wasting less ram |
---|
15 | * - it creates all the histograms in one run instead of four |
---|
16 | * - it sorts words faster than dwords and bytes faster than words |
---|
17 | * - it correctly sorts negative floating-point values by patching the offsets |
---|
18 | * - it automatically takes advantage of temporal coherence |
---|
19 | * - multiple keys support is a side effect of temporal coherence |
---|
20 | * - it may be worth recoding in asm... (mainly to use FCOMI, FCMOV, etc) [it's probably memory-bound anyway] |
---|
21 | * |
---|
22 | * History: |
---|
23 | * - 08.15.98: very first version |
---|
24 | * - 04.04.00: recoded for the radix article |
---|
25 | * - 12.xx.00: code lifting |
---|
26 | * - 09.18.01: faster CHECK_PASS_VALIDITY thanks to Mark D. Shattuck (who provided other tips, not included here) |
---|
27 | * - 10.11.01: added local ram support |
---|
28 | * - 01.20.02: bugfix! In very particular cases the last pass was skipped in the float code-path, leading to incorrect sorting...... |
---|
29 | * - 01.02.02: - "mIndices" renamed => "mRanks". That's a rank sorter after all. |
---|
30 | * - ranks are not "reset" anymore, but implicit on first calls |
---|
31 | * - 07.05.02: - offsets rewritten with one less indirection. |
---|
32 | * - 11.03.02: - "bool" replaced with RadixHint enum |
---|
33 | * |
---|
34 | * \class RadixSort |
---|
35 | * \author Pierre Terdiman |
---|
36 | * \version 1.4 |
---|
37 | * \date August, 15, 1998 |
---|
38 | */ |
---|
39 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
---|
40 | |
---|
41 | /* |
---|
42 | To do: |
---|
43 | - add an offset parameter between two input values (avoid some data recopy sometimes) |
---|
44 | - unroll ? asm ? |
---|
45 | - 11 bits trick & 3 passes as Michael did |
---|
46 | - prefetch stuff the day I have a P3 |
---|
47 | - make a version with 16-bits indices ? |
---|
48 | */ |
---|
49 | |
---|
50 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
---|
51 | // Precompiled Header |
---|
52 | #include "Stdafx.h" |
---|
53 | |
---|
54 | using namespace IceCore; |
---|
55 | |
---|
56 | #define INVALIDATE_RANKS mCurrentSize|=0x80000000 |
---|
57 | #define VALIDATE_RANKS mCurrentSize&=0x7fffffff |
---|
58 | #define CURRENT_SIZE (mCurrentSize&0x7fffffff) |
---|
59 | #define INVALID_RANKS (mCurrentSize&0x80000000) |
---|
60 | |
---|
61 | #define CHECK_RESIZE(n) \ |
---|
62 | if(n!=mPreviousSize) \ |
---|
63 | { \ |
---|
64 | if(n>mCurrentSize) Resize(n); \ |
---|
65 | else ResetRanks(); \ |
---|
66 | mPreviousSize = n; \ |
---|
67 | } |
---|
68 | |
---|
69 | #define CREATE_HISTOGRAMS(type, buffer) \ |
---|
70 | /* Clear counters/histograms */ \ |
---|
71 | ZeroMemory(mHistogram, 256*4*sizeof(udword)); \ |
---|
72 | \ |
---|
73 | /* Prepare to count */ \ |
---|
74 | ubyte* p = (ubyte*)input; \ |
---|
75 | ubyte* pe = &p[nb*4]; \ |
---|
76 | udword* h0= &mHistogram[0]; /* Histogram for first pass (LSB) */ \ |
---|
77 | udword* h1= &mHistogram[256]; /* Histogram for second pass */ \ |
---|
78 | udword* h2= &mHistogram[512]; /* Histogram for third pass */ \ |
---|
79 | udword* h3= &mHistogram[768]; /* Histogram for last pass (MSB) */ \ |
---|
80 | \ |
---|
81 | bool AlreadySorted = true; /* Optimism... */ \ |
---|
82 | \ |
---|
83 | if(INVALID_RANKS) \ |
---|
84 | { \ |
---|
85 | /* Prepare for temporal coherence */ \ |
---|
86 | type* Running = (type*)buffer; \ |
---|
87 | type PrevVal = *Running; \ |
---|
88 | \ |
---|
89 | while(p!=pe) \ |
---|
90 | { \ |
---|
91 | /* Read input buffer in previous sorted order */ \ |
---|
92 | type Val = *Running++; \ |
---|
93 | /* Check whether already sorted or not */ \ |
---|
94 | if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */ \ |
---|
95 | /* Update for next iteration */ \ |
---|
96 | PrevVal = Val; \ |
---|
97 | \ |
---|
98 | /* Create histograms */ \ |
---|
99 | h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++; \ |
---|
100 | } \ |
---|
101 | \ |
---|
102 | /* If all input values are already sorted, we just have to return and leave the */ \ |
---|
103 | /* previous list unchanged. That way the routine may take advantage of temporal */ \ |
---|
104 | /* coherence, for example when used to sort transparent faces. */ \ |
---|
105 | if(AlreadySorted) \ |
---|
106 | { \ |
---|
107 | mNbHits++; \ |
---|
108 | for(udword i=0;i<nb;i++) mRanks[i] = i; \ |
---|
109 | return *this; \ |
---|
110 | } \ |
---|
111 | } \ |
---|
112 | else \ |
---|
113 | { \ |
---|
114 | /* Prepare for temporal coherence */ \ |
---|
115 | udword* Indices = mRanks; \ |
---|
116 | type PrevVal = (type)buffer[*Indices]; \ |
---|
117 | \ |
---|
118 | while(p!=pe) \ |
---|
119 | { \ |
---|
120 | /* Read input buffer in previous sorted order */ \ |
---|
121 | type Val = (type)buffer[*Indices++]; \ |
---|
122 | /* Check whether already sorted or not */ \ |
---|
123 | if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */ \ |
---|
124 | /* Update for next iteration */ \ |
---|
125 | PrevVal = Val; \ |
---|
126 | \ |
---|
127 | /* Create histograms */ \ |
---|
128 | h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++; \ |
---|
129 | } \ |
---|
130 | \ |
---|
131 | /* If all input values are already sorted, we just have to return and leave the */ \ |
---|
132 | /* previous list unchanged. That way the routine may take advantage of temporal */ \ |
---|
133 | /* coherence, for example when used to sort transparent faces. */ \ |
---|
134 | if(AlreadySorted) { mNbHits++; return *this; } \ |
---|
135 | } \ |
---|
136 | \ |
---|
137 | /* Else there has been an early out and we must finish computing the histograms */ \ |
---|
138 | while(p!=pe) \ |
---|
139 | { \ |
---|
140 | /* Create histograms without the previous overhead */ \ |
---|
141 | h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++; \ |
---|
142 | } |
---|
143 | |
---|
144 | #define CHECK_PASS_VALIDITY(pass) \ |
---|
145 | /* Shortcut to current counters */ \ |
---|
146 | udword* CurCount = &mHistogram[pass<<8]; \ |
---|
147 | \ |
---|
148 | /* Reset flag. The sorting pass is supposed to be performed. (default) */ \ |
---|
149 | bool PerformPass = true; \ |
---|
150 | \ |
---|
151 | /* Check pass validity */ \ |
---|
152 | \ |
---|
153 | /* If all values have the same byte, sorting is useless. */ \ |
---|
154 | /* It may happen when sorting bytes or words instead of dwords. */ \ |
---|
155 | /* This routine actually sorts words faster than dwords, and bytes */ \ |
---|
156 | /* faster than words. Standard running time (O(4*n))is reduced to O(2*n) */ \ |
---|
157 | /* for words and O(n) for bytes. Running time for floats depends on actual values... */ \ |
---|
158 | \ |
---|
159 | /* Get first byte */ \ |
---|
160 | ubyte UniqueVal = *(((ubyte*)input)+pass); \ |
---|
161 | \ |
---|
162 | /* Check that byte's counter */ \ |
---|
163 | if(CurCount[UniqueVal]==nb) PerformPass=false; |
---|
164 | |
---|
165 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
---|
166 | /** |
---|
167 | * Constructor. |
---|
168 | */ |
---|
169 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
---|
170 | RadixSort::RadixSort() : mRanks(null), mRanks2(null), mCurrentSize(0), mTotalCalls(0), mNbHits(0) |
---|
171 | { |
---|
172 | #ifndef RADIX_LOCAL_RAM |
---|
173 | // Allocate input-independent ram |
---|
174 | mHistogram = new udword[256*4]; |
---|
175 | mOffset = new udword[256]; |
---|
176 | #endif |
---|
177 | // Initialize indices |
---|
178 | INVALIDATE_RANKS; |
---|
179 | } |
---|
180 | |
---|
181 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
---|
182 | /** |
---|
183 | * Destructor. |
---|
184 | */ |
---|
185 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
---|
186 | RadixSort::~RadixSort() |
---|
187 | { |
---|
188 | // Release everything |
---|
189 | #ifndef RADIX_LOCAL_RAM |
---|
190 | DELETEARRAY(mOffset); |
---|
191 | DELETEARRAY(mHistogram); |
---|
192 | #endif |
---|
193 | DELETEARRAY(mRanks2); |
---|
194 | DELETEARRAY(mRanks); |
---|
195 | } |
---|
196 | |
---|
197 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
---|
198 | /** |
---|
199 | * Resizes the inner lists. |
---|
200 | * \param nb [in] new size (number of dwords) |
---|
201 | * \return true if success |
---|
202 | */ |
---|
203 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
---|
204 | bool RadixSort::Resize(udword nb) |
---|
205 | { |
---|
206 | // Free previously used ram |
---|
207 | DELETEARRAY(mRanks2); |
---|
208 | DELETEARRAY(mRanks); |
---|
209 | |
---|
210 | // Get some fresh one |
---|
211 | mRanks = new udword[nb]; CHECKALLOC(mRanks); |
---|
212 | mRanks2 = new udword[nb]; CHECKALLOC(mRanks2); |
---|
213 | |
---|
214 | return true; |
---|
215 | } |
---|
216 | |
---|
217 | inline_ void RadixSort::CheckResize(udword nb) |
---|
218 | { |
---|
219 | udword CurSize = CURRENT_SIZE; |
---|
220 | if(nb!=CurSize) |
---|
221 | { |
---|
222 | if(nb>CurSize) Resize(nb); |
---|
223 | mCurrentSize = nb; |
---|
224 | INVALIDATE_RANKS; |
---|
225 | } |
---|
226 | } |
---|
227 | |
---|
228 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
---|
229 | /** |
---|
230 | * Main sort routine. |
---|
231 | * This one is for integer values. After the call, mRanks contains a list of indices in sorted order, i.e. in the order you may process your data. |
---|
232 | * \param input [in] a list of integer values to sort |
---|
233 | * \param nb [in] number of values to sort, must be < 2^31 |
---|
234 | * \param hint [in] RADIX_SIGNED to handle negative values, RADIX_UNSIGNED if you know your input buffer only contains positive values |
---|
235 | * \return Self-Reference |
---|
236 | */ |
---|
237 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
---|
238 | RadixSort& RadixSort::Sort(const udword* input, udword nb, RadixHint hint) |
---|
239 | { |
---|
240 | // Checkings |
---|
241 | if(!input || !nb || nb&0x80000000) return *this; |
---|
242 | |
---|
243 | // Stats |
---|
244 | mTotalCalls++; |
---|
245 | |
---|
246 | // Resize lists if needed |
---|
247 | CheckResize(nb); |
---|
248 | |
---|
249 | #ifdef RADIX_LOCAL_RAM |
---|
250 | // Allocate histograms & offsets on the stack |
---|
251 | udword mHistogram[256*4]; |
---|
252 | // udword mOffset[256]; |
---|
253 | udword* mLink[256]; |
---|
254 | #endif |
---|
255 | |
---|
256 | // Create histograms (counters). Counters for all passes are created in one run. |
---|
257 | // Pros: read input buffer once instead of four times |
---|
258 | // Cons: mHistogram is 4Kb instead of 1Kb |
---|
259 | // We must take care of signed/unsigned values for temporal coherence.... I just |
---|
260 | // have 2 code paths even if just a single opcode changes. Self-modifying code, someone? |
---|
261 | if(hint==RADIX_UNSIGNED) { CREATE_HISTOGRAMS(udword, input); } |
---|
262 | else { CREATE_HISTOGRAMS(sdword, input); } |
---|
263 | |
---|
264 | // Compute #negative values involved if needed |
---|
265 | udword NbNegativeValues = 0; |
---|
266 | if(hint==RADIX_SIGNED) |
---|
267 | { |
---|
268 | // An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128 |
---|
269 | // last values of the last histogram. Last histogram because that's the one for the Most Significant Byte, |
---|
270 | // responsible for the sign. 128 last values because the 128 first ones are related to positive numbers. |
---|
271 | udword* h3= &mHistogram[768]; |
---|
272 | for(udword i=128;i<256;i++) NbNegativeValues += h3[i]; // 768 for last histogram, 128 for negative part |
---|
273 | } |
---|
274 | |
---|
275 | // Radix sort, j is the pass number (0=LSB, 3=MSB) |
---|
276 | for(udword j=0;j<4;j++) |
---|
277 | { |
---|
278 | CHECK_PASS_VALIDITY(j); |
---|
279 | |
---|
280 | // Sometimes the fourth (negative) pass is skipped because all numbers are negative and the MSB is 0xFF (for example). This is |
---|
281 | // not a problem, numbers are correctly sorted anyway. |
---|
282 | if(PerformPass) |
---|
283 | { |
---|
284 | // Should we care about negative values? |
---|
285 | if(j!=3 || hint==RADIX_UNSIGNED) |
---|
286 | { |
---|
287 | // Here we deal with positive values only |
---|
288 | |
---|
289 | // Create offsets |
---|
290 | // mOffset[0] = 0; |
---|
291 | // for(udword i=1;i<256;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1]; |
---|
292 | mLink[0] = mRanks2; |
---|
293 | for(udword i=1;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; |
---|
294 | } |
---|
295 | else |
---|
296 | { |
---|
297 | // This is a special case to correctly handle negative integers. They're sorted in the right order but at the wrong place. |
---|
298 | |
---|
299 | // Create biased offsets, in order for negative numbers to be sorted as well |
---|
300 | // mOffset[0] = NbNegativeValues; // First positive number takes place after the negative ones |
---|
301 | mLink[0] = &mRanks2[NbNegativeValues]; // First positive number takes place after the negative ones |
---|
302 | // for(udword i=1;i<128;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers |
---|
303 | for(udword i=1;i<128;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers |
---|
304 | |
---|
305 | // Fixing the wrong place for negative values |
---|
306 | // mOffset[128] = 0; |
---|
307 | mLink[128] = mRanks2; |
---|
308 | // for(i=129;i<256;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1]; |
---|
309 | for(udword i=129;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; |
---|
310 | } |
---|
311 | |
---|
312 | // Perform Radix Sort |
---|
313 | ubyte* InputBytes = (ubyte*)input; |
---|
314 | InputBytes += j; |
---|
315 | if(INVALID_RANKS) |
---|
316 | { |
---|
317 | // for(udword i=0;i<nb;i++) mRanks2[mOffset[InputBytes[i<<2]]++] = i; |
---|
318 | for(udword i=0;i<nb;i++) *mLink[InputBytes[i<<2]]++ = i; |
---|
319 | VALIDATE_RANKS; |
---|
320 | } |
---|
321 | else |
---|
322 | { |
---|
323 | udword* Indices = mRanks; |
---|
324 | udword* IndicesEnd = &mRanks[nb]; |
---|
325 | while(Indices!=IndicesEnd) |
---|
326 | { |
---|
327 | udword id = *Indices++; |
---|
328 | // mRanks2[mOffset[InputBytes[id<<2]]++] = id; |
---|
329 | *mLink[InputBytes[id<<2]]++ = id; |
---|
330 | } |
---|
331 | } |
---|
332 | |
---|
333 | // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap. |
---|
334 | udword* Tmp = mRanks; mRanks = mRanks2; mRanks2 = Tmp; |
---|
335 | } |
---|
336 | } |
---|
337 | return *this; |
---|
338 | } |
---|
339 | |
---|
340 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
---|
341 | /** |
---|
342 | * Main sort routine. |
---|
343 | * This one is for floating-point values. After the call, mRanks contains a list of indices in sorted order, i.e. in the order you may process your data. |
---|
344 | * \param input [in] a list of floating-point values to sort |
---|
345 | * \param nb [in] number of values to sort, must be < 2^31 |
---|
346 | * \return Self-Reference |
---|
347 | * \warning only sorts IEEE floating-point values |
---|
348 | */ |
---|
349 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
---|
350 | RadixSort& RadixSort::Sort(const float* input2, udword nb) |
---|
351 | { |
---|
352 | // Checkings |
---|
353 | if(!input2 || !nb || nb&0x80000000) return *this; |
---|
354 | |
---|
355 | // Stats |
---|
356 | mTotalCalls++; |
---|
357 | |
---|
358 | udword* input = (udword*)input2; |
---|
359 | |
---|
360 | // Resize lists if needed |
---|
361 | CheckResize(nb); |
---|
362 | |
---|
363 | #ifdef RADIX_LOCAL_RAM |
---|
364 | // Allocate histograms & offsets on the stack |
---|
365 | udword mHistogram[256*4]; |
---|
366 | // udword mOffset[256]; |
---|
367 | udword* mLink[256]; |
---|
368 | #endif |
---|
369 | |
---|
370 | // Create histograms (counters). Counters for all passes are created in one run. |
---|
371 | // Pros: read input buffer once instead of four times |
---|
372 | // Cons: mHistogram is 4Kb instead of 1Kb |
---|
373 | // Floating-point values are always supposed to be signed values, so there's only one code path there. |
---|
374 | // Please note the floating point comparison needed for temporal coherence! Although the resulting asm code |
---|
375 | // is dreadful, this is surprisingly not such a performance hit - well, I suppose that's a big one on first |
---|
376 | // generation Pentiums....We can't make comparison on integer representations because, as Chris said, it just |
---|
377 | // wouldn't work with mixed positive/negative values.... |
---|
378 | { CREATE_HISTOGRAMS(float, input2); } |
---|
379 | |
---|
380 | // Compute #negative values involved if needed |
---|
381 | udword NbNegativeValues = 0; |
---|
382 | // An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128 |
---|
383 | // last values of the last histogram. Last histogram because that's the one for the Most Significant Byte, |
---|
384 | // responsible for the sign. 128 last values because the 128 first ones are related to positive numbers. |
---|
385 | udword* h3= &mHistogram[768]; |
---|
386 | for(udword i=128;i<256;i++) NbNegativeValues += h3[i]; // 768 for last histogram, 128 for negative part |
---|
387 | |
---|
388 | // Radix sort, j is the pass number (0=LSB, 3=MSB) |
---|
389 | for(udword j=0;j<4;j++) |
---|
390 | { |
---|
391 | // Should we care about negative values? |
---|
392 | if(j!=3) |
---|
393 | { |
---|
394 | // Here we deal with positive values only |
---|
395 | CHECK_PASS_VALIDITY(j); |
---|
396 | |
---|
397 | if(PerformPass) |
---|
398 | { |
---|
399 | // Create offsets |
---|
400 | // mOffset[0] = 0; |
---|
401 | mLink[0] = mRanks2; |
---|
402 | // for(udword i=1;i<256;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1]; |
---|
403 | for(udword i=1;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; |
---|
404 | |
---|
405 | // Perform Radix Sort |
---|
406 | ubyte* InputBytes = (ubyte*)input; |
---|
407 | InputBytes += j; |
---|
408 | if(INVALID_RANKS) |
---|
409 | { |
---|
410 | // for(i=0;i<nb;i++) mRanks2[mOffset[InputBytes[i<<2]]++] = i; |
---|
411 | for(udword i=0;i<nb;i++) *mLink[InputBytes[i<<2]]++ = i; |
---|
412 | VALIDATE_RANKS; |
---|
413 | } |
---|
414 | else |
---|
415 | { |
---|
416 | udword* Indices = mRanks; |
---|
417 | udword* IndicesEnd = &mRanks[nb]; |
---|
418 | while(Indices!=IndicesEnd) |
---|
419 | { |
---|
420 | udword id = *Indices++; |
---|
421 | // mRanks2[mOffset[InputBytes[id<<2]]++] = id; |
---|
422 | *mLink[InputBytes[id<<2]]++ = id; |
---|
423 | } |
---|
424 | } |
---|
425 | |
---|
426 | // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap. |
---|
427 | udword* Tmp = mRanks; mRanks = mRanks2; mRanks2 = Tmp; |
---|
428 | } |
---|
429 | } |
---|
430 | else |
---|
431 | { |
---|
432 | // This is a special case to correctly handle negative values |
---|
433 | CHECK_PASS_VALIDITY(j); |
---|
434 | |
---|
435 | if(PerformPass) |
---|
436 | { |
---|
437 | // Create biased offsets, in order for negative numbers to be sorted as well |
---|
438 | // mOffset[0] = NbNegativeValues; // First positive number takes place after the negative ones |
---|
439 | mLink[0] = &mRanks2[NbNegativeValues]; // First positive number takes place after the negative ones |
---|
440 | // for(udword i=1;i<128;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers |
---|
441 | for(udword i=1;i<128;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers |
---|
442 | |
---|
443 | // We must reverse the sorting order for negative numbers! |
---|
444 | // mOffset[255] = 0; |
---|
445 | mLink[255] = mRanks2; |
---|
446 | // for(i=0;i<127;i++) mOffset[254-i] = mOffset[255-i] + CurCount[255-i]; // Fixing the wrong order for negative values |
---|
447 | for(udword i=0;i<127;i++) mLink[254-i] = mLink[255-i] + CurCount[255-i]; // Fixing the wrong order for negative values |
---|
448 | // for(i=128;i<256;i++) mOffset[i] += CurCount[i]; // Fixing the wrong place for negative values |
---|
449 | for(udword i=128;i<256;i++) mLink[i] += CurCount[i]; // Fixing the wrong place for negative values |
---|
450 | |
---|
451 | // Perform Radix Sort |
---|
452 | if(INVALID_RANKS) |
---|
453 | { |
---|
454 | for(udword i=0;i<nb;i++) |
---|
455 | { |
---|
456 | udword Radix = input[i]>>24; // Radix byte, same as above. AND is useless here (udword). |
---|
457 | // ### cmp to be killed. Not good. Later. |
---|
458 | // if(Radix<128) mRanks2[mOffset[Radix]++] = i; // Number is positive, same as above |
---|
459 | // else mRanks2[--mOffset[Radix]] = i; // Number is negative, flip the sorting order |
---|
460 | if(Radix<128) *mLink[Radix]++ = i; // Number is positive, same as above |
---|
461 | else *(--mLink[Radix]) = i; // Number is negative, flip the sorting order |
---|
462 | } |
---|
463 | VALIDATE_RANKS; |
---|
464 | } |
---|
465 | else |
---|
466 | { |
---|
467 | for(udword i=0;i<nb;i++) |
---|
468 | { |
---|
469 | udword Radix = input[mRanks[i]]>>24; // Radix byte, same as above. AND is useless here (udword). |
---|
470 | // ### cmp to be killed. Not good. Later. |
---|
471 | // if(Radix<128) mRanks2[mOffset[Radix]++] = mRanks[i]; // Number is positive, same as above |
---|
472 | // else mRanks2[--mOffset[Radix]] = mRanks[i]; // Number is negative, flip the sorting order |
---|
473 | if(Radix<128) *mLink[Radix]++ = mRanks[i]; // Number is positive, same as above |
---|
474 | else *(--mLink[Radix]) = mRanks[i]; // Number is negative, flip the sorting order |
---|
475 | } |
---|
476 | } |
---|
477 | // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap. |
---|
478 | udword* Tmp = mRanks; mRanks = mRanks2; mRanks2 = Tmp; |
---|
479 | } |
---|
480 | else |
---|
481 | { |
---|
482 | // The pass is useless, yet we still have to reverse the order of current list if all values are negative. |
---|
483 | if(UniqueVal>=128) |
---|
484 | { |
---|
485 | if(INVALID_RANKS) |
---|
486 | { |
---|
487 | // ###Possible? |
---|
488 | for(udword i=0;i<nb;i++) mRanks2[i] = nb-i-1; |
---|
489 | VALIDATE_RANKS; |
---|
490 | } |
---|
491 | else |
---|
492 | { |
---|
493 | for(udword i=0;i<nb;i++) mRanks2[i] = mRanks[nb-i-1]; |
---|
494 | } |
---|
495 | |
---|
496 | // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap. |
---|
497 | udword* Tmp = mRanks; mRanks = mRanks2; mRanks2 = Tmp; |
---|
498 | } |
---|
499 | } |
---|
500 | } |
---|
501 | } |
---|
502 | return *this; |
---|
503 | } |
---|
504 | |
---|
505 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
---|
506 | /** |
---|
507 | * Gets the ram used. |
---|
508 | * \return memory used in bytes |
---|
509 | */ |
---|
510 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
---|
511 | udword RadixSort::GetUsedRam() const |
---|
512 | { |
---|
513 | udword UsedRam = sizeof(RadixSort); |
---|
514 | #ifndef RADIX_LOCAL_RAM |
---|
515 | UsedRam += 256*4*sizeof(udword); // Histograms |
---|
516 | UsedRam += 256*sizeof(udword); // Offsets |
---|
517 | #endif |
---|
518 | UsedRam += 2*CURRENT_SIZE*sizeof(udword); // 2 lists of indices |
---|
519 | return UsedRam; |
---|
520 | } |
---|