1 | /******************************************************************** |
---|
2 | * * |
---|
3 | * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. * |
---|
4 | * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * |
---|
5 | * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * |
---|
6 | * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * |
---|
7 | * * |
---|
8 | * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 * |
---|
9 | * by the Xiph.Org Foundation http://www.xiph.org/ * |
---|
10 | * * |
---|
11 | ******************************************************************** |
---|
12 | |
---|
13 | function: build a VQ codebook and the encoding decision 'tree' |
---|
14 | last mod: $Id: vqsplit.c 13293 2007-07-24 00:09:47Z xiphmont $ |
---|
15 | |
---|
16 | ********************************************************************/ |
---|
17 | |
---|
18 | /* This code is *not* part of libvorbis. It is used to generate |
---|
19 | trained codebooks offline and then spit the results into a |
---|
20 | pregenerated codebook that is compiled into libvorbis. It is an |
---|
21 | expensive (but good) algorithm. Run it on big iron. */ |
---|
22 | |
---|
23 | /* There are so many optimizations to explore in *both* stages that |
---|
24 | considering the undertaking is almost withering. For now, we brute |
---|
25 | force it all */ |
---|
26 | |
---|
27 | #include <stdlib.h> |
---|
28 | #include <stdio.h> |
---|
29 | #include <math.h> |
---|
30 | #include <string.h> |
---|
31 | #include <sys/time.h> |
---|
32 | |
---|
33 | #include "vqgen.h" |
---|
34 | #include "vqsplit.h" |
---|
35 | #include "bookutil.h" |
---|
36 | |
---|
37 | /* Codebook generation happens in two steps: |
---|
38 | |
---|
39 | 1) Train the codebook with data collected from the encoder: We use |
---|
40 | one of a few error metrics (which represent the distance between a |
---|
41 | given data point and a candidate point in the training set) to |
---|
42 | divide the training set up into cells representing roughly equal |
---|
43 | probability of occurring. |
---|
44 | |
---|
45 | 2) Generate the codebook and auxiliary data from the trained data set |
---|
46 | */ |
---|
47 | |
---|
48 | /* Building a codebook from trained set ********************************** |
---|
49 | |
---|
50 | The codebook in raw form is technically finished once it's trained. |
---|
51 | However, we want to finalize the representative codebook values for |
---|
52 | each entry and generate auxiliary information to optimize encoding. |
---|
53 | We generate the auxiliary coding tree using collected data, |
---|
54 | probably the same data as in the original training */ |
---|
55 | |
---|
56 | /* At each recursion, the data set is split in half. Cells with data |
---|
57 | points on side A go into set A, same with set B. The sets may |
---|
58 | overlap. If the cell overlaps the deviding line only very slightly |
---|
59 | (provided parameter), we may choose to ignore the overlap in order |
---|
60 | to pare the tree down */ |
---|
61 | |
---|
62 | long *isortvals; |
---|
63 | int iascsort(const void *a,const void *b){ |
---|
64 | long av=isortvals[*((long *)a)]; |
---|
65 | long bv=isortvals[*((long *)b)]; |
---|
66 | return(av-bv); |
---|
67 | } |
---|
68 | |
---|
69 | static float _Ndist(int el,float *a, float *b){ |
---|
70 | int i; |
---|
71 | float acc=0.f; |
---|
72 | for(i=0;i<el;i++){ |
---|
73 | float val=(a[i]-b[i]); |
---|
74 | acc+=val*val; |
---|
75 | } |
---|
76 | return sqrt(acc); |
---|
77 | } |
---|
78 | |
---|
79 | #define _Npoint(i) (pointlist+dim*(i)) |
---|
80 | #define _Nnow(i) (entrylist+dim*(i)) |
---|
81 | |
---|
82 | |
---|
83 | /* goes through the split, but just counts it and returns a metric*/ |
---|
84 | int vqsp_count(float *entrylist,float *pointlist,int dim, |
---|
85 | long *membership,long *reventry, |
---|
86 | long *entryindex,long entries, |
---|
87 | long *pointindex,long points,int splitp, |
---|
88 | long *entryA,long *entryB, |
---|
89 | long besti,long bestj, |
---|
90 | long *entriesA,long *entriesB,long *entriesC){ |
---|
91 | long i,j; |
---|
92 | long A=0,B=0,C=0; |
---|
93 | long pointsA=0; |
---|
94 | long pointsB=0; |
---|
95 | long *temppointsA=NULL; |
---|
96 | long *temppointsB=NULL; |
---|
97 | |
---|
98 | if(splitp){ |
---|
99 | temppointsA=_ogg_malloc(points*sizeof(long)); |
---|
100 | temppointsB=_ogg_malloc(points*sizeof(long)); |
---|
101 | } |
---|
102 | |
---|
103 | memset(entryA,0,sizeof(long)*entries); |
---|
104 | memset(entryB,0,sizeof(long)*entries); |
---|
105 | |
---|
106 | /* Do the points belonging to this cell occur on sideA, sideB or |
---|
107 | both? */ |
---|
108 | |
---|
109 | for(i=0;i<points;i++){ |
---|
110 | float *ppt=_Npoint(pointindex[i]); |
---|
111 | long firstentry=membership[pointindex[i]]; |
---|
112 | |
---|
113 | if(firstentry==besti){ |
---|
114 | entryA[reventry[firstentry]]=1; |
---|
115 | if(splitp)temppointsA[pointsA++]=pointindex[i]; |
---|
116 | continue; |
---|
117 | } |
---|
118 | if(firstentry==bestj){ |
---|
119 | entryB[reventry[firstentry]]=1; |
---|
120 | if(splitp)temppointsB[pointsB++]=pointindex[i]; |
---|
121 | continue; |
---|
122 | } |
---|
123 | { |
---|
124 | float distA=_Ndist(dim,ppt,_Nnow(besti)); |
---|
125 | float distB=_Ndist(dim,ppt,_Nnow(bestj)); |
---|
126 | if(distA<distB){ |
---|
127 | entryA[reventry[firstentry]]=1; |
---|
128 | if(splitp)temppointsA[pointsA++]=pointindex[i]; |
---|
129 | }else{ |
---|
130 | entryB[reventry[firstentry]]=1; |
---|
131 | if(splitp)temppointsB[pointsB++]=pointindex[i]; |
---|
132 | } |
---|
133 | } |
---|
134 | } |
---|
135 | |
---|
136 | /* The entry splitting isn't total, so that storage has to be |
---|
137 | allocated for recursion. Reuse the entryA/entryB vectors */ |
---|
138 | /* keep the entries in ascending order (relative to the original |
---|
139 | list); we rely on that stability when ordering p/q choice */ |
---|
140 | for(j=0;j<entries;j++){ |
---|
141 | if(entryA[j] && entryB[j])C++; |
---|
142 | if(entryA[j])entryA[A++]=entryindex[j]; |
---|
143 | if(entryB[j])entryB[B++]=entryindex[j]; |
---|
144 | } |
---|
145 | *entriesA=A; |
---|
146 | *entriesB=B; |
---|
147 | *entriesC=C; |
---|
148 | if(splitp){ |
---|
149 | memcpy(pointindex,temppointsA,sizeof(long)*pointsA); |
---|
150 | memcpy(pointindex+pointsA,temppointsB,sizeof(long)*pointsB); |
---|
151 | free(temppointsA); |
---|
152 | free(temppointsB); |
---|
153 | } |
---|
154 | return(pointsA); |
---|
155 | } |
---|
156 | |
---|
157 | int lp_split(float *pointlist,long totalpoints, |
---|
158 | codebook *b, |
---|
159 | long *entryindex,long entries, |
---|
160 | long *pointindex,long points, |
---|
161 | long *membership,long *reventry, |
---|
162 | long depth, long *pointsofar){ |
---|
163 | |
---|
164 | encode_aux_nearestmatch *t=b->c->nearest_tree; |
---|
165 | |
---|
166 | /* The encoder, regardless of book, will be using a straight |
---|
167 | euclidian distance-to-point metric to determine closest point. |
---|
168 | Thus we split the cells using the same (we've already trained the |
---|
169 | codebook set spacing and distribution using special metrics and |
---|
170 | even a midpoint division won't disturb the basic properties) */ |
---|
171 | |
---|
172 | int dim=b->dim; |
---|
173 | float *entrylist=b->valuelist; |
---|
174 | long ret; |
---|
175 | long *entryA=_ogg_calloc(entries,sizeof(long)); |
---|
176 | long *entryB=_ogg_calloc(entries,sizeof(long)); |
---|
177 | long entriesA=0; |
---|
178 | long entriesB=0; |
---|
179 | long entriesC=0; |
---|
180 | long pointsA=0; |
---|
181 | long i,j,k; |
---|
182 | |
---|
183 | long besti=-1; |
---|
184 | long bestj=-1; |
---|
185 | |
---|
186 | char spinbuf[80]; |
---|
187 | sprintf(spinbuf,"splitting [%ld left]... ",totalpoints-*pointsofar); |
---|
188 | |
---|
189 | /* one reverse index needed */ |
---|
190 | for(i=0;i<b->entries;i++)reventry[i]=-1; |
---|
191 | for(i=0;i<entries;i++)reventry[entryindex[i]]=i; |
---|
192 | |
---|
193 | /* We need to find the dividing hyperplane. find the median of each |
---|
194 | axis as the centerpoint and the normal facing farthest point */ |
---|
195 | |
---|
196 | /* more than one way to do this part. For small sets, we can brute |
---|
197 | force it. */ |
---|
198 | |
---|
199 | if(entries<8 || (float)points*entries*entries<16.f*1024*1024){ |
---|
200 | /* try every pair possibility */ |
---|
201 | float best=0; |
---|
202 | float this; |
---|
203 | for(i=0;i<entries-1;i++){ |
---|
204 | for(j=i+1;j<entries;j++){ |
---|
205 | spinnit(spinbuf,entries-i); |
---|
206 | vqsp_count(b->valuelist,pointlist,dim, |
---|
207 | membership,reventry, |
---|
208 | entryindex,entries, |
---|
209 | pointindex,points,0, |
---|
210 | entryA,entryB, |
---|
211 | entryindex[i],entryindex[j], |
---|
212 | &entriesA,&entriesB,&entriesC); |
---|
213 | this=(entriesA-entriesC)*(entriesB-entriesC); |
---|
214 | |
---|
215 | /* when choosing best, we also want some form of stability to |
---|
216 | make sure more branches are pared later; secondary |
---|
217 | weighting isn;t needed as the entry lists are in ascending |
---|
218 | order, and we always try p/q in the same sequence */ |
---|
219 | |
---|
220 | if( (besti==-1) || |
---|
221 | (this>best) ){ |
---|
222 | |
---|
223 | best=this; |
---|
224 | besti=entryindex[i]; |
---|
225 | bestj=entryindex[j]; |
---|
226 | |
---|
227 | } |
---|
228 | } |
---|
229 | } |
---|
230 | }else{ |
---|
231 | float *p=alloca(dim*sizeof(float)); |
---|
232 | float *q=alloca(dim*sizeof(float)); |
---|
233 | float best=0.f; |
---|
234 | |
---|
235 | /* try COG/normal and furthest pairs */ |
---|
236 | /* meanpoint */ |
---|
237 | /* eventually, we want to select the closest entry and figure n/c |
---|
238 | from p/q (because storing n/c is too large */ |
---|
239 | for(k=0;k<dim;k++){ |
---|
240 | spinnit(spinbuf,entries); |
---|
241 | |
---|
242 | p[k]=0.f; |
---|
243 | for(j=0;j<entries;j++) |
---|
244 | p[k]+=b->valuelist[entryindex[j]*dim+k]; |
---|
245 | p[k]/=entries; |
---|
246 | |
---|
247 | } |
---|
248 | |
---|
249 | /* we go through the entries one by one, looking for the entry on |
---|
250 | the other side closest to the point of reflection through the |
---|
251 | center */ |
---|
252 | |
---|
253 | for(i=0;i<entries;i++){ |
---|
254 | float *ppi=_Nnow(entryindex[i]); |
---|
255 | float ref_best=0.f; |
---|
256 | float ref_j=-1; |
---|
257 | float this; |
---|
258 | spinnit(spinbuf,entries-i); |
---|
259 | |
---|
260 | for(k=0;k<dim;k++) |
---|
261 | q[k]=2*p[k]-ppi[k]; |
---|
262 | |
---|
263 | for(j=0;j<entries;j++){ |
---|
264 | if(j!=i){ |
---|
265 | float this=_Ndist(dim,q,_Nnow(entryindex[j])); |
---|
266 | if(ref_j==-1 || this<=ref_best){ /* <=, not <; very important */ |
---|
267 | ref_best=this; |
---|
268 | ref_j=entryindex[j]; |
---|
269 | } |
---|
270 | } |
---|
271 | } |
---|
272 | |
---|
273 | vqsp_count(b->valuelist,pointlist,dim, |
---|
274 | membership,reventry, |
---|
275 | entryindex,entries, |
---|
276 | pointindex,points,0, |
---|
277 | entryA,entryB, |
---|
278 | entryindex[i],ref_j, |
---|
279 | &entriesA,&entriesB,&entriesC); |
---|
280 | this=(entriesA-entriesC)*(entriesB-entriesC); |
---|
281 | |
---|
282 | /* when choosing best, we also want some form of stability to |
---|
283 | make sure more branches are pared later; secondary |
---|
284 | weighting isn;t needed as the entry lists are in ascending |
---|
285 | order, and we always try p/q in the same sequence */ |
---|
286 | |
---|
287 | if( (besti==-1) || |
---|
288 | (this>best) ){ |
---|
289 | |
---|
290 | best=this; |
---|
291 | besti=entryindex[i]; |
---|
292 | bestj=ref_j; |
---|
293 | |
---|
294 | } |
---|
295 | } |
---|
296 | if(besti>bestj){ |
---|
297 | long temp=besti; |
---|
298 | besti=bestj; |
---|
299 | bestj=temp; |
---|
300 | } |
---|
301 | |
---|
302 | } |
---|
303 | |
---|
304 | /* find cells enclosing points */ |
---|
305 | /* count A/B points */ |
---|
306 | |
---|
307 | pointsA=vqsp_count(b->valuelist,pointlist,dim, |
---|
308 | membership,reventry, |
---|
309 | entryindex,entries, |
---|
310 | pointindex,points,1, |
---|
311 | entryA,entryB, |
---|
312 | besti,bestj, |
---|
313 | &entriesA,&entriesB,&entriesC); |
---|
314 | |
---|
315 | /* fprintf(stderr,"split: total=%ld depth=%ld set A=%ld:%ld:%ld=B\n", |
---|
316 | entries,depth,entriesA-entriesC,entriesC,entriesB-entriesC);*/ |
---|
317 | { |
---|
318 | long thisaux=t->aux++; |
---|
319 | if(t->aux>=t->alloc){ |
---|
320 | t->alloc*=2; |
---|
321 | t->ptr0=_ogg_realloc(t->ptr0,sizeof(long)*t->alloc); |
---|
322 | t->ptr1=_ogg_realloc(t->ptr1,sizeof(long)*t->alloc); |
---|
323 | t->p=_ogg_realloc(t->p,sizeof(long)*t->alloc); |
---|
324 | t->q=_ogg_realloc(t->q,sizeof(long)*t->alloc); |
---|
325 | } |
---|
326 | |
---|
327 | t->p[thisaux]=besti; |
---|
328 | t->q[thisaux]=bestj; |
---|
329 | |
---|
330 | if(entriesA==1){ |
---|
331 | ret=1; |
---|
332 | t->ptr0[thisaux]=entryA[0]; |
---|
333 | *pointsofar+=pointsA; |
---|
334 | }else{ |
---|
335 | t->ptr0[thisaux]= -t->aux; |
---|
336 | ret=lp_split(pointlist,totalpoints,b,entryA,entriesA,pointindex,pointsA, |
---|
337 | membership,reventry,depth+1,pointsofar); |
---|
338 | } |
---|
339 | if(entriesB==1){ |
---|
340 | ret++; |
---|
341 | t->ptr1[thisaux]=entryB[0]; |
---|
342 | *pointsofar+=points-pointsA; |
---|
343 | }else{ |
---|
344 | t->ptr1[thisaux]= -t->aux; |
---|
345 | ret+=lp_split(pointlist,totalpoints,b,entryB,entriesB,pointindex+pointsA, |
---|
346 | points-pointsA,membership,reventry, |
---|
347 | depth+1,pointsofar); |
---|
348 | } |
---|
349 | } |
---|
350 | free(entryA); |
---|
351 | free(entryB); |
---|
352 | return(ret); |
---|
353 | } |
---|
354 | |
---|
355 | static int _node_eq(encode_aux_nearestmatch *v, long a, long b){ |
---|
356 | long Aptr0=v->ptr0[a]; |
---|
357 | long Aptr1=v->ptr1[a]; |
---|
358 | long Bptr0=v->ptr0[b]; |
---|
359 | long Bptr1=v->ptr1[b]; |
---|
360 | |
---|
361 | /* the possibility of choosing the same p and q, but switched, can;t |
---|
362 | happen because we always look for the best p/q in the same search |
---|
363 | order and the search is stable */ |
---|
364 | |
---|
365 | if(Aptr0==Bptr0 && Aptr1==Bptr1) |
---|
366 | return(1); |
---|
367 | |
---|
368 | return(0); |
---|
369 | } |
---|
370 | |
---|
371 | void vqsp_book(vqgen *v, codebook *b, long *quantlist){ |
---|
372 | long i,j; |
---|
373 | static_codebook *c=(static_codebook *)b->c; |
---|
374 | encode_aux_nearestmatch *t; |
---|
375 | |
---|
376 | memset(b,0,sizeof(codebook)); |
---|
377 | memset(c,0,sizeof(static_codebook)); |
---|
378 | b->c=c; |
---|
379 | t=c->nearest_tree=_ogg_calloc(1,sizeof(encode_aux_nearestmatch)); |
---|
380 | c->maptype=2; |
---|
381 | |
---|
382 | /* make sure there are no duplicate entries and that every |
---|
383 | entry has points */ |
---|
384 | |
---|
385 | for(i=0;i<v->entries;){ |
---|
386 | /* duplicate? if so, eliminate */ |
---|
387 | for(j=0;j<i;j++){ |
---|
388 | if(_Ndist(v->elements,_now(v,i),_now(v,j))==0.f){ |
---|
389 | fprintf(stderr,"found a duplicate entry! removing...\n"); |
---|
390 | v->entries--; |
---|
391 | memcpy(_now(v,i),_now(v,v->entries),sizeof(float)*v->elements); |
---|
392 | memcpy(quantlist+i*v->elements,quantlist+v->entries*v->elements, |
---|
393 | sizeof(long)*v->elements); |
---|
394 | break; |
---|
395 | } |
---|
396 | } |
---|
397 | if(j==i)i++; |
---|
398 | } |
---|
399 | |
---|
400 | { |
---|
401 | v->assigned=_ogg_calloc(v->entries,sizeof(long)); |
---|
402 | for(i=0;i<v->points;i++){ |
---|
403 | float *ppt=_point(v,i); |
---|
404 | float firstmetric=_Ndist(v->elements,_now(v,0),ppt); |
---|
405 | long firstentry=0; |
---|
406 | |
---|
407 | if(!(i&0xff))spinnit("checking... ",v->points-i); |
---|
408 | |
---|
409 | for(j=0;j<v->entries;j++){ |
---|
410 | float thismetric=_Ndist(v->elements,_now(v,j),ppt); |
---|
411 | if(thismetric<firstmetric){ |
---|
412 | firstmetric=thismetric; |
---|
413 | firstentry=j; |
---|
414 | } |
---|
415 | } |
---|
416 | |
---|
417 | v->assigned[firstentry]++; |
---|
418 | } |
---|
419 | |
---|
420 | for(j=0;j<v->entries;){ |
---|
421 | if(v->assigned[j]==0){ |
---|
422 | fprintf(stderr,"found an unused entry! removing...\n"); |
---|
423 | v->entries--; |
---|
424 | memcpy(_now(v,j),_now(v,v->entries),sizeof(float)*v->elements); |
---|
425 | v->assigned[j]=v->assigned[v->elements]; |
---|
426 | memcpy(quantlist+j*v->elements,quantlist+v->entries*v->elements, |
---|
427 | sizeof(long)*v->elements); |
---|
428 | continue; |
---|
429 | } |
---|
430 | j++; |
---|
431 | } |
---|
432 | } |
---|
433 | |
---|
434 | fprintf(stderr,"Building a book with %ld unique entries...\n",v->entries); |
---|
435 | |
---|
436 | { |
---|
437 | long *entryindex=_ogg_malloc(v->entries*sizeof(long *)); |
---|
438 | long *pointindex=_ogg_malloc(v->points*sizeof(long)); |
---|
439 | long *membership=_ogg_malloc(v->points*sizeof(long)); |
---|
440 | long *reventry=_ogg_malloc(v->entries*sizeof(long)); |
---|
441 | long pointssofar=0; |
---|
442 | |
---|
443 | for(i=0;i<v->entries;i++)entryindex[i]=i; |
---|
444 | for(i=0;i<v->points;i++)pointindex[i]=i; |
---|
445 | |
---|
446 | t->alloc=4096; |
---|
447 | t->ptr0=_ogg_malloc(sizeof(long)*t->alloc); |
---|
448 | t->ptr1=_ogg_malloc(sizeof(long)*t->alloc); |
---|
449 | t->p=_ogg_malloc(sizeof(long)*t->alloc); |
---|
450 | t->q=_ogg_malloc(sizeof(long)*t->alloc); |
---|
451 | t->aux=0; |
---|
452 | c->dim=v->elements; |
---|
453 | c->entries=v->entries; |
---|
454 | c->lengthlist=_ogg_calloc(c->entries,sizeof(long)); |
---|
455 | b->valuelist=v->entrylist; /* temporary; replaced later */ |
---|
456 | b->dim=c->dim; |
---|
457 | b->entries=c->entries; |
---|
458 | |
---|
459 | for(i=0;i<v->points;i++)membership[i]=-1; |
---|
460 | for(i=0;i<v->points;i++){ |
---|
461 | float *ppt=_point(v,i); |
---|
462 | long firstentry=0; |
---|
463 | float firstmetric=_Ndist(v->elements,_now(v,0),ppt); |
---|
464 | |
---|
465 | if(!(i&0xff))spinnit("assigning... ",v->points-i); |
---|
466 | |
---|
467 | for(j=1;j<v->entries;j++){ |
---|
468 | if(v->assigned[j]!=-1){ |
---|
469 | float thismetric=_Ndist(v->elements,_now(v,j),ppt); |
---|
470 | if(thismetric<=firstmetric){ |
---|
471 | firstmetric=thismetric; |
---|
472 | firstentry=j; |
---|
473 | } |
---|
474 | } |
---|
475 | } |
---|
476 | |
---|
477 | membership[i]=firstentry; |
---|
478 | } |
---|
479 | |
---|
480 | fprintf(stderr,"Leaves added: %d \n", |
---|
481 | lp_split(v->pointlist,v->points, |
---|
482 | b,entryindex,v->entries, |
---|
483 | pointindex,v->points, |
---|
484 | membership,reventry, |
---|
485 | 0,&pointssofar)); |
---|
486 | |
---|
487 | free(pointindex); |
---|
488 | free(membership); |
---|
489 | free(reventry); |
---|
490 | |
---|
491 | fprintf(stderr,"Paring/rerouting redundant branches... "); |
---|
492 | |
---|
493 | /* The tree is likely big and redundant. Pare and reroute branches */ |
---|
494 | { |
---|
495 | int changedflag=1; |
---|
496 | |
---|
497 | while(changedflag){ |
---|
498 | changedflag=0; |
---|
499 | |
---|
500 | /* span the tree node by node; list unique decision nodes and |
---|
501 | short circuit redundant branches */ |
---|
502 | |
---|
503 | for(i=0;i<t->aux;){ |
---|
504 | int k; |
---|
505 | |
---|
506 | /* check list of unique decisions */ |
---|
507 | for(j=0;j<i;j++) |
---|
508 | if(_node_eq(t,i,j))break; |
---|
509 | |
---|
510 | if(j<i){ |
---|
511 | /* a redundant entry; find all higher nodes referencing it and |
---|
512 | short circuit them to the previously noted unique entry */ |
---|
513 | changedflag=1; |
---|
514 | for(k=0;k<t->aux;k++){ |
---|
515 | if(t->ptr0[k]==-i)t->ptr0[k]=-j; |
---|
516 | if(t->ptr1[k]==-i)t->ptr1[k]=-j; |
---|
517 | } |
---|
518 | |
---|
519 | /* Now, we need to fill in the hole from this redundant |
---|
520 | entry in the listing. Insert the last entry in the list. |
---|
521 | Fix the forward pointers to that last entry */ |
---|
522 | t->aux--; |
---|
523 | t->ptr0[i]=t->ptr0[t->aux]; |
---|
524 | t->ptr1[i]=t->ptr1[t->aux]; |
---|
525 | t->p[i]=t->p[t->aux]; |
---|
526 | t->q[i]=t->q[t->aux]; |
---|
527 | for(k=0;k<t->aux;k++){ |
---|
528 | if(t->ptr0[k]==-t->aux)t->ptr0[k]=-i; |
---|
529 | if(t->ptr1[k]==-t->aux)t->ptr1[k]=-i; |
---|
530 | } |
---|
531 | /* hole plugged */ |
---|
532 | |
---|
533 | }else |
---|
534 | i++; |
---|
535 | } |
---|
536 | |
---|
537 | fprintf(stderr,"\rParing/rerouting redundant branches... " |
---|
538 | "%ld remaining ",t->aux); |
---|
539 | } |
---|
540 | fprintf(stderr,"\n"); |
---|
541 | } |
---|
542 | } |
---|
543 | |
---|
544 | /* run all training points through the decision tree to get a final |
---|
545 | probability count */ |
---|
546 | { |
---|
547 | long *probability=_ogg_malloc(c->entries*sizeof(long)); |
---|
548 | for(i=0;i<c->entries;i++)probability[i]=1; /* trivial guard */ |
---|
549 | b->dim=c->dim; |
---|
550 | |
---|
551 | /* sigh. A necessary hack */ |
---|
552 | for(i=0;i<t->aux;i++)t->p[i]*=c->dim; |
---|
553 | for(i=0;i<t->aux;i++)t->q[i]*=c->dim; |
---|
554 | |
---|
555 | for(i=0;i<v->points;i++){ |
---|
556 | /* we use the linear matcher regardless becuase the trainer |
---|
557 | doesn't convert log to linear */ |
---|
558 | int ret=_best(b,v->pointlist+i*v->elements,1); |
---|
559 | probability[ret]++; |
---|
560 | if(!(i&0xff))spinnit("counting hits... ",v->points-i); |
---|
561 | } |
---|
562 | for(i=0;i<t->aux;i++)t->p[i]/=c->dim; |
---|
563 | for(i=0;i<t->aux;i++)t->q[i]/=c->dim; |
---|
564 | |
---|
565 | build_tree_from_lengths(c->entries,probability,c->lengthlist); |
---|
566 | |
---|
567 | free(probability); |
---|
568 | } |
---|
569 | |
---|
570 | /* Sort the entries by codeword length, short to long (eases |
---|
571 | assignment and packing to do it now) */ |
---|
572 | { |
---|
573 | long *wordlen=c->lengthlist; |
---|
574 | long *index=_ogg_malloc(c->entries*sizeof(long)); |
---|
575 | long *revindex=_ogg_malloc(c->entries*sizeof(long)); |
---|
576 | int k; |
---|
577 | for(i=0;i<c->entries;i++)index[i]=i; |
---|
578 | isortvals=c->lengthlist; |
---|
579 | qsort(index,c->entries,sizeof(long),iascsort); |
---|
580 | |
---|
581 | /* rearrange storage; ptr0/1 first as it needs a reverse index */ |
---|
582 | /* n and c stay unchanged */ |
---|
583 | for(i=0;i<c->entries;i++)revindex[index[i]]=i; |
---|
584 | for(i=0;i<t->aux;i++){ |
---|
585 | if(!(i&0x3f))spinnit("sorting... ",t->aux-i); |
---|
586 | |
---|
587 | if(t->ptr0[i]>=0)t->ptr0[i]=revindex[t->ptr0[i]]; |
---|
588 | if(t->ptr1[i]>=0)t->ptr1[i]=revindex[t->ptr1[i]]; |
---|
589 | t->p[i]=revindex[t->p[i]]; |
---|
590 | t->q[i]=revindex[t->q[i]]; |
---|
591 | } |
---|
592 | free(revindex); |
---|
593 | |
---|
594 | /* map lengthlist and vallist with index */ |
---|
595 | c->lengthlist=_ogg_calloc(c->entries,sizeof(long)); |
---|
596 | b->valuelist=_ogg_malloc(sizeof(float)*c->entries*c->dim); |
---|
597 | c->quantlist=_ogg_malloc(sizeof(long)*c->entries*c->dim); |
---|
598 | for(i=0;i<c->entries;i++){ |
---|
599 | long e=index[i]; |
---|
600 | for(k=0;k<c->dim;k++){ |
---|
601 | b->valuelist[i*c->dim+k]=v->entrylist[e*c->dim+k]; |
---|
602 | c->quantlist[i*c->dim+k]=quantlist[e*c->dim+k]; |
---|
603 | } |
---|
604 | c->lengthlist[i]=wordlen[e]; |
---|
605 | } |
---|
606 | |
---|
607 | free(wordlen); |
---|
608 | } |
---|
609 | |
---|
610 | fprintf(stderr,"Done. \n\n"); |
---|
611 | } |
---|
612 | |
---|