1 | /******************************************************************** |
---|
2 | * * |
---|
3 | * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. * |
---|
4 | * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * |
---|
5 | * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * |
---|
6 | * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * |
---|
7 | * * |
---|
8 | * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 * |
---|
9 | * by the Xiph.Org Foundation http://www.xiph.org/ * |
---|
10 | * * |
---|
11 | ******************************************************************** |
---|
12 | |
---|
13 | function: utility for paring low hit count cells from lattice codebook |
---|
14 | last mod: $Id: latticepare.c 13293 2007-07-24 00:09:47Z xiphmont $ |
---|
15 | |
---|
16 | ********************************************************************/ |
---|
17 | |
---|
18 | #include <stdlib.h> |
---|
19 | #include <stdio.h> |
---|
20 | #include <math.h> |
---|
21 | #include <string.h> |
---|
22 | #include <errno.h> |
---|
23 | #include "../lib/scales.h" |
---|
24 | #include "bookutil.h" |
---|
25 | #include "vqgen.h" |
---|
26 | #include "vqsplit.h" |
---|
27 | #include "../lib/os.h" |
---|
28 | |
---|
29 | /* Lattice codebooks have two strengths: important fetaures that are |
---|
30 | poorly modelled by global error minimization training (eg, strong |
---|
31 | peaks) are not neglected 2) compact quantized representation. |
---|
32 | |
---|
33 | A fully populated lattice codebook, however, swings point 1 too far |
---|
34 | in the opposite direction; rare features need not be modelled quite |
---|
35 | so religiously and as such, we waste bits unless we eliminate the |
---|
36 | least common cells. The codebook rep supports unused cells, so we |
---|
37 | need to tag such cells and build an auxiliary (non-thresh) search |
---|
38 | mechanism to find the proper match quickly */ |
---|
39 | |
---|
40 | /* two basic steps; first is pare the cell for which dispersal creates |
---|
41 | the least additional error. This will naturally choose |
---|
42 | low-population cells and cells that have not taken on points from |
---|
43 | neighboring paring (but does not result in the lattice collapsing |
---|
44 | inward and leaving low population ares totally unmodelled). After |
---|
45 | paring has removed the desired number of cells, we need to build an |
---|
46 | auxiliary search for each culled point */ |
---|
47 | |
---|
48 | /* Although lattice books (due to threshhold-based matching) do not |
---|
49 | actually use error to make cell selections (in fact, it need not |
---|
50 | bear any relation), the 'secondbest' entry finder here is in fact |
---|
51 | error/distance based, so latticepare is only useful on such books */ |
---|
52 | |
---|
53 | /* command line: |
---|
54 | latticepare latticebook.vqh input_data.vqd <target_cells> |
---|
55 | |
---|
56 | produces a new output book on stdout |
---|
57 | */ |
---|
58 | |
---|
59 | static float _dist(int el,float *a, float *b){ |
---|
60 | int i; |
---|
61 | float acc=0.f; |
---|
62 | for(i=0;i<el;i++){ |
---|
63 | float val=(a[i]-b[i]); |
---|
64 | acc+=val*val; |
---|
65 | } |
---|
66 | return(acc); |
---|
67 | } |
---|
68 | |
---|
69 | static float *pointlist; |
---|
70 | static long points=0; |
---|
71 | |
---|
72 | void add_vector(codebook *b,float *vec,long n){ |
---|
73 | int dim=b->dim,i,j; |
---|
74 | int step=n/dim; |
---|
75 | for(i=0;i<step;i++){ |
---|
76 | for(j=i;j<n;j+=step){ |
---|
77 | pointlist[points++]=vec[j]; |
---|
78 | } |
---|
79 | } |
---|
80 | } |
---|
81 | |
---|
82 | static int bestm(codebook *b,float *vec){ |
---|
83 | encode_aux_threshmatch *tt=b->c->thresh_tree; |
---|
84 | int dim=b->dim; |
---|
85 | int i,k,o; |
---|
86 | int best=0; |
---|
87 | |
---|
88 | /* what would be the closest match if the codebook was fully |
---|
89 | populated? */ |
---|
90 | |
---|
91 | for(k=0,o=dim-1;k<dim;k++,o--){ |
---|
92 | int i; |
---|
93 | for(i=0;i<tt->threshvals-1;i++) |
---|
94 | if(vec[o]<tt->quantthresh[i])break; |
---|
95 | best=(best*tt->quantvals)+tt->quantmap[i]; |
---|
96 | } |
---|
97 | return(best); |
---|
98 | } |
---|
99 | |
---|
100 | static int closest(codebook *b,float *vec,int current){ |
---|
101 | encode_aux_threshmatch *tt=b->c->thresh_tree; |
---|
102 | int dim=b->dim; |
---|
103 | int i,k,o; |
---|
104 | |
---|
105 | float bestmetric=0; |
---|
106 | int bestentry=-1; |
---|
107 | int best=bestm(b,vec); |
---|
108 | |
---|
109 | if(current<0 && b->c->lengthlist[best]>0)return best; |
---|
110 | |
---|
111 | for(i=0;i<b->entries;i++){ |
---|
112 | if(b->c->lengthlist[i]>0 && i!=best && i!=current){ |
---|
113 | float thismetric=_dist(dim, vec, b->valuelist+i*dim); |
---|
114 | if(bestentry==-1 || thismetric<bestmetric){ |
---|
115 | bestentry=i; |
---|
116 | bestmetric=thismetric; |
---|
117 | } |
---|
118 | } |
---|
119 | } |
---|
120 | |
---|
121 | return(bestentry); |
---|
122 | } |
---|
123 | |
---|
124 | static float _heuristic(codebook *b,float *ppt,int secondbest){ |
---|
125 | float *secondcell=b->valuelist+secondbest*b->dim; |
---|
126 | int best=bestm(b,ppt); |
---|
127 | float *firstcell=b->valuelist+best*b->dim; |
---|
128 | float error=_dist(b->dim,firstcell,secondcell); |
---|
129 | float *zero=alloca(b->dim*sizeof(float)); |
---|
130 | float fromzero; |
---|
131 | |
---|
132 | memset(zero,0,b->dim*sizeof(float)); |
---|
133 | fromzero=sqrt(_dist(b->dim,firstcell,zero)); |
---|
134 | |
---|
135 | return(error/fromzero); |
---|
136 | } |
---|
137 | |
---|
138 | static int longsort(const void *a, const void *b){ |
---|
139 | return **(long **)b-**(long **)a; |
---|
140 | } |
---|
141 | |
---|
142 | void usage(void){ |
---|
143 | fprintf(stderr,"Ogg/Vorbis lattice codebook paring utility\n\n" |
---|
144 | "usage: latticepare book.vqh data.vqd <target_cells> <protected_cells> base\n" |
---|
145 | "where <target_cells> is the desired number of final cells (or -1\n" |
---|
146 | " for no change)\n" |
---|
147 | " <protected_cells> is the number of highest-hit count cells\n" |
---|
148 | " to protect from dispersal\n" |
---|
149 | " base is the base name (not including .vqh) of the new\n" |
---|
150 | " book\n\n"); |
---|
151 | exit(1); |
---|
152 | } |
---|
153 | |
---|
154 | int main(int argc,char *argv[]){ |
---|
155 | char *basename; |
---|
156 | codebook *b=NULL; |
---|
157 | int entries=0; |
---|
158 | int dim=0; |
---|
159 | long i,j,target=-1,protect=-1; |
---|
160 | FILE *out=NULL; |
---|
161 | |
---|
162 | int argnum=0; |
---|
163 | |
---|
164 | argv++; |
---|
165 | if(*argv==NULL){ |
---|
166 | usage(); |
---|
167 | exit(1); |
---|
168 | } |
---|
169 | |
---|
170 | while(*argv){ |
---|
171 | if(*argv[0]=='-'){ |
---|
172 | |
---|
173 | argv++; |
---|
174 | |
---|
175 | }else{ |
---|
176 | switch (argnum++){ |
---|
177 | case 0:case 1: |
---|
178 | { |
---|
179 | /* yes, this is evil. However, it's very convenient to parse file |
---|
180 | extentions */ |
---|
181 | |
---|
182 | /* input file. What kind? */ |
---|
183 | char *dot; |
---|
184 | char *ext=NULL; |
---|
185 | char *name=strdup(*argv++); |
---|
186 | dot=strrchr(name,'.'); |
---|
187 | if(dot) |
---|
188 | ext=dot+1; |
---|
189 | else{ |
---|
190 | ext=""; |
---|
191 | |
---|
192 | } |
---|
193 | |
---|
194 | |
---|
195 | /* codebook */ |
---|
196 | if(!strcmp(ext,"vqh")){ |
---|
197 | |
---|
198 | basename=strrchr(name,'/'); |
---|
199 | if(basename) |
---|
200 | basename=strdup(basename)+1; |
---|
201 | else |
---|
202 | basename=strdup(name); |
---|
203 | dot=strrchr(basename,'.'); |
---|
204 | if(dot)*dot='\0'; |
---|
205 | |
---|
206 | b=codebook_load(name); |
---|
207 | dim=b->dim; |
---|
208 | entries=b->entries; |
---|
209 | } |
---|
210 | |
---|
211 | /* data file; we do actually need to suck it into memory */ |
---|
212 | /* we're dealing with just one book, so we can de-interleave */ |
---|
213 | if(!strcmp(ext,"vqd") && !points){ |
---|
214 | int cols; |
---|
215 | long lines=0; |
---|
216 | char *line; |
---|
217 | float *vec; |
---|
218 | FILE *in=fopen(name,"r"); |
---|
219 | if(!in){ |
---|
220 | fprintf(stderr,"Could not open input file %s\n",name); |
---|
221 | exit(1); |
---|
222 | } |
---|
223 | |
---|
224 | reset_next_value(); |
---|
225 | line=setup_line(in); |
---|
226 | /* count cols before we start reading */ |
---|
227 | { |
---|
228 | char *temp=line; |
---|
229 | while(*temp==' ')temp++; |
---|
230 | for(cols=0;*temp;cols++){ |
---|
231 | while(*temp>32)temp++; |
---|
232 | while(*temp==' ')temp++; |
---|
233 | } |
---|
234 | } |
---|
235 | vec=alloca(cols*sizeof(float)); |
---|
236 | /* count, then load, to avoid fragmenting the hell out of |
---|
237 | memory */ |
---|
238 | while(line){ |
---|
239 | lines++; |
---|
240 | for(j=0;j<cols;j++) |
---|
241 | if(get_line_value(in,vec+j)){ |
---|
242 | fprintf(stderr,"Too few columns on line %ld in data file\n",lines); |
---|
243 | exit(1); |
---|
244 | } |
---|
245 | if((lines&0xff)==0)spinnit("counting samples...",lines*cols); |
---|
246 | line=setup_line(in); |
---|
247 | } |
---|
248 | pointlist=_ogg_malloc((cols*lines+entries*dim)*sizeof(float)); |
---|
249 | |
---|
250 | rewind(in); |
---|
251 | line=setup_line(in); |
---|
252 | while(line){ |
---|
253 | lines--; |
---|
254 | for(j=0;j<cols;j++) |
---|
255 | if(get_line_value(in,vec+j)){ |
---|
256 | fprintf(stderr,"Too few columns on line %ld in data file\n",lines); |
---|
257 | exit(1); |
---|
258 | } |
---|
259 | /* deinterleave, add to heap */ |
---|
260 | add_vector(b,vec,cols); |
---|
261 | if((lines&0xff)==0)spinnit("loading samples...",lines*cols); |
---|
262 | |
---|
263 | line=setup_line(in); |
---|
264 | } |
---|
265 | fclose(in); |
---|
266 | } |
---|
267 | } |
---|
268 | break; |
---|
269 | case 2: |
---|
270 | target=atol(*argv++); |
---|
271 | if(target==0)target=entries; |
---|
272 | break; |
---|
273 | case 3: |
---|
274 | protect=atol(*argv++); |
---|
275 | break; |
---|
276 | case 4: |
---|
277 | { |
---|
278 | char *buff=alloca(strlen(*argv)+5); |
---|
279 | sprintf(buff,"%s.vqh",*argv); |
---|
280 | basename=*argv++; |
---|
281 | |
---|
282 | out=fopen(buff,"w"); |
---|
283 | if(!out){ |
---|
284 | fprintf(stderr,"unable ot open %s for output",buff); |
---|
285 | exit(1); |
---|
286 | } |
---|
287 | } |
---|
288 | break; |
---|
289 | default: |
---|
290 | usage(); |
---|
291 | } |
---|
292 | } |
---|
293 | } |
---|
294 | if(!entries || !points || !out)usage(); |
---|
295 | if(target==-1)usage(); |
---|
296 | |
---|
297 | /* add guard points */ |
---|
298 | for(i=0;i<entries;i++) |
---|
299 | for(j=0;j<dim;j++) |
---|
300 | pointlist[points++]=b->valuelist[i*dim+j]; |
---|
301 | |
---|
302 | points/=dim; |
---|
303 | |
---|
304 | /* set up auxiliary vectors for error tracking */ |
---|
305 | { |
---|
306 | encode_aux_nearestmatch *nt=NULL; |
---|
307 | long pointssofar=0; |
---|
308 | long *pointindex; |
---|
309 | long indexedpoints=0; |
---|
310 | long *entryindex; |
---|
311 | long *reventry; |
---|
312 | long *membership=_ogg_malloc(points*sizeof(long)); |
---|
313 | long *firsthead=_ogg_malloc(entries*sizeof(long)); |
---|
314 | long *secondary=_ogg_malloc(points*sizeof(long)); |
---|
315 | long *secondhead=_ogg_malloc(entries*sizeof(long)); |
---|
316 | |
---|
317 | long *cellcount=_ogg_calloc(entries,sizeof(long)); |
---|
318 | long *cellcount2=_ogg_calloc(entries,sizeof(long)); |
---|
319 | float *cellerror=_ogg_calloc(entries,sizeof(float)); |
---|
320 | float *cellerrormax=_ogg_calloc(entries,sizeof(float)); |
---|
321 | long cellsleft=entries; |
---|
322 | for(i=0;i<points;i++)membership[i]=-1; |
---|
323 | for(i=0;i<entries;i++)firsthead[i]=-1; |
---|
324 | for(i=0;i<points;i++)secondary[i]=-1; |
---|
325 | for(i=0;i<entries;i++)secondhead[i]=-1; |
---|
326 | |
---|
327 | for(i=0;i<points;i++){ |
---|
328 | /* assign vectors to the nearest cell. Also keep track of second |
---|
329 | nearest for error statistics */ |
---|
330 | float *ppt=pointlist+i*dim; |
---|
331 | int firstentry=closest(b,ppt,-1); |
---|
332 | int secondentry=closest(b,ppt,firstentry); |
---|
333 | float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt); |
---|
334 | float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); |
---|
335 | |
---|
336 | if(!(i&0xff))spinnit("initializing... ",points-i); |
---|
337 | |
---|
338 | membership[i]=firsthead[firstentry]; |
---|
339 | firsthead[firstentry]=i; |
---|
340 | secondary[i]=secondhead[secondentry]; |
---|
341 | secondhead[secondentry]=i; |
---|
342 | |
---|
343 | if(i<points-entries){ |
---|
344 | cellerror[firstentry]+=secondmetric-firstmetric; |
---|
345 | cellerrormax[firstentry]=max(cellerrormax[firstentry], |
---|
346 | _heuristic(b,ppt,secondentry)); |
---|
347 | cellcount[firstentry]++; |
---|
348 | cellcount2[secondentry]++; |
---|
349 | } |
---|
350 | } |
---|
351 | |
---|
352 | /* which cells are most heavily populated? Protect as many from |
---|
353 | dispersal as the user has requested */ |
---|
354 | { |
---|
355 | long **countindex=_ogg_calloc(entries,sizeof(long *)); |
---|
356 | for(i=0;i<entries;i++)countindex[i]=cellcount+i; |
---|
357 | qsort(countindex,entries,sizeof(long *),longsort); |
---|
358 | for(i=0;i<protect;i++){ |
---|
359 | int ptr=countindex[i]-cellcount; |
---|
360 | cellerrormax[ptr]=9e50f; |
---|
361 | } |
---|
362 | } |
---|
363 | |
---|
364 | { |
---|
365 | fprintf(stderr,"\r"); |
---|
366 | for(i=0;i<entries;i++){ |
---|
367 | /* decompose index */ |
---|
368 | int entry=i; |
---|
369 | for(j=0;j<dim;j++){ |
---|
370 | fprintf(stderr,"%d:",entry%b->c->thresh_tree->quantvals); |
---|
371 | entry/=b->c->thresh_tree->quantvals; |
---|
372 | } |
---|
373 | |
---|
374 | fprintf(stderr,":%ld/%ld, ",cellcount[i],cellcount2[i]); |
---|
375 | } |
---|
376 | fprintf(stderr,"\n"); |
---|
377 | } |
---|
378 | |
---|
379 | /* do the automatic cull request */ |
---|
380 | while(cellsleft>target){ |
---|
381 | int bestcell=-1; |
---|
382 | float besterror=0; |
---|
383 | float besterror2=0; |
---|
384 | long head=-1; |
---|
385 | char spinbuf[80]; |
---|
386 | sprintf(spinbuf,"cells left to eliminate: %ld : ",cellsleft-target); |
---|
387 | |
---|
388 | /* find the cell with lowest removal impact */ |
---|
389 | for(i=0;i<entries;i++){ |
---|
390 | if(b->c->lengthlist[i]>0){ |
---|
391 | if(bestcell==-1 || cellerrormax[i]<=besterror2){ |
---|
392 | if(bestcell==-1 || cellerrormax[i]<besterror2 || |
---|
393 | besterror>cellerror[i]){ |
---|
394 | besterror=cellerror[i]; |
---|
395 | besterror2=cellerrormax[i]; |
---|
396 | bestcell=i; |
---|
397 | } |
---|
398 | } |
---|
399 | } |
---|
400 | } |
---|
401 | |
---|
402 | fprintf(stderr,"\reliminating cell %d \n" |
---|
403 | " dispersal error of %g max/%g total (%ld hits)\n", |
---|
404 | bestcell,besterror2,besterror,cellcount[bestcell]); |
---|
405 | |
---|
406 | /* disperse it. move each point out, adding it (properly) to |
---|
407 | the second best */ |
---|
408 | b->c->lengthlist[bestcell]=0; |
---|
409 | head=firsthead[bestcell]; |
---|
410 | firsthead[bestcell]=-1; |
---|
411 | while(head!=-1){ |
---|
412 | /* head is a point number */ |
---|
413 | float *ppt=pointlist+head*dim; |
---|
414 | int firstentry=closest(b,ppt,-1); |
---|
415 | int secondentry=closest(b,ppt,firstentry); |
---|
416 | float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt); |
---|
417 | float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); |
---|
418 | long next=membership[head]; |
---|
419 | |
---|
420 | if(head<points-entries){ |
---|
421 | cellcount[firstentry]++; |
---|
422 | cellcount[bestcell]--; |
---|
423 | cellerror[firstentry]+=secondmetric-firstmetric; |
---|
424 | cellerrormax[firstentry]=max(cellerrormax[firstentry], |
---|
425 | _heuristic(b,ppt,secondentry)); |
---|
426 | } |
---|
427 | |
---|
428 | membership[head]=firsthead[firstentry]; |
---|
429 | firsthead[firstentry]=head; |
---|
430 | head=next; |
---|
431 | if(cellcount[bestcell]%128==0) |
---|
432 | spinnit(spinbuf,cellcount[bestcell]+cellcount2[bestcell]); |
---|
433 | |
---|
434 | } |
---|
435 | |
---|
436 | /* now see that all points that had the dispersed cell as second |
---|
437 | choice have second choice reassigned */ |
---|
438 | head=secondhead[bestcell]; |
---|
439 | secondhead[bestcell]=-1; |
---|
440 | while(head!=-1){ |
---|
441 | float *ppt=pointlist+head*dim; |
---|
442 | /* who are we assigned to now? */ |
---|
443 | int firstentry=closest(b,ppt,-1); |
---|
444 | /* what is the new second closest match? */ |
---|
445 | int secondentry=closest(b,ppt,firstentry); |
---|
446 | /* old second closest is the cell being disbanded */ |
---|
447 | float oldsecondmetric=_dist(dim,b->valuelist+dim*bestcell,ppt); |
---|
448 | /* new second closest error */ |
---|
449 | float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); |
---|
450 | long next=secondary[head]; |
---|
451 | |
---|
452 | if(head<points-entries){ |
---|
453 | cellcount2[secondentry]++; |
---|
454 | cellcount2[bestcell]--; |
---|
455 | cellerror[firstentry]+=secondmetric-oldsecondmetric; |
---|
456 | cellerrormax[firstentry]=max(cellerrormax[firstentry], |
---|
457 | _heuristic(b,ppt,secondentry)); |
---|
458 | } |
---|
459 | |
---|
460 | secondary[head]=secondhead[secondentry]; |
---|
461 | secondhead[secondentry]=head; |
---|
462 | head=next; |
---|
463 | |
---|
464 | if(cellcount2[bestcell]%128==0) |
---|
465 | spinnit(spinbuf,cellcount2[bestcell]); |
---|
466 | } |
---|
467 | |
---|
468 | cellsleft--; |
---|
469 | } |
---|
470 | |
---|
471 | /* paring is over. Build decision trees using points that now fall |
---|
472 | through the thresh matcher. */ |
---|
473 | /* we don't free membership; we flatten it in order to use in lp_split */ |
---|
474 | |
---|
475 | for(i=0;i<entries;i++){ |
---|
476 | long head=firsthead[i]; |
---|
477 | spinnit("rearranging membership cache... ",entries-i); |
---|
478 | while(head!=-1){ |
---|
479 | long next=membership[head]; |
---|
480 | membership[head]=i; |
---|
481 | head=next; |
---|
482 | } |
---|
483 | } |
---|
484 | |
---|
485 | free(secondhead); |
---|
486 | free(firsthead); |
---|
487 | free(cellerror); |
---|
488 | free(cellerrormax); |
---|
489 | free(secondary); |
---|
490 | |
---|
491 | pointindex=_ogg_malloc(points*sizeof(long)); |
---|
492 | /* make a point index of fall-through points */ |
---|
493 | for(i=0;i<points;i++){ |
---|
494 | int best=_best(b,pointlist+i*dim,1); |
---|
495 | if(best==-1) |
---|
496 | pointindex[indexedpoints++]=i; |
---|
497 | spinnit("finding orphaned points... ",points-i); |
---|
498 | } |
---|
499 | |
---|
500 | /* make an entry index */ |
---|
501 | entryindex=_ogg_malloc(entries*sizeof(long)); |
---|
502 | target=0; |
---|
503 | for(i=0;i<entries;i++){ |
---|
504 | if(b->c->lengthlist[i]>0) |
---|
505 | entryindex[target++]=i; |
---|
506 | } |
---|
507 | |
---|
508 | /* make working space for a reverse entry index */ |
---|
509 | reventry=_ogg_malloc(entries*sizeof(long)); |
---|
510 | |
---|
511 | /* do the split */ |
---|
512 | nt=b->c->nearest_tree= |
---|
513 | _ogg_calloc(1,sizeof(encode_aux_nearestmatch)); |
---|
514 | |
---|
515 | nt->alloc=4096; |
---|
516 | nt->ptr0=_ogg_malloc(sizeof(long)*nt->alloc); |
---|
517 | nt->ptr1=_ogg_malloc(sizeof(long)*nt->alloc); |
---|
518 | nt->p=_ogg_malloc(sizeof(long)*nt->alloc); |
---|
519 | nt->q=_ogg_malloc(sizeof(long)*nt->alloc); |
---|
520 | nt->aux=0; |
---|
521 | |
---|
522 | fprintf(stderr,"Leaves added: %d \n", |
---|
523 | lp_split(pointlist,points, |
---|
524 | b,entryindex,target, |
---|
525 | pointindex,indexedpoints, |
---|
526 | membership,reventry, |
---|
527 | 0,&pointssofar)); |
---|
528 | free(membership); |
---|
529 | free(reventry); |
---|
530 | free(pointindex); |
---|
531 | |
---|
532 | /* hack alert. I should just change the damned splitter and |
---|
533 | codebook writer */ |
---|
534 | for(i=0;i<nt->aux;i++)nt->p[i]*=dim; |
---|
535 | for(i=0;i<nt->aux;i++)nt->q[i]*=dim; |
---|
536 | |
---|
537 | /* recount hits. Build new lengthlist. reuse entryindex storage */ |
---|
538 | for(i=0;i<entries;i++)entryindex[i]=1; |
---|
539 | for(i=0;i<points-entries;i++){ |
---|
540 | int best=_best(b,pointlist+i*dim,1); |
---|
541 | float *a=pointlist+i*dim; |
---|
542 | if(!(i&0xff))spinnit("counting hits...",i); |
---|
543 | if(best==-1){ |
---|
544 | fprintf(stderr,"\nINTERNAL ERROR; a point count not be matched to a\n" |
---|
545 | "codebook entry. The new decision tree is broken.\n"); |
---|
546 | exit(1); |
---|
547 | } |
---|
548 | entryindex[best]++; |
---|
549 | } |
---|
550 | for(i=0;i<nt->aux;i++)nt->p[i]/=dim; |
---|
551 | for(i=0;i<nt->aux;i++)nt->q[i]/=dim; |
---|
552 | |
---|
553 | /* the lengthlist builder doesn't actually deal with 0 hit entries. |
---|
554 | So, we pack the 'sparse' hit list into a dense list, then unpack |
---|
555 | the lengths after the build */ |
---|
556 | { |
---|
557 | int upper=0; |
---|
558 | long *lengthlist=_ogg_calloc(entries,sizeof(long)); |
---|
559 | for(i=0;i<entries;i++){ |
---|
560 | if(b->c->lengthlist[i]>0) |
---|
561 | entryindex[upper++]=entryindex[i]; |
---|
562 | else{ |
---|
563 | if(entryindex[i]>1){ |
---|
564 | fprintf(stderr,"\nINTERNAL ERROR; _best matched to unused entry\n"); |
---|
565 | exit(1); |
---|
566 | } |
---|
567 | } |
---|
568 | } |
---|
569 | |
---|
570 | /* sanity check */ |
---|
571 | if(upper != target){ |
---|
572 | fprintf(stderr,"\nINTERNAL ERROR; packed the wrong number of entries\n"); |
---|
573 | exit(1); |
---|
574 | } |
---|
575 | |
---|
576 | build_tree_from_lengths(upper,entryindex,lengthlist); |
---|
577 | |
---|
578 | upper=0; |
---|
579 | for(i=0;i<entries;i++){ |
---|
580 | if(b->c->lengthlist[i]>0) |
---|
581 | b->c->lengthlist[i]=lengthlist[upper++]; |
---|
582 | } |
---|
583 | |
---|
584 | } |
---|
585 | } |
---|
586 | /* we're done. write it out. */ |
---|
587 | write_codebook(out,basename,b->c); |
---|
588 | |
---|
589 | fprintf(stderr,"\r \nDone.\n"); |
---|
590 | return(0); |
---|
591 | } |
---|
592 | |
---|
593 | |
---|
594 | |
---|
595 | |
---|