[16] | 1 | /******************************************************************** |
---|
| 2 | * * |
---|
| 3 | * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. * |
---|
| 4 | * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * |
---|
| 5 | * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * |
---|
| 6 | * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * |
---|
| 7 | * * |
---|
| 8 | * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 * |
---|
| 9 | * by the Xiph.Org Foundation http://www.xiph.org/ * |
---|
| 10 | * * |
---|
| 11 | ******************************************************************** |
---|
| 12 | |
---|
| 13 | function: utility for paring low hit count cells from lattice codebook |
---|
| 14 | last mod: $Id: latticepare.c 13293 2007-07-24 00:09:47Z xiphmont $ |
---|
| 15 | |
---|
| 16 | ********************************************************************/ |
---|
| 17 | |
---|
| 18 | #include <stdlib.h> |
---|
| 19 | #include <stdio.h> |
---|
| 20 | #include <math.h> |
---|
| 21 | #include <string.h> |
---|
| 22 | #include <errno.h> |
---|
| 23 | #include "../lib/scales.h" |
---|
| 24 | #include "bookutil.h" |
---|
| 25 | #include "vqgen.h" |
---|
| 26 | #include "vqsplit.h" |
---|
| 27 | #include "../lib/os.h" |
---|
| 28 | |
---|
| 29 | /* Lattice codebooks have two strengths: important fetaures that are |
---|
| 30 | poorly modelled by global error minimization training (eg, strong |
---|
| 31 | peaks) are not neglected 2) compact quantized representation. |
---|
| 32 | |
---|
| 33 | A fully populated lattice codebook, however, swings point 1 too far |
---|
| 34 | in the opposite direction; rare features need not be modelled quite |
---|
| 35 | so religiously and as such, we waste bits unless we eliminate the |
---|
| 36 | least common cells. The codebook rep supports unused cells, so we |
---|
| 37 | need to tag such cells and build an auxiliary (non-thresh) search |
---|
| 38 | mechanism to find the proper match quickly */ |
---|
| 39 | |
---|
| 40 | /* two basic steps; first is pare the cell for which dispersal creates |
---|
| 41 | the least additional error. This will naturally choose |
---|
| 42 | low-population cells and cells that have not taken on points from |
---|
| 43 | neighboring paring (but does not result in the lattice collapsing |
---|
| 44 | inward and leaving low population ares totally unmodelled). After |
---|
| 45 | paring has removed the desired number of cells, we need to build an |
---|
| 46 | auxiliary search for each culled point */ |
---|
| 47 | |
---|
| 48 | /* Although lattice books (due to threshhold-based matching) do not |
---|
| 49 | actually use error to make cell selections (in fact, it need not |
---|
| 50 | bear any relation), the 'secondbest' entry finder here is in fact |
---|
| 51 | error/distance based, so latticepare is only useful on such books */ |
---|
| 52 | |
---|
| 53 | /* command line: |
---|
| 54 | latticepare latticebook.vqh input_data.vqd <target_cells> |
---|
| 55 | |
---|
| 56 | produces a new output book on stdout |
---|
| 57 | */ |
---|
| 58 | |
---|
| 59 | static float _dist(int el,float *a, float *b){ |
---|
| 60 | int i; |
---|
| 61 | float acc=0.f; |
---|
| 62 | for(i=0;i<el;i++){ |
---|
| 63 | float val=(a[i]-b[i]); |
---|
| 64 | acc+=val*val; |
---|
| 65 | } |
---|
| 66 | return(acc); |
---|
| 67 | } |
---|
| 68 | |
---|
| 69 | static float *pointlist; |
---|
| 70 | static long points=0; |
---|
| 71 | |
---|
| 72 | void add_vector(codebook *b,float *vec,long n){ |
---|
| 73 | int dim=b->dim,i,j; |
---|
| 74 | int step=n/dim; |
---|
| 75 | for(i=0;i<step;i++){ |
---|
| 76 | for(j=i;j<n;j+=step){ |
---|
| 77 | pointlist[points++]=vec[j]; |
---|
| 78 | } |
---|
| 79 | } |
---|
| 80 | } |
---|
| 81 | |
---|
| 82 | static int bestm(codebook *b,float *vec){ |
---|
| 83 | encode_aux_threshmatch *tt=b->c->thresh_tree; |
---|
| 84 | int dim=b->dim; |
---|
| 85 | int i,k,o; |
---|
| 86 | int best=0; |
---|
| 87 | |
---|
| 88 | /* what would be the closest match if the codebook was fully |
---|
| 89 | populated? */ |
---|
| 90 | |
---|
| 91 | for(k=0,o=dim-1;k<dim;k++,o--){ |
---|
| 92 | int i; |
---|
| 93 | for(i=0;i<tt->threshvals-1;i++) |
---|
| 94 | if(vec[o]<tt->quantthresh[i])break; |
---|
| 95 | best=(best*tt->quantvals)+tt->quantmap[i]; |
---|
| 96 | } |
---|
| 97 | return(best); |
---|
| 98 | } |
---|
| 99 | |
---|
| 100 | static int closest(codebook *b,float *vec,int current){ |
---|
| 101 | encode_aux_threshmatch *tt=b->c->thresh_tree; |
---|
| 102 | int dim=b->dim; |
---|
| 103 | int i,k,o; |
---|
| 104 | |
---|
| 105 | float bestmetric=0; |
---|
| 106 | int bestentry=-1; |
---|
| 107 | int best=bestm(b,vec); |
---|
| 108 | |
---|
| 109 | if(current<0 && b->c->lengthlist[best]>0)return best; |
---|
| 110 | |
---|
| 111 | for(i=0;i<b->entries;i++){ |
---|
| 112 | if(b->c->lengthlist[i]>0 && i!=best && i!=current){ |
---|
| 113 | float thismetric=_dist(dim, vec, b->valuelist+i*dim); |
---|
| 114 | if(bestentry==-1 || thismetric<bestmetric){ |
---|
| 115 | bestentry=i; |
---|
| 116 | bestmetric=thismetric; |
---|
| 117 | } |
---|
| 118 | } |
---|
| 119 | } |
---|
| 120 | |
---|
| 121 | return(bestentry); |
---|
| 122 | } |
---|
| 123 | |
---|
| 124 | static float _heuristic(codebook *b,float *ppt,int secondbest){ |
---|
| 125 | float *secondcell=b->valuelist+secondbest*b->dim; |
---|
| 126 | int best=bestm(b,ppt); |
---|
| 127 | float *firstcell=b->valuelist+best*b->dim; |
---|
| 128 | float error=_dist(b->dim,firstcell,secondcell); |
---|
| 129 | float *zero=alloca(b->dim*sizeof(float)); |
---|
| 130 | float fromzero; |
---|
| 131 | |
---|
| 132 | memset(zero,0,b->dim*sizeof(float)); |
---|
| 133 | fromzero=sqrt(_dist(b->dim,firstcell,zero)); |
---|
| 134 | |
---|
| 135 | return(error/fromzero); |
---|
| 136 | } |
---|
| 137 | |
---|
| 138 | static int longsort(const void *a, const void *b){ |
---|
| 139 | return **(long **)b-**(long **)a; |
---|
| 140 | } |
---|
| 141 | |
---|
| 142 | void usage(void){ |
---|
| 143 | fprintf(stderr,"Ogg/Vorbis lattice codebook paring utility\n\n" |
---|
| 144 | "usage: latticepare book.vqh data.vqd <target_cells> <protected_cells> base\n" |
---|
| 145 | "where <target_cells> is the desired number of final cells (or -1\n" |
---|
| 146 | " for no change)\n" |
---|
| 147 | " <protected_cells> is the number of highest-hit count cells\n" |
---|
| 148 | " to protect from dispersal\n" |
---|
| 149 | " base is the base name (not including .vqh) of the new\n" |
---|
| 150 | " book\n\n"); |
---|
| 151 | exit(1); |
---|
| 152 | } |
---|
| 153 | |
---|
| 154 | int main(int argc,char *argv[]){ |
---|
| 155 | char *basename; |
---|
| 156 | codebook *b=NULL; |
---|
| 157 | int entries=0; |
---|
| 158 | int dim=0; |
---|
| 159 | long i,j,target=-1,protect=-1; |
---|
| 160 | FILE *out=NULL; |
---|
| 161 | |
---|
| 162 | int argnum=0; |
---|
| 163 | |
---|
| 164 | argv++; |
---|
| 165 | if(*argv==NULL){ |
---|
| 166 | usage(); |
---|
| 167 | exit(1); |
---|
| 168 | } |
---|
| 169 | |
---|
| 170 | while(*argv){ |
---|
| 171 | if(*argv[0]=='-'){ |
---|
| 172 | |
---|
| 173 | argv++; |
---|
| 174 | |
---|
| 175 | }else{ |
---|
| 176 | switch (argnum++){ |
---|
| 177 | case 0:case 1: |
---|
| 178 | { |
---|
| 179 | /* yes, this is evil. However, it's very convenient to parse file |
---|
| 180 | extentions */ |
---|
| 181 | |
---|
| 182 | /* input file. What kind? */ |
---|
| 183 | char *dot; |
---|
| 184 | char *ext=NULL; |
---|
| 185 | char *name=strdup(*argv++); |
---|
| 186 | dot=strrchr(name,'.'); |
---|
| 187 | if(dot) |
---|
| 188 | ext=dot+1; |
---|
| 189 | else{ |
---|
| 190 | ext=""; |
---|
| 191 | |
---|
| 192 | } |
---|
| 193 | |
---|
| 194 | |
---|
| 195 | /* codebook */ |
---|
| 196 | if(!strcmp(ext,"vqh")){ |
---|
| 197 | |
---|
| 198 | basename=strrchr(name,'/'); |
---|
| 199 | if(basename) |
---|
| 200 | basename=strdup(basename)+1; |
---|
| 201 | else |
---|
| 202 | basename=strdup(name); |
---|
| 203 | dot=strrchr(basename,'.'); |
---|
| 204 | if(dot)*dot='\0'; |
---|
| 205 | |
---|
| 206 | b=codebook_load(name); |
---|
| 207 | dim=b->dim; |
---|
| 208 | entries=b->entries; |
---|
| 209 | } |
---|
| 210 | |
---|
| 211 | /* data file; we do actually need to suck it into memory */ |
---|
| 212 | /* we're dealing with just one book, so we can de-interleave */ |
---|
| 213 | if(!strcmp(ext,"vqd") && !points){ |
---|
| 214 | int cols; |
---|
| 215 | long lines=0; |
---|
| 216 | char *line; |
---|
| 217 | float *vec; |
---|
| 218 | FILE *in=fopen(name,"r"); |
---|
| 219 | if(!in){ |
---|
| 220 | fprintf(stderr,"Could not open input file %s\n",name); |
---|
| 221 | exit(1); |
---|
| 222 | } |
---|
| 223 | |
---|
| 224 | reset_next_value(); |
---|
| 225 | line=setup_line(in); |
---|
| 226 | /* count cols before we start reading */ |
---|
| 227 | { |
---|
| 228 | char *temp=line; |
---|
| 229 | while(*temp==' ')temp++; |
---|
| 230 | for(cols=0;*temp;cols++){ |
---|
| 231 | while(*temp>32)temp++; |
---|
| 232 | while(*temp==' ')temp++; |
---|
| 233 | } |
---|
| 234 | } |
---|
| 235 | vec=alloca(cols*sizeof(float)); |
---|
| 236 | /* count, then load, to avoid fragmenting the hell out of |
---|
| 237 | memory */ |
---|
| 238 | while(line){ |
---|
| 239 | lines++; |
---|
| 240 | for(j=0;j<cols;j++) |
---|
| 241 | if(get_line_value(in,vec+j)){ |
---|
| 242 | fprintf(stderr,"Too few columns on line %ld in data file\n",lines); |
---|
| 243 | exit(1); |
---|
| 244 | } |
---|
| 245 | if((lines&0xff)==0)spinnit("counting samples...",lines*cols); |
---|
| 246 | line=setup_line(in); |
---|
| 247 | } |
---|
| 248 | pointlist=_ogg_malloc((cols*lines+entries*dim)*sizeof(float)); |
---|
| 249 | |
---|
| 250 | rewind(in); |
---|
| 251 | line=setup_line(in); |
---|
| 252 | while(line){ |
---|
| 253 | lines--; |
---|
| 254 | for(j=0;j<cols;j++) |
---|
| 255 | if(get_line_value(in,vec+j)){ |
---|
| 256 | fprintf(stderr,"Too few columns on line %ld in data file\n",lines); |
---|
| 257 | exit(1); |
---|
| 258 | } |
---|
| 259 | /* deinterleave, add to heap */ |
---|
| 260 | add_vector(b,vec,cols); |
---|
| 261 | if((lines&0xff)==0)spinnit("loading samples...",lines*cols); |
---|
| 262 | |
---|
| 263 | line=setup_line(in); |
---|
| 264 | } |
---|
| 265 | fclose(in); |
---|
| 266 | } |
---|
| 267 | } |
---|
| 268 | break; |
---|
| 269 | case 2: |
---|
| 270 | target=atol(*argv++); |
---|
| 271 | if(target==0)target=entries; |
---|
| 272 | break; |
---|
| 273 | case 3: |
---|
| 274 | protect=atol(*argv++); |
---|
| 275 | break; |
---|
| 276 | case 4: |
---|
| 277 | { |
---|
| 278 | char *buff=alloca(strlen(*argv)+5); |
---|
| 279 | sprintf(buff,"%s.vqh",*argv); |
---|
| 280 | basename=*argv++; |
---|
| 281 | |
---|
| 282 | out=fopen(buff,"w"); |
---|
| 283 | if(!out){ |
---|
| 284 | fprintf(stderr,"unable ot open %s for output",buff); |
---|
| 285 | exit(1); |
---|
| 286 | } |
---|
| 287 | } |
---|
| 288 | break; |
---|
| 289 | default: |
---|
| 290 | usage(); |
---|
| 291 | } |
---|
| 292 | } |
---|
| 293 | } |
---|
| 294 | if(!entries || !points || !out)usage(); |
---|
| 295 | if(target==-1)usage(); |
---|
| 296 | |
---|
| 297 | /* add guard points */ |
---|
| 298 | for(i=0;i<entries;i++) |
---|
| 299 | for(j=0;j<dim;j++) |
---|
| 300 | pointlist[points++]=b->valuelist[i*dim+j]; |
---|
| 301 | |
---|
| 302 | points/=dim; |
---|
| 303 | |
---|
| 304 | /* set up auxiliary vectors for error tracking */ |
---|
| 305 | { |
---|
| 306 | encode_aux_nearestmatch *nt=NULL; |
---|
| 307 | long pointssofar=0; |
---|
| 308 | long *pointindex; |
---|
| 309 | long indexedpoints=0; |
---|
| 310 | long *entryindex; |
---|
| 311 | long *reventry; |
---|
| 312 | long *membership=_ogg_malloc(points*sizeof(long)); |
---|
| 313 | long *firsthead=_ogg_malloc(entries*sizeof(long)); |
---|
| 314 | long *secondary=_ogg_malloc(points*sizeof(long)); |
---|
| 315 | long *secondhead=_ogg_malloc(entries*sizeof(long)); |
---|
| 316 | |
---|
| 317 | long *cellcount=_ogg_calloc(entries,sizeof(long)); |
---|
| 318 | long *cellcount2=_ogg_calloc(entries,sizeof(long)); |
---|
| 319 | float *cellerror=_ogg_calloc(entries,sizeof(float)); |
---|
| 320 | float *cellerrormax=_ogg_calloc(entries,sizeof(float)); |
---|
| 321 | long cellsleft=entries; |
---|
| 322 | for(i=0;i<points;i++)membership[i]=-1; |
---|
| 323 | for(i=0;i<entries;i++)firsthead[i]=-1; |
---|
| 324 | for(i=0;i<points;i++)secondary[i]=-1; |
---|
| 325 | for(i=0;i<entries;i++)secondhead[i]=-1; |
---|
| 326 | |
---|
| 327 | for(i=0;i<points;i++){ |
---|
| 328 | /* assign vectors to the nearest cell. Also keep track of second |
---|
| 329 | nearest for error statistics */ |
---|
| 330 | float *ppt=pointlist+i*dim; |
---|
| 331 | int firstentry=closest(b,ppt,-1); |
---|
| 332 | int secondentry=closest(b,ppt,firstentry); |
---|
| 333 | float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt); |
---|
| 334 | float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); |
---|
| 335 | |
---|
| 336 | if(!(i&0xff))spinnit("initializing... ",points-i); |
---|
| 337 | |
---|
| 338 | membership[i]=firsthead[firstentry]; |
---|
| 339 | firsthead[firstentry]=i; |
---|
| 340 | secondary[i]=secondhead[secondentry]; |
---|
| 341 | secondhead[secondentry]=i; |
---|
| 342 | |
---|
| 343 | if(i<points-entries){ |
---|
| 344 | cellerror[firstentry]+=secondmetric-firstmetric; |
---|
| 345 | cellerrormax[firstentry]=max(cellerrormax[firstentry], |
---|
| 346 | _heuristic(b,ppt,secondentry)); |
---|
| 347 | cellcount[firstentry]++; |
---|
| 348 | cellcount2[secondentry]++; |
---|
| 349 | } |
---|
| 350 | } |
---|
| 351 | |
---|
| 352 | /* which cells are most heavily populated? Protect as many from |
---|
| 353 | dispersal as the user has requested */ |
---|
| 354 | { |
---|
| 355 | long **countindex=_ogg_calloc(entries,sizeof(long *)); |
---|
| 356 | for(i=0;i<entries;i++)countindex[i]=cellcount+i; |
---|
| 357 | qsort(countindex,entries,sizeof(long *),longsort); |
---|
| 358 | for(i=0;i<protect;i++){ |
---|
| 359 | int ptr=countindex[i]-cellcount; |
---|
| 360 | cellerrormax[ptr]=9e50f; |
---|
| 361 | } |
---|
| 362 | } |
---|
| 363 | |
---|
| 364 | { |
---|
| 365 | fprintf(stderr,"\r"); |
---|
| 366 | for(i=0;i<entries;i++){ |
---|
| 367 | /* decompose index */ |
---|
| 368 | int entry=i; |
---|
| 369 | for(j=0;j<dim;j++){ |
---|
| 370 | fprintf(stderr,"%d:",entry%b->c->thresh_tree->quantvals); |
---|
| 371 | entry/=b->c->thresh_tree->quantvals; |
---|
| 372 | } |
---|
| 373 | |
---|
| 374 | fprintf(stderr,":%ld/%ld, ",cellcount[i],cellcount2[i]); |
---|
| 375 | } |
---|
| 376 | fprintf(stderr,"\n"); |
---|
| 377 | } |
---|
| 378 | |
---|
| 379 | /* do the automatic cull request */ |
---|
| 380 | while(cellsleft>target){ |
---|
| 381 | int bestcell=-1; |
---|
| 382 | float besterror=0; |
---|
| 383 | float besterror2=0; |
---|
| 384 | long head=-1; |
---|
| 385 | char spinbuf[80]; |
---|
| 386 | sprintf(spinbuf,"cells left to eliminate: %ld : ",cellsleft-target); |
---|
| 387 | |
---|
| 388 | /* find the cell with lowest removal impact */ |
---|
| 389 | for(i=0;i<entries;i++){ |
---|
| 390 | if(b->c->lengthlist[i]>0){ |
---|
| 391 | if(bestcell==-1 || cellerrormax[i]<=besterror2){ |
---|
| 392 | if(bestcell==-1 || cellerrormax[i]<besterror2 || |
---|
| 393 | besterror>cellerror[i]){ |
---|
| 394 | besterror=cellerror[i]; |
---|
| 395 | besterror2=cellerrormax[i]; |
---|
| 396 | bestcell=i; |
---|
| 397 | } |
---|
| 398 | } |
---|
| 399 | } |
---|
| 400 | } |
---|
| 401 | |
---|
| 402 | fprintf(stderr,"\reliminating cell %d \n" |
---|
| 403 | " dispersal error of %g max/%g total (%ld hits)\n", |
---|
| 404 | bestcell,besterror2,besterror,cellcount[bestcell]); |
---|
| 405 | |
---|
| 406 | /* disperse it. move each point out, adding it (properly) to |
---|
| 407 | the second best */ |
---|
| 408 | b->c->lengthlist[bestcell]=0; |
---|
| 409 | head=firsthead[bestcell]; |
---|
| 410 | firsthead[bestcell]=-1; |
---|
| 411 | while(head!=-1){ |
---|
| 412 | /* head is a point number */ |
---|
| 413 | float *ppt=pointlist+head*dim; |
---|
| 414 | int firstentry=closest(b,ppt,-1); |
---|
| 415 | int secondentry=closest(b,ppt,firstentry); |
---|
| 416 | float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt); |
---|
| 417 | float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); |
---|
| 418 | long next=membership[head]; |
---|
| 419 | |
---|
| 420 | if(head<points-entries){ |
---|
| 421 | cellcount[firstentry]++; |
---|
| 422 | cellcount[bestcell]--; |
---|
| 423 | cellerror[firstentry]+=secondmetric-firstmetric; |
---|
| 424 | cellerrormax[firstentry]=max(cellerrormax[firstentry], |
---|
| 425 | _heuristic(b,ppt,secondentry)); |
---|
| 426 | } |
---|
| 427 | |
---|
| 428 | membership[head]=firsthead[firstentry]; |
---|
| 429 | firsthead[firstentry]=head; |
---|
| 430 | head=next; |
---|
| 431 | if(cellcount[bestcell]%128==0) |
---|
| 432 | spinnit(spinbuf,cellcount[bestcell]+cellcount2[bestcell]); |
---|
| 433 | |
---|
| 434 | } |
---|
| 435 | |
---|
| 436 | /* now see that all points that had the dispersed cell as second |
---|
| 437 | choice have second choice reassigned */ |
---|
| 438 | head=secondhead[bestcell]; |
---|
| 439 | secondhead[bestcell]=-1; |
---|
| 440 | while(head!=-1){ |
---|
| 441 | float *ppt=pointlist+head*dim; |
---|
| 442 | /* who are we assigned to now? */ |
---|
| 443 | int firstentry=closest(b,ppt,-1); |
---|
| 444 | /* what is the new second closest match? */ |
---|
| 445 | int secondentry=closest(b,ppt,firstentry); |
---|
| 446 | /* old second closest is the cell being disbanded */ |
---|
| 447 | float oldsecondmetric=_dist(dim,b->valuelist+dim*bestcell,ppt); |
---|
| 448 | /* new second closest error */ |
---|
| 449 | float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); |
---|
| 450 | long next=secondary[head]; |
---|
| 451 | |
---|
| 452 | if(head<points-entries){ |
---|
| 453 | cellcount2[secondentry]++; |
---|
| 454 | cellcount2[bestcell]--; |
---|
| 455 | cellerror[firstentry]+=secondmetric-oldsecondmetric; |
---|
| 456 | cellerrormax[firstentry]=max(cellerrormax[firstentry], |
---|
| 457 | _heuristic(b,ppt,secondentry)); |
---|
| 458 | } |
---|
| 459 | |
---|
| 460 | secondary[head]=secondhead[secondentry]; |
---|
| 461 | secondhead[secondentry]=head; |
---|
| 462 | head=next; |
---|
| 463 | |
---|
| 464 | if(cellcount2[bestcell]%128==0) |
---|
| 465 | spinnit(spinbuf,cellcount2[bestcell]); |
---|
| 466 | } |
---|
| 467 | |
---|
| 468 | cellsleft--; |
---|
| 469 | } |
---|
| 470 | |
---|
| 471 | /* paring is over. Build decision trees using points that now fall |
---|
| 472 | through the thresh matcher. */ |
---|
| 473 | /* we don't free membership; we flatten it in order to use in lp_split */ |
---|
| 474 | |
---|
| 475 | for(i=0;i<entries;i++){ |
---|
| 476 | long head=firsthead[i]; |
---|
| 477 | spinnit("rearranging membership cache... ",entries-i); |
---|
| 478 | while(head!=-1){ |
---|
| 479 | long next=membership[head]; |
---|
| 480 | membership[head]=i; |
---|
| 481 | head=next; |
---|
| 482 | } |
---|
| 483 | } |
---|
| 484 | |
---|
| 485 | free(secondhead); |
---|
| 486 | free(firsthead); |
---|
| 487 | free(cellerror); |
---|
| 488 | free(cellerrormax); |
---|
| 489 | free(secondary); |
---|
| 490 | |
---|
| 491 | pointindex=_ogg_malloc(points*sizeof(long)); |
---|
| 492 | /* make a point index of fall-through points */ |
---|
| 493 | for(i=0;i<points;i++){ |
---|
| 494 | int best=_best(b,pointlist+i*dim,1); |
---|
| 495 | if(best==-1) |
---|
| 496 | pointindex[indexedpoints++]=i; |
---|
| 497 | spinnit("finding orphaned points... ",points-i); |
---|
| 498 | } |
---|
| 499 | |
---|
| 500 | /* make an entry index */ |
---|
| 501 | entryindex=_ogg_malloc(entries*sizeof(long)); |
---|
| 502 | target=0; |
---|
| 503 | for(i=0;i<entries;i++){ |
---|
| 504 | if(b->c->lengthlist[i]>0) |
---|
| 505 | entryindex[target++]=i; |
---|
| 506 | } |
---|
| 507 | |
---|
| 508 | /* make working space for a reverse entry index */ |
---|
| 509 | reventry=_ogg_malloc(entries*sizeof(long)); |
---|
| 510 | |
---|
| 511 | /* do the split */ |
---|
| 512 | nt=b->c->nearest_tree= |
---|
| 513 | _ogg_calloc(1,sizeof(encode_aux_nearestmatch)); |
---|
| 514 | |
---|
| 515 | nt->alloc=4096; |
---|
| 516 | nt->ptr0=_ogg_malloc(sizeof(long)*nt->alloc); |
---|
| 517 | nt->ptr1=_ogg_malloc(sizeof(long)*nt->alloc); |
---|
| 518 | nt->p=_ogg_malloc(sizeof(long)*nt->alloc); |
---|
| 519 | nt->q=_ogg_malloc(sizeof(long)*nt->alloc); |
---|
| 520 | nt->aux=0; |
---|
| 521 | |
---|
| 522 | fprintf(stderr,"Leaves added: %d \n", |
---|
| 523 | lp_split(pointlist,points, |
---|
| 524 | b,entryindex,target, |
---|
| 525 | pointindex,indexedpoints, |
---|
| 526 | membership,reventry, |
---|
| 527 | 0,&pointssofar)); |
---|
| 528 | free(membership); |
---|
| 529 | free(reventry); |
---|
| 530 | free(pointindex); |
---|
| 531 | |
---|
| 532 | /* hack alert. I should just change the damned splitter and |
---|
| 533 | codebook writer */ |
---|
| 534 | for(i=0;i<nt->aux;i++)nt->p[i]*=dim; |
---|
| 535 | for(i=0;i<nt->aux;i++)nt->q[i]*=dim; |
---|
| 536 | |
---|
| 537 | /* recount hits. Build new lengthlist. reuse entryindex storage */ |
---|
| 538 | for(i=0;i<entries;i++)entryindex[i]=1; |
---|
| 539 | for(i=0;i<points-entries;i++){ |
---|
| 540 | int best=_best(b,pointlist+i*dim,1); |
---|
| 541 | float *a=pointlist+i*dim; |
---|
| 542 | if(!(i&0xff))spinnit("counting hits...",i); |
---|
| 543 | if(best==-1){ |
---|
| 544 | fprintf(stderr,"\nINTERNAL ERROR; a point count not be matched to a\n" |
---|
| 545 | "codebook entry. The new decision tree is broken.\n"); |
---|
| 546 | exit(1); |
---|
| 547 | } |
---|
| 548 | entryindex[best]++; |
---|
| 549 | } |
---|
| 550 | for(i=0;i<nt->aux;i++)nt->p[i]/=dim; |
---|
| 551 | for(i=0;i<nt->aux;i++)nt->q[i]/=dim; |
---|
| 552 | |
---|
| 553 | /* the lengthlist builder doesn't actually deal with 0 hit entries. |
---|
| 554 | So, we pack the 'sparse' hit list into a dense list, then unpack |
---|
| 555 | the lengths after the build */ |
---|
| 556 | { |
---|
| 557 | int upper=0; |
---|
| 558 | long *lengthlist=_ogg_calloc(entries,sizeof(long)); |
---|
| 559 | for(i=0;i<entries;i++){ |
---|
| 560 | if(b->c->lengthlist[i]>0) |
---|
| 561 | entryindex[upper++]=entryindex[i]; |
---|
| 562 | else{ |
---|
| 563 | if(entryindex[i]>1){ |
---|
| 564 | fprintf(stderr,"\nINTERNAL ERROR; _best matched to unused entry\n"); |
---|
| 565 | exit(1); |
---|
| 566 | } |
---|
| 567 | } |
---|
| 568 | } |
---|
| 569 | |
---|
| 570 | /* sanity check */ |
---|
| 571 | if(upper != target){ |
---|
| 572 | fprintf(stderr,"\nINTERNAL ERROR; packed the wrong number of entries\n"); |
---|
| 573 | exit(1); |
---|
| 574 | } |
---|
| 575 | |
---|
| 576 | build_tree_from_lengths(upper,entryindex,lengthlist); |
---|
| 577 | |
---|
| 578 | upper=0; |
---|
| 579 | for(i=0;i<entries;i++){ |
---|
| 580 | if(b->c->lengthlist[i]>0) |
---|
| 581 | b->c->lengthlist[i]=lengthlist[upper++]; |
---|
| 582 | } |
---|
| 583 | |
---|
| 584 | } |
---|
| 585 | } |
---|
| 586 | /* we're done. write it out. */ |
---|
| 587 | write_codebook(out,basename,b->c); |
---|
| 588 | |
---|
| 589 | fprintf(stderr,"\r \nDone.\n"); |
---|
| 590 | return(0); |
---|
| 591 | } |
---|
| 592 | |
---|
| 593 | |
---|
| 594 | |
---|
| 595 | |
---|