zenilib  0.5.3.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
vqgen.c
Go to the documentation of this file.
1 /********************************************************************
2  * *
3  * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7  * *
8  * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 *
9  * by the Xiph.Org Foundation http://www.xiph.org/ *
10  * *
11  ********************************************************************
12 
13  function: train a VQ codebook
14  last mod: $Id: vqgen.c 16037 2009-05-26 21:10:58Z xiphmont $
15 
16  ********************************************************************/
17 
18 /* This code is *not* part of libvorbis. It is used to generate
19  trained codebooks offline and then spit the results into a
20  pregenerated codebook that is compiled into libvorbis. It is an
21  expensive (but good) algorithm. Run it on big iron. */
22 
23 /* There are so many optimizations to explore in *both* stages that
24  considering the undertaking is almost withering. For now, we brute
25  force it all */
26 
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <math.h>
30 #include <string.h>
31 
32 #include "vqgen.h"
33 #include "bookutil.h"
34 
35 /* Codebook generation happens in two steps:
36 
37  1) Train the codebook with data collected from the encoder: We use
38  one of a few error metrics (which represent the distance between a
39  given data point and a candidate point in the training set) to
40  divide the training set up into cells representing roughly equal
41  probability of occurring.
42 
43  2) Generate the codebook and auxiliary data from the trained data set
44 */
45 
46 /* Codebook training ****************************************************
47  *
48  * The basic idea here is that a VQ codebook is like an m-dimensional
49  * foam with n bubbles. The bubbles compete for space/volume and are
50  * 'pressurized' [biased] according to some metric. The basic alg
51  * iterates through allowing the bubbles to compete for space until
52  * they converge (if the damping is dome properly) on a steady-state
53  * solution. Individual input points, collected from libvorbis, are
54  * used to train the algorithm monte-carlo style. */
55 
56 /* internal helpers *****************************************************/
57 #define vN(data,i) (data+v->elements*i)
58 
59 /* default metric; squared 'distance' from desired value. */
60 float _dist(vqgen *v,float *a, float *b){
61  int i;
62  int el=v->elements;
63  float acc=0.f;
64  for(i=0;i<el;i++){
65  float val=(a[i]-b[i]);
66  acc+=val*val;
67  }
68  return sqrt(acc);
69 }
70 
71 float *_weight_null(vqgen *v,float *a){
72  return a;
73 }
74 
75 /* *must* be beefed up. */
77  long i;
78  for(i=0;i<v->entries;i++)
79  memcpy(_now(v,i),_point(v,i),sizeof(float)*v->elements);
80  v->seeded=1;
81 }
82 
83 int directdsort(const void *a, const void *b){
84  float av=*((float *)a);
85  float bv=*((float *)b);
86  return (av<bv)-(av>bv);
87 }
88 
90  int j,k;
91  float min=-1.f,max=-1.f,mean=0.f,acc=0.f;
92  long dup=0,unused=0;
93  #ifdef NOISY
94  int i;
95  char buff[80];
96  float spacings[v->entries];
97  int count=0;
98  FILE *cells;
99  sprintf(buff,"cellspace%d.m",v->it);
100  cells=fopen(buff,"w");
101 #endif
102 
103  /* minimum, maximum, cell spacing */
104  for(j=0;j<v->entries;j++){
105  float localmin=-1.;
106 
107  for(k=0;k<v->entries;k++){
108  if(j!=k){
109  float this=_dist(v,_now(v,j),_now(v,k));
110  if(this>0){
111  if(v->assigned[k] && (localmin==-1 || this<localmin))
112  localmin=this;
113  }else{
114  if(k<j){
115  dup++;
116  break;
117  }
118  }
119  }
120  }
121  if(k<v->entries)continue;
122 
123  if(v->assigned[j]==0){
124  unused++;
125  continue;
126  }
127 
128  localmin=v->max[j]+localmin/2; /* this gives us rough diameter */
129  if(min==-1 || localmin<min)min=localmin;
130  if(max==-1 || localmin>max)max=localmin;
131  mean+=localmin;
132  acc++;
133 #ifdef NOISY
134  spacings[count++]=localmin;
135 #endif
136  }
137 
138  fprintf(stderr,"cell diameter: %.03g::%.03g::%.03g (%ld unused/%ld dup)\n",
139  min,mean/acc,max,unused,dup);
140 
141 #ifdef NOISY
142  qsort(spacings,count,sizeof(float),directdsort);
143  for(i=0;i<count;i++)
144  fprintf(cells,"%g\n",spacings[i]);
145  fclose(cells);
146 #endif
147 
148 }
149 
150 /* External calls *******************************************************/
151 
152 /* We have two forms of quantization; in the first, each vector
153  element in the codebook entry is orthogonal. Residues would use this
154  quantization for example.
155 
156  In the second, we have a sequence of monotonically increasing
157  values that we wish to quantize as deltas (to save space). We
158  still need to quantize so that absolute values are accurate. For
159  example, LSP quantizes all absolute values, but the book encodes
160  distance between values because each successive value is larger
161  than the preceeding value. Thus the desired quantibits apply to
162  the encoded (delta) values, not abs positions. This requires minor
163  additional encode-side trickery. */
164 
166 
167  float maxdel;
168  float mindel;
169 
170  float delta;
171  float maxquant=((1<<q->quant)-1);
172 
173  int j,k;
174 
175  mindel=maxdel=_now(v,0)[0];
176 
177  for(j=0;j<v->entries;j++){
178  float last=0.f;
179  for(k=0;k<v->elements;k++){
180  if(mindel>_now(v,j)[k]-last)mindel=_now(v,j)[k]-last;
181  if(maxdel<_now(v,j)[k]-last)maxdel=_now(v,j)[k]-last;
182  if(q->sequencep)last=_now(v,j)[k];
183  }
184  }
185 
186 
187  /* first find the basic delta amount from the maximum span to be
188  encoded. Loosen the delta slightly to allow for additional error
189  during sequence quantization */
190 
191  delta=(maxdel-mindel)/((1<<q->quant)-1.5f);
192 
193  q->min=_float32_pack(mindel);
194  q->delta=_float32_pack(delta);
195 
196  mindel=_float32_unpack(q->min);
197  delta=_float32_unpack(q->delta);
198 
199  for(j=0;j<v->entries;j++){
200  float last=0;
201  for(k=0;k<v->elements;k++){
202  float val=_now(v,j)[k];
203  float now=rint((val-last-mindel)/delta);
204 
205  _now(v,j)[k]=now;
206  if(now<0){
207  /* be paranoid; this should be impossible */
208  fprintf(stderr,"fault; quantized value<0\n");
209  exit(1);
210  }
211 
212  if(now>maxquant){
213  /* be paranoid; this should be impossible */
214  fprintf(stderr,"fault; quantized value>max\n");
215  exit(1);
216  }
217  if(q->sequencep)last=(now*delta)+mindel+last;
218  }
219  }
220 }
221 
222 /* much easier :-). Unlike in the codebook, we don't un-log log
223  scales; we just make sure they're properly offset. */
225  long j,k;
226  float mindel=_float32_unpack(q->min);
227  float delta=_float32_unpack(q->delta);
228 
229  for(j=0;j<v->entries;j++){
230  float last=0.f;
231  for(k=0;k<v->elements;k++){
232  float now=_now(v,j)[k];
233  now=fabs(now)*delta+last+mindel;
234  if(q->sequencep)last=now;
235  _now(v,j)[k]=now;
236  }
237  }
238 }
239 
240 void vqgen_init(vqgen *v,int elements,int aux,int entries,float mindist,
241  float (*metric)(vqgen *,float *, float *),
242  float *(*weight)(vqgen *,float *),int centroid){
243  memset(v,0,sizeof(vqgen));
244 
245  v->centroid=centroid;
246  v->elements=elements;
247  v->aux=aux;
248  v->mindist=mindist;
249  v->allocated=32768;
250  v->pointlist=_ogg_malloc(v->allocated*(v->elements+v->aux)*sizeof(float));
251 
252  v->entries=entries;
253  v->entrylist=_ogg_malloc(v->entries*v->elements*sizeof(float));
254  v->assigned=_ogg_malloc(v->entries*sizeof(long));
255  v->bias=_ogg_calloc(v->entries,sizeof(float));
256  v->max=_ogg_calloc(v->entries,sizeof(float));
257  if(metric)
258  v->metric_func=metric;
259  else
260  v->metric_func=_dist;
261  if(weight)
262  v->weight_func=weight;
263  else
265 
266  v->asciipoints=tmpfile();
267 
268 }
269 
270 void vqgen_addpoint(vqgen *v, float *p,float *a){
271  int k;
272  for(k=0;k<v->elements;k++)
273  fprintf(v->asciipoints,"%.12g\n",p[k]);
274  for(k=0;k<v->aux;k++)
275  fprintf(v->asciipoints,"%.12g\n",a[k]);
276 
277  if(v->points>=v->allocated){
278  v->allocated*=2;
280  sizeof(float));
281  }
282 
283  memcpy(_point(v,v->points),p,sizeof(float)*v->elements);
284  if(v->aux)memcpy(_point(v,v->points)+v->elements,a,sizeof(float)*v->aux);
285 
286  /* quantize to the density mesh if it's selected */
287  if(v->mindist>0.f){
288  /* quantize to the mesh */
289  for(k=0;k<v->elements+v->aux;k++)
290  _point(v,v->points)[k]=
291  rint(_point(v,v->points)[k]/v->mindist)*v->mindist;
292  }
293  v->points++;
294  if(!(v->points&0xff))spinnit("loading... ",v->points);
295 }
296 
297 /* yes, not threadsafe. These utils aren't */
298 static int sortit=0;
299 static int sortsize=0;
300 static int meshcomp(const void *a,const void *b){
301  if(((sortit++)&0xfff)==0)spinnit("sorting mesh...",sortit);
302  return(memcmp(a,b,sortsize));
303 }
304 
306  sortit=0;
307  if(v->mindist>0.f){
308  long i,march=1;
309 
310  /* sort to make uniqueness detection trivial */
311  sortsize=(v->elements+v->aux)*sizeof(float);
312  qsort(v->pointlist,v->points,sortsize,meshcomp);
313 
314  /* now march through and eliminate dupes */
315  for(i=1;i<v->points;i++){
316  if(memcmp(_point(v,i),_point(v,i-1),sortsize)){
317  /* a new, unique entry. march it down */
318  if(i>march)memcpy(_point(v,march),_point(v,i),sortsize);
319  march++;
320  }
321  spinnit("eliminating density... ",v->points-i);
322  }
323 
324  /* we're done */
325  fprintf(stderr,"\r%ld training points remining out of %ld"
326  " after density mesh (%ld%%)\n",march,v->points,march*100/v->points);
327  v->points=march;
328 
329  }
330  v->sorted=1;
331 }
332 
333 float vqgen_iterate(vqgen *v,int biasp){
334  long i,j,k;
335 
336  float fdesired;
337  long desired;
338  long desired2;
339 
340  float asserror=0.f;
341  float meterror=0.f;
342  float *new;
343  float *new2;
344  long *nearcount;
345  float *nearbias;
346  #ifdef NOISY
347  char buff[80];
348  FILE *assig;
349  FILE *bias;
350  FILE *cells;
351  sprintf(buff,"cells%d.m",v->it);
352  cells=fopen(buff,"w");
353  sprintf(buff,"assig%d.m",v->it);
354  assig=fopen(buff,"w");
355  sprintf(buff,"bias%d.m",v->it);
356  bias=fopen(buff,"w");
357  #endif
358 
359 
360  if(v->entries<2){
361  fprintf(stderr,"generation requires at least two entries\n");
362  exit(1);
363  }
364 
365  if(!v->sorted)vqgen_sortmesh(v);
366  if(!v->seeded)_vqgen_seed(v);
367 
368  fdesired=(float)v->points/v->entries;
369  desired=fdesired;
370  desired2=desired*2;
371  new=_ogg_malloc(sizeof(float)*v->entries*v->elements);
372  new2=_ogg_malloc(sizeof(float)*v->entries*v->elements);
373  nearcount=_ogg_malloc(v->entries*sizeof(long));
374  nearbias=_ogg_malloc(v->entries*desired2*sizeof(float));
375 
376  /* fill in nearest points for entry biasing */
377  /*memset(v->bias,0,sizeof(float)*v->entries);*/
378  memset(nearcount,0,sizeof(long)*v->entries);
379  memset(v->assigned,0,sizeof(long)*v->entries);
380  if(biasp){
381  for(i=0;i<v->points;i++){
382  float *ppt=v->weight_func(v,_point(v,i));
383  float firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
384  float secondmetric=v->metric_func(v,_now(v,1),ppt)+v->bias[1];
385  long firstentry=0;
386  long secondentry=1;
387 
388  if(!(i&0xff))spinnit("biasing... ",v->points+v->points+v->entries-i);
389 
390  if(firstmetric>secondmetric){
391  float temp=firstmetric;
392  firstmetric=secondmetric;
393  secondmetric=temp;
394  firstentry=1;
395  secondentry=0;
396  }
397 
398  for(j=2;j<v->entries;j++){
399  float thismetric=v->metric_func(v,_now(v,j),ppt)+v->bias[j];
400  if(thismetric<secondmetric){
401  if(thismetric<firstmetric){
402  secondmetric=firstmetric;
403  secondentry=firstentry;
404  firstmetric=thismetric;
405  firstentry=j;
406  }else{
407  secondmetric=thismetric;
408  secondentry=j;
409  }
410  }
411  }
412 
413  j=firstentry;
414  for(j=0;j<v->entries;j++){
415 
416  float thismetric,localmetric;
417  float *nearbiasptr=nearbias+desired2*j;
418  long k=nearcount[j];
419 
420  localmetric=v->metric_func(v,_now(v,j),ppt);
421  /* 'thismetric' is to be the bias value necessary in the current
422  arrangement for entry j to capture point i */
423  if(firstentry==j){
424  /* use the secondary entry as the threshhold */
425  thismetric=secondmetric-localmetric;
426  }else{
427  /* use the primary entry as the threshhold */
428  thismetric=firstmetric-localmetric;
429  }
430 
431  /* support the idea of 'minimum distance'... if we want the
432  cells in a codebook to be roughly some minimum size (as with
433  the low resolution residue books) */
434 
435  /* a cute two-stage delayed sorting hack */
436  if(k<desired){
437  nearbiasptr[k]=thismetric;
438  k++;
439  if(k==desired){
440  spinnit("biasing... ",v->points+v->points+v->entries-i);
441  qsort(nearbiasptr,desired,sizeof(float),directdsort);
442  }
443 
444  }else if(thismetric>nearbiasptr[desired-1]){
445  nearbiasptr[k]=thismetric;
446  k++;
447  if(k==desired2){
448  spinnit("biasing... ",v->points+v->points+v->entries-i);
449  qsort(nearbiasptr,desired2,sizeof(float),directdsort);
450  k=desired;
451  }
452  }
453  nearcount[j]=k;
454  }
455  }
456 
457  /* inflate/deflate */
458 
459  for(i=0;i<v->entries;i++){
460  float *nearbiasptr=nearbias+desired2*i;
461 
462  spinnit("biasing... ",v->points+v->entries-i);
463 
464  /* due to the delayed sorting, we likely need to finish it off....*/
465  if(nearcount[i]>desired)
466  qsort(nearbiasptr,nearcount[i],sizeof(float),directdsort);
467 
468  v->bias[i]=nearbiasptr[desired-1];
469 
470  }
471  }else{
472  memset(v->bias,0,v->entries*sizeof(float));
473  }
474 
475  /* Now assign with new bias and find new midpoints */
476  for(i=0;i<v->points;i++){
477  float *ppt=v->weight_func(v,_point(v,i));
478  float firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
479  long firstentry=0;
480 
481  if(!(i&0xff))spinnit("centering... ",v->points-i);
482 
483  for(j=0;j<v->entries;j++){
484  float thismetric=v->metric_func(v,_now(v,j),ppt)+v->bias[j];
485  if(thismetric<firstmetric){
486  firstmetric=thismetric;
487  firstentry=j;
488  }
489  }
490 
491  j=firstentry;
492 
493 #ifdef NOISY
494  fprintf(cells,"%g %g\n%g %g\n\n",
495  _now(v,j)[0],_now(v,j)[1],
496  ppt[0],ppt[1]);
497 #endif
498 
499  firstmetric-=v->bias[j];
500  meterror+=firstmetric;
501 
502  if(v->centroid==0){
503  /* set up midpoints for next iter */
504  if(v->assigned[j]++){
505  for(k=0;k<v->elements;k++)
506  vN(new,j)[k]+=ppt[k];
507  if(firstmetric>v->max[j])v->max[j]=firstmetric;
508  }else{
509  for(k=0;k<v->elements;k++)
510  vN(new,j)[k]=ppt[k];
511  v->max[j]=firstmetric;
512  }
513  }else{
514  /* centroid */
515  if(v->assigned[j]++){
516  for(k=0;k<v->elements;k++){
517  if(vN(new,j)[k]>ppt[k])vN(new,j)[k]=ppt[k];
518  if(vN(new2,j)[k]<ppt[k])vN(new2,j)[k]=ppt[k];
519  }
520  if(firstmetric>v->max[firstentry])v->max[j]=firstmetric;
521  }else{
522  for(k=0;k<v->elements;k++){
523  vN(new,j)[k]=ppt[k];
524  vN(new2,j)[k]=ppt[k];
525  }
526  v->max[firstentry]=firstmetric;
527  }
528  }
529  }
530 
531  /* assign midpoints */
532 
533  for(j=0;j<v->entries;j++){
534 #ifdef NOISY
535  fprintf(assig,"%ld\n",v->assigned[j]);
536  fprintf(bias,"%g\n",v->bias[j]);
537 #endif
538  asserror+=fabs(v->assigned[j]-fdesired);
539  if(v->assigned[j]){
540  if(v->centroid==0){
541  for(k=0;k<v->elements;k++)
542  _now(v,j)[k]=vN(new,j)[k]/v->assigned[j];
543  }else{
544  for(k=0;k<v->elements;k++)
545  _now(v,j)[k]=(vN(new,j)[k]+vN(new2,j)[k])/2.f;
546  }
547  }
548  }
549 
550  asserror/=(v->entries*fdesired);
551 
552  fprintf(stderr,"Pass #%d... ",v->it);
553  fprintf(stderr,": dist %g(%g) metric error=%g \n",
554  asserror,fdesired,meterror/v->points);
555  v->it++;
556 
557  free(new);
558  free(nearcount);
559  free(nearbias);
560 #ifdef NOISY
561  fclose(assig);
562  fclose(bias);
563  fclose(cells);
564 #endif
565  return(asserror);
566 }
567 
GLuint const GLfloat * val
Definition: glew.h:2715
void _vqgen_seed(vqgen *v)
Definition: vqgen.c:76
static int sortsize
Definition: vqgen.c:299
float mindist
Definition: vqgen.h:29
GLclampf f
Definition: glew.h:3390
long min
Definition: vqgen.h:51
static float * _now(codebook *c, int i)
Definition: metrics.c:52
int32_t k
Definition: e_log.c:102
SDL_EventEntry * free
Definition: SDL_events.c:80
void vqgen_unquantize(vqgen *v, quant_meta *q)
Definition: vqgen.c:224
int32_t j
Definition: e_log.c:102
#define memset
Definition: SDL_malloc.c:633
GLboolean GLboolean GLboolean GLboolean a
Definition: glew.h:8736
GLuint GLuint GLfloat weight
Definition: glew.h:12401
int centroid
Definition: vqgen.h:30
long points
Definition: vqgen.h:34
float _float32_unpack(long val)
Definition: sharedbook.c:62
long allocated
Definition: vqgen.h:35
const GLdouble * v
Definition: glew.h:1377
long * assigned
Definition: vqgen.h:39
int seeded
Definition: vqgen.h:22
void vqgen_quantize(vqgen *v, quant_meta *q)
Definition: vqgen.c:165
float _dist(vqgen *v, float *a, float *b)
Definition: vqgen.c:60
new_palette entries
Definition: pngrutil.c:1486
static int sortit
Definition: vqgen.c:298
GLint GLsizei count
Definition: gl2ext.h:1011
GLfloat GLfloat p
Definition: glew.h:14938
float * bias
Definition: vqgen.h:40
void spinnit(char *s, int n)
Definition: bookutil.c:306
long _float32_pack(float val)
Definition: sharedbook.c:47
void vqgen_init(vqgen *v, int elements, int aux, int entries, float mindist, float(*metric)(vqgen *, float *, float *), float *(*weight)(vqgen *, float *), int centroid)
Definition: vqgen.c:240
#define _ogg_realloc
Definition: os_types.h:24
float *(* weight_func)(struct vqgen *v, float *point)
Definition: vqgen.h:45
float * pointlist
Definition: vqgen.h:33
float(* metric_func)(struct vqgen *v, float *entry, float *point)
Definition: vqgen.h:44
float vqgen_iterate(vqgen *v, int biasp)
Definition: vqgen.c:333
float * max
Definition: vqgen.h:42
int it
Definition: vqgen.h:25
void vqgen_addpoint(vqgen *v, float *p, float *a)
Definition: vqgen.c:270
#define memcpy
Definition: SDL_malloc.c:634
GLdouble GLdouble GLdouble GLdouble q
Definition: glew.h:1400
#define _ogg_calloc
Definition: os_types.h:23
#define vN(data, i)
Definition: vqgen.c:57
int aux
Definition: vqgen.h:28
Definition: vqgen.h:21
float * entrylist
Definition: vqgen.h:38
int directdsort(const void *a, const void *b)
Definition: vqgen.c:83
int sorted
Definition: vqgen.h:23
void vqgen_cellmetric(vqgen *v)
Definition: vqgen.c:89
GLdouble GLdouble GLdouble b
Definition: glew.h:8383
FILE * asciipoints
Definition: vqgen.h:47
static float * _point(vqgen *v, long ptr)
Definition: vqgen.h:57
long delta
Definition: vqgen.h:52
#define min(x, y)
Definition: os.h:75
int quant
Definition: vqgen.h:53
int i
Definition: pngrutil.c:1377
long entries
Definition: vqgen.h:41
static int meshcomp(const void *a, const void *b)
Definition: vqgen.c:300
#define max(x, y)
Definition: os.h:79
int elements
Definition: vqgen.h:26
void vqgen_sortmesh(vqgen *v)
Definition: vqgen.c:305
double fabs(double x)
Definition: s_fabs.c:29
float * _weight_null(vqgen *v, float *a)
Definition: vqgen.c:71
GLfloat bias
Definition: glew.h:9533
#define _ogg_malloc
Definition: os_types.h:22
int sequencep
Definition: vqgen.h:54
void qsort(void *base, size_t nmemb, size_t size, int(*compare)(const void *, const void *))
Definition: SDL_qsort.c:460