spandsp
0.0.6
fast_convert.h
1
/*
2
* SpanDSP - a series of DSP components for telephony
3
*
4
* fast_convert.h - Quick ways to convert floating point numbers to integers
5
*
6
* Written by Steve Underwood <steveu@coppice.org>
7
*
8
* Copyright (C) 2009 Steve Underwood
9
*
10
* All rights reserved.
11
*
12
* This program is free software; you can redistribute it and/or modify
13
* it under the terms of the GNU Lesser General Public License version 2.1,
14
* as published by the Free Software Foundation.
15
*
16
* This program is distributed in the hope that it will be useful,
17
* but WITHOUT ANY WARRANTY; without even the implied warranty of
18
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
* GNU Lesser General Public License for more details.
20
*
21
* You should have received a copy of the GNU Lesser General Public
22
* License along with this program; if not, write to the Free Software
23
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24
*/
25
26
#if !defined(_SPANDSP_FAST_CONVERT_H_)
27
#define _SPANDSP_FAST_CONVERT_H_
28
29
#if defined(__cplusplus)
30
extern
"C"
31
{
32
#endif
33
34
/* The following code, to handle issues with lrint() and lrintf() on various
35
* platforms, is adapted from similar code in libsndfile, which is:
36
*
37
* Copyright (C) 2001-2004 Erik de Castro Lopo <erikd@mega-nerd.com>
38
*
39
* This program is free software; you can redistribute it and/or modify
40
* it under the terms of the GNU Lesser General Public License as published by
41
* the Free Software Foundation; either version 2.1 of the License, or
42
* (at your option) any later version.
43
*
44
* This program is distributed in the hope that it will be useful,
45
* but WITHOUT ANY WARRANTY; without even the implied warranty of
46
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
47
* GNU Lesser General Public License for more details.
48
*/
49
50
/*
51
* On Intel Pentium processors (especially PIII and probably P4), converting
52
* from float to int is very slow. To meet the C specs, the code produced by
53
* most C compilers targeting Pentium needs to change the FPU rounding mode
54
* before the float to int conversion is performed.
55
*
56
* Changing the FPU rounding mode causes the FPU pipeline to be flushed. It
57
* is this flushing of the pipeline which is so slow.
58
*
59
* Fortunately the ISO C99 specification defines the functions lrint, lrintf,
60
* llrint and llrintf which fix this problem as a side effect.
61
*
62
* On Unix-like systems, the configure process should have detected the
63
* presence of these functions. If they weren't found we have to replace them
64
* here with a standard C cast.
65
*/
66
67
/*
68
* The C99 prototypes for these functions are as follows:
69
*
70
* float rintf(float x);
71
* double rint(double x);
72
* long int lrintf(float x);
73
* long int lrint(double x);
74
* long long int llrintf(float x);
75
* long long int llrint(double x);
76
*
77
* The presence of the required functions is detected during the configure
78
* process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in
79
* the config file.
80
*/
81
82
#if defined(__CYGWIN__)
83
#if !defined(__cplusplus) && (__GNUC__ < 4)
84
/*
85
* CYGWIN versions prior to 1.7.1 have lrint and lrintf functions, but
86
* they are slow and buggy:
87
* http://sourceware.org/ml/cygwin/2005-06/msg00153.html
88
* http://sourceware.org/ml/cygwin/2005-09/msg00047.html
89
* These replacement functions (pulled from the Public Domain MinGW
90
* math.h header) replace the native versions.
91
*/
92
/* Replacement lrint() for old Cygwin tool chains.
   Converts a double to an integer with the x87 FISTP instruction, which
   rounds according to the FPU's current rounding mode. The "l" suffix makes
   the store 32 bits wide, so the value is stored to a 32 bit int and widened
   on return, rather than relying on long int being exactly 32 bits. */
static __inline__ long int lrint(double x)
{
    int rounded;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (rounded)    /* FISTP stores straight to memory */
        : "t" (x)           /* The value must be at the top of the x87 stack */
        : "st"              /* FISTP pops the x87 stack */
    );

    return (long int) rounded;
}
106
107
/* Replacement lrintf() for old Cygwin tool chains.
   Converts a float to an integer with the x87 FISTP instruction, which
   rounds according to the FPU's current rounding mode. The "l" suffix makes
   the store 32 bits wide, so the value is stored to a 32 bit int and widened
   on return, rather than relying on long int being exactly 32 bits. */
static __inline__ long int lrintf(float x)
{
    int rounded;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (rounded)    /* FISTP stores straight to memory */
        : "t" (x)           /* The value must be at the top of the x87 stack */
        : "st"              /* FISTP pops the x87 stack */
    );

    return (long int) rounded;
}
120
#endif
121
122
/* The fastest way to convert is the equivalent of lrint() */
123
/* Fast double to integer conversion for Cygwin, behaving like lrint().
   FISTP rounds according to the FPU's current rounding mode. "fistpl" only
   writes 32 bits, so the destination is a 32 bit int which is then widened.
   Storing straight into a long int would leave the upper half of the result
   unset wherever long int is 64 bits wide. */
static __inline__ long int lfastrint(double x)
{
    int rounded;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (rounded)    /* FISTP stores straight to memory */
        : "t" (x)           /* The value must be at the top of the x87 stack */
        : "st"              /* FISTP pops the x87 stack */
    );

    return (long int) rounded;
}
137
138
/* Fast float to integer conversion for Cygwin, behaving like lrintf().
   FISTP rounds according to the FPU's current rounding mode. "fistpl" only
   writes 32 bits, so the destination is a 32 bit int which is then widened.
   Storing straight into a long int would leave the upper half of the result
   unset wherever long int is 64 bits wide. */
static __inline__ long int lfastrintf(float x)
{
    int rounded;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (rounded)    /* FISTP stores straight to memory */
        : "t" (x)           /* The value must be at the top of the x87 stack */
        : "st"              /* FISTP pops the x87 stack */
    );

    return (long int) rounded;
}
151
#elif defined(__GNUC__) || (__SUNPRO_C >= 0x0590)
152
153
#if defined(__i386__)
154
/* These routines are guaranteed fast on an i386 machine. Using the built in
155
lrint() and lrintf() should be similar, but they may not always be enabled.
156
Sometimes, especially with "-O0", you might get slow calls to routines. */
157
/* Fast double to integer conversion for i386, using a single FISTP.
   FISTP rounds according to the FPU's current rounding mode, so no rounding
   mode switch is needed. "fistpl" writes exactly 32 bits, so the destination
   is declared as a 32 bit int, making the operand size match the instruction
   independently of the width of long int. */
static __inline__ long int lfastrint(double x)
{
    int rounded;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (rounded)    /* FISTP stores straight to memory */
        : "t" (x)           /* The value must be at the top of the x87 stack */
        : "st"              /* FISTP pops the x87 stack */
    );

    return (long int) rounded;
}
171
172
/* Fast float to integer conversion for i386, using a single FISTP.
   FISTP rounds according to the FPU's current rounding mode, so no rounding
   mode switch is needed. "fistpl" writes exactly 32 bits, so the destination
   is declared as a 32 bit int, making the operand size match the instruction
   independently of the width of long int. */
static __inline__ long int lfastrintf(float x)
{
    int rounded;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (rounded)    /* FISTP stores straight to memory */
        : "t" (x)           /* The value must be at the top of the x87 stack */
        : "st"              /* FISTP pops the x87 stack */
    );

    return (long int) rounded;
}
185
#elif defined(__x86_64__)
186
/* On an x86_64 machine, the fastest thing seems to be a pure assignment from a
187
double or float to an int. It looks like the design on the x86_64 took account
188
of the default behaviour specified for C. */
189
/* Fast double to integer conversion for x86_64.
   This is a plain C cast, so the fractional part is discarded (truncation
   towards zero) rather than rounded as lrint() would do. */
static __inline__ long int lfastrint(double x)
{
    return (long int) x;
}
193
194
/* Fast float to integer conversion for x86_64.
   This is a plain C cast, so the fractional part is discarded (truncation
   towards zero) rather than rounded as lrintf() would do. */
static __inline__ long int lfastrintf(float x)
{
    return (long int) x;
}
198
#elif defined(__ppc__) || defined(__powerpc__)
199
static
__inline__
long
int
lfastrint(
register
double
x)
200
{
201
int
res[2];
202
203
__asm__ __volatile__
204
(
205
"fctiw %1, %1\n\t"
206
"stfd %1, %0"
207
:
"=m"
(res)
/* Output */
208
:
"f"
(x)
/* Input */
209
:
"memory"
210
);
211
212
return
res[1];
213
}
214
215
static
__inline__
long
int
lfastrintf(
register
float
x)
216
{
217
int
res[2];
218
219
__asm__ __volatile__
220
(
221
"fctiw %1, %1\n\t"
222
"stfd %1, %0"
223
:
"=m"
(res)
/* Output */
224
:
"f"
(x)
/* Input */
225
:
"memory"
226
);
227
228
return
res[1];
229
}
230
#else
231
/* Fallback routines, for unrecognised platforms */
232
/* Fallback double to integer conversion for unrecognised GNU C targets.
   A plain C cast: the fractional part is discarded (truncation towards
   zero). */
static __inline__ long int lfastrint(double x)
{
    return (long int) x;
}
236
237
/* Fallback float to integer conversion for unrecognised GNU C targets.
   A plain C cast: the fractional part is discarded (truncation towards
   zero). */
static __inline__ long int lfastrintf(float x)
{
    return (long int) x;
}
241
#endif
242
243
#elif defined(_M_IX86)
244
/* Visual Studio i386 */
245
/*
246
* Win32 doesn't seem to have the lrint() and lrintf() functions.
247
* Therefore implement inline versions of these functions here.
248
*/
249
250
__inline
long
int
lrint(
double
x)
251
{
252
long
int
i;
253
254
_asm
255
{
256
fld x
257
fistp i
258
};
259
return
i;
260
}
261
262
__inline
long
int
lrintf(
float
x)
263
{
264
long
int
i;
265
266
_asm
267
{
268
fld x
269
fistp i
270
};
271
return
i;
272
}
273
274
__inline
float
rintf(
float
flt)
275
{
276
_asm
277
{ fld flt
278
frndint
279
}
280
}
281
282
__inline
double
rint(
double
dbl)
283
{
284
_asm
285
{
286
fld dbl
287
frndint
288
}
289
}
290
291
__inline
long
int
lfastrint(
double
x)
292
{
293
long
int
i;
294
295
_asm
296
{
297
fld x
298
fistp i
299
};
300
return
i;
301
}
302
303
__inline
long
int
lfastrintf(
float
x)
304
{
305
long
int
i;
306
307
_asm
308
{
309
fld x
310
fistp i
311
};
312
return
i;
313
}
314
#elif defined(_M_X64)
315
/* Visual Studio x86_64 */
316
/* x86_64 machines will do best with a simple assignment. */
317
#include <intrin.h>
318
319
/* lrint() for Visual Studio on x86_64, built from SSE2 intrinsics.
   The value is loaded with _mm_load_sd(), which reads exactly one double.
   The unaligned packed load (_mm_loadu_pd) reads two doubles, i.e. 8 bytes
   beyond the single parameter. _mm_cvtsd_si64x() then converts the low
   element to an integer using the current SSE rounding mode. */
__inline long int lrint(double x)
{
    return (long int) _mm_cvtsd_si64x(_mm_load_sd(&x));
}
323
324
/* lrintf() for Visual Studio on x86_64, built from SSE intrinsics.
   _mm_load_ss() loads the single float, and _mm_cvt_ss2si() converts it to
   an integer using the current SSE rounding mode. */
__inline long int lrintf(float x)
{
    return _mm_cvt_ss2si(_mm_load_ss(&x));
}
328
329
__inline
long
int
lfastrint(
double
x)
330
{
331
return
(
long
int
) (x);
332
}
333
334
__inline
long
int
lfastrintf(
float
x)
335
{
336
return
(
long
int
) (x);
337
}
338
#elif defined(__MWERKS__) && defined(macintosh)
339
/* This MacOS 9 solution was provided by Stephane Letz */
340
341
long
int
__inline__ lfastrint(
register
double
x)
342
{
343
long
int
res[2];
344
345
asm
346
{
347
fctiw x, x
348
stfd x, res
349
}
350
return
res[1];
351
}
352
353
long
int
__inline__ lfastrintf(
register
float
x)
354
{
355
long
int
res[2];
356
357
asm
358
{
359
fctiw x, x
360
stfd x, res
361
}
362
return
res[1];
363
}
364
#elif defined(__MACH__) && defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
365
/* For Apple Mac OS/X - do recent versions still need this? */
366
367
static
__inline__
long
int
lfastrint(
register
double
x)
368
{
369
int
res[2];
370
371
__asm__ __volatile__
372
(
373
"fctiw %1, %1\n\t"
374
"stfd %1, %0"
375
:
"=m"
(res)
/* Output */
376
:
"f"
(x)
/* Input */
377
:
"memory"
378
);
379
380
return
res[1];
381
}
382
383
static
__inline__
long
int
lfastrintf(
register
float
x)
384
{
385
int
res[2];
386
387
__asm__ __volatile__
388
(
389
"fctiw %1, %1\n\t"
390
"stfd %1, %0"
391
:
"=m"
(res)
/* Output */
392
:
"f"
(x)
/* Input */
393
:
"memory"
394
);
395
396
return
res[1];
397
}
398
#else
399
/* There is nothing else to do, but use a simple casting operation, instead of a real
400
rint() type function. Since we are only trying to use rint() to speed up conversions,
401
the accuracy issues related to changing the rounding scheme are of little concern
402
to us. */
403
404
#if !defined(__sgi) && !defined(__sunos) && !defined(__solaris) && !defined(__sun)
405
#warning "No usable lrint() and lrintf() functions available."
406
#warning "Replacing these functions with a simple C cast."
407
#endif
408
409
/* Stand-in for lrint() on platforms with no usable implementation.
   This is only a C cast, so it truncates towards zero instead of rounding
   to the nearest integer. */
static __inline__ long int lrint(double x)
{
    return (long int) x;
}
413
414
/* Stand-in for lrintf() on platforms with no usable implementation.
   This is only a C cast, so it truncates towards zero instead of rounding
   to the nearest integer. */
static __inline__ long int lrintf(float x)
{
    return (long int) x;
}
418
419
/* Generic double to integer conversion, for platforms with no faster
   method. A plain C cast: the fractional part is discarded (truncation
   towards zero). */
static __inline__ long int lfastrint(double x)
{
    return (long int) x;
}
423
424
/* Generic float to integer conversion, for platforms with no faster
   method. A plain C cast: the fractional part is discarded (truncation
   towards zero). */
static __inline__ long int lfastrintf(float x)
{
    return (long int) x;
}
428
#endif
429
430
#if defined(__cplusplus)
431
}
432
#endif
433
434
#endif
435
436
/*- End of file ------------------------------------------------------------*/
src
spandsp
fast_convert.h
Generated by
1.8.14