# 钳制（clamp）实数（定点/浮点）值的最快方法？

/* Clamp the value returned by calculate() using the bounds MY_MIN and MY_MAX. */
double clampedA;
double a = calculate();

/* Apply the upper bound first. */
clampedA = a > MY_MAX ? MY_MAX : a;

/* Apply the lower bound to the already-clamped value. Testing `a` again here
 * would overwrite the upper clamp whenever a is not below MY_MIN. */
clampedA = clampedA < MY_MIN ? MY_MIN : clampedA;

/* Clamp the value returned by calculate() using the bounds MY_MIN and MY_MAX. */
double a = calculate();
double clampedA = a;

if (a > MY_MAX) {
    clampedA = MY_MAX;   /* above the upper bound: pin to MY_MAX */
} else if (a < MY_MIN) {
    clampedA = MY_MIN;   /* below the lower bound: pin to MY_MIN */
}

` ` double FMIN = 3.13; double FMAX = 300.44; double FVAL[10] = {-100, 0.23, 1.24, 3.00, 3.5, 30.5, 50 ,100.22 ,200.22, 30000}; uint64 Lfmin = *(uint64 *)&FMIN; uint64 Lfmax = *(uint64 *)&FMAX; DWORD start = GetTickCount(); for (int j=0; j<10000000; ++j) { uint64 * pfvalue = (uint64 *)&FVAL[0]; for (int i=0; i<10; ++i) *pfvalue++ = (*pfvalue < Lfmin) ? Lfmin : (*pfvalue > Lfmax) ? Lfmax : *pfvalue; } volatile DWORD hacktime = GetTickCount() - start; for (int j=0; j<10000000; ++j) { double * pfvalue = &FVAL[0]; for (int i=0; i<10; ++i) *pfvalue++ = (*pfvalue < FMIN) ? FMIN : (*pfvalue > FMAX) ? FMAX : *pfvalue; } volatile DWORD normaltime = GetTickCount() - (start + hacktime);` `

#include <xmmintrin.h>  /* SSE: _mm_set_ss, _mm_min_ss, _mm_max_ss, _mm_store_ss */

/* Branchless SSE min: returns the smaller of a and b. */
float minss(float a, float b)
{
    _mm_store_ss(&a, _mm_min_ss(_mm_set_ss(a), _mm_set_ss(b)));
    return a;
}

/* Branchless SSE max: returns the larger of a and b. */
float maxss(float a, float b)
{
    _mm_store_ss(&a, _mm_max_ss(_mm_set_ss(a), _mm_set_ss(b)));
    return a;
}

/*
 * Branchless SSE clamp: raises val to at least minval, then lowers the
 * result to at most maxval.
 */
float clamp(float val, float minval, float maxval)
{
    /* Same computation as: return minss(maxss(val, minval), maxval); */
    _mm_store_ss(&val,
                 _mm_min_ss(_mm_max_ss(_mm_set_ss(val), _mm_set_ss(minval)),
                            _mm_set_ss(maxval)));
    return val;
}

GCC 和 clang 都能为下面这段简单、直接、可移植的代码生成漂亮的汇编代码：

/*
 * Clamp d against a lower bound min and an upper bound max.
 * The lower bound is applied first, then the upper bound.
 */
double clamp(double d, double min, double max)
{
    double lower_bounded = d;

    if (d < min) {
        lower_bounded = min;
    }
    if (lower_bounded > max) {
        return max;
    }
    return lower_bounded;
}

`> gcc -O3 -march=native -Wall -Wextra -Wc++-compat -S -fverbose-asm clamp_ternary_operator.c`

GCC 生成的汇编代码：

` `maxsd %xmm0, %xmm1 # d, min movapd %xmm2, %xmm0 # max, max minsd %xmm1, %xmm0 # min, max ret` `

`> clang -O3 -march=native -Wall -Wextra -Wc++-compat -S -fverbose-asm clamp_ternary_operator.c`

` `maxsd %xmm0, %xmm1 minsd %xmm1, %xmm2 movaps %xmm2, %xmm0 ret` `

` `min(a,b) = (a + b - abs(a-b)) / 2 max(a,b) = (a + b + abs(a-b)) / 2` `

` `max(a,0) = (a + abs(a)) / 2` `

#include <math.h>  /* fabs */

/*
 * Branch-free clamp of value against FMIN and FMAX, built from
 *   min(a,b) = (a + b - |a-b|) / 2
 *   max(a,b) = (a + b + |a-b|) / 2
 *
 * fabs() is used rather than abs(): in C, abs() takes an int and would
 * truncate the double argument.
 *
 * No preprocessor special case for FMIN == 0: #if cannot evaluate a
 * floating-point constant or a variable, and the general formula already
 * reduces to (temp + fabs(temp)) * 0.25 when FMIN is zero.
 */
double clamp(double value)
{
    /* temp == 2 * min(value, FMAX) */
    double temp = value + FMAX - fabs(value - FMAX);

    /* Sum == 2 * max(temp, 2*FMIN), i.e. four times the clamped value. */
    return (temp + (2.0 * FMIN) + fabs(temp - (2.0 * FMIN))) * 0.25;
}

/* Select b when a is greater than or equal to zero, otherwise c. */
double fsel(double a, double b, double c)
{
    return a >= 0 ? b : c;
}

/*
 * Clamp a against min and max using two fsel() selections.
 *
 * Declared static inline: a plain `inline` definition in C provides no
 * external definition, so a call that is not inlined can fail to link.
 */
static inline double clamp(double a, double min, double max)
{
    /* a - min >= 0 keeps a; otherwise a is replaced by min. */
    a = fsel(a - min, a, min);
    /* a - max >= 0 means a reached or passed max, so max is returned. */
    return fsel(a - max, max, a);
}

IEEE 754浮点的位的排列方式是，如果比较解释为整数的位，则会得到与直接将其作为浮点数进行比较的结果相同的结果。 所以，如果你发现或者知道一种钳制整数的方法，你也可以用它来用于（IEEE 754）浮点数。 对不起，我不知道更快的方法。

` `clampedA = fmin(fmax(a,MY_MIN),MY_MAX);` `

` `a = (a > MAX) ? MAX : ((a < MIN) ? MIN : a);` `

` `if (abs(a - (MAX+MIN)/2) > ((MAX-MIN)/2)) ...` `

typedef int64_t i_t;
typedef double f_t;

/*
 * Branch-free min(x, y). The subtraction and masking are done in unsigned
 * arithmetic so that x - y cannot overflow a signed 64-bit integer.
 */
static inline i_t i_tmin(i_t x, i_t y)
{
    const uint64_t ux = (uint64_t)x;
    const uint64_t uy = (uint64_t)y;
    /* Mask is all ones when x < y, zero otherwise. */
    return (i_t)(uy + ((ux - uy) & -(uint64_t)(x < y))); // min(x, y)
}

/* Branch-free max(x, y); same unsigned-arithmetic scheme as i_tmin(). */
static inline i_t i_tmax(i_t x, i_t y)
{
    const uint64_t ux = (uint64_t)x;
    const uint64_t uy = (uint64_t)y;
    return (i_t)(ux - ((ux - uy) & -(uint64_t)(x < y))); // max(x, y)
}

/* Reinterpret the bits of a float as an integer via a union, which avoids
 * the strict-aliasing violation of a pointer cast. */
static inline i_t f_t_to_bits(f_t f)
{
    union { f_t f; i_t i; } pun;
    pun.f = f;
    return pun.i;
}

/* Inverse of f_t_to_bits(). */
static inline f_t bits_to_f_t(i_t i)
{
    union { f_t f; i_t i; } pun;
    pun.i = i;
    return pun.f;
}

/*
 * Clip f against fmin and fmax.
 *
 * Default build: compares the integer bit patterns of the three values.
 * With INT_TERNARY: integer comparisons, floating-point results.
 * With TERNARY: plain floating-point comparisons.
 *
 * XXX assumes an IEEE-754 compliant system (lexicographically ordered floats).
 * The integer paths carry the precondition that was noted as a disabled
 * assertion: not (fmin < 0 and (f < 0 or f is negative zero)).
 */
f_t clip_f_t(f_t f, f_t fmin, f_t fmax)
{
#ifndef TERNARY
    assert(sizeof(i_t) == sizeof(f_t));
    const i_t imin = f_t_to_bits(fmin);
    const i_t imax = f_t_to_bits(fmax);
    const i_t i = f_t_to_bits(f);
#ifndef INT_TERNARY
    const i_t iclipped = i_tmin(imax, i_tmax(i, imin));
    return bits_to_f_t(iclipped);
#else /* INT_TERNARY */
    return i < imin ? fmin : (i > imax ? fmax : f);
#endif /* INT_TERNARY */
#else /* TERNARY */
    return fmin > f ? fmin : (fmax < f ? fmax : f);
#endif /* TERNARY */
}

IEEE 单精度和双精度浮点格式的设计使得数字按“字典序”排列；用 IEEE 架构师威廉·卡汉（William Kahan）的话来说，这意味着“如果相同格式的两个浮点数是有序的（比如 x < y），那么当它们的位被重新解释为符号-数值（sign-magnitude）整数时，它们的顺序保持不变”。

测试程序：

` `/** gcc -std=c99 -fno-strict-aliasing -O2 -lm -Wall *.c -o clip_double && clip_double */ #include <assert.h> #include <iso646.h> // not, and #include <math.h> // isnan() #include <stdbool.h> // bool #include <stdint.h> // int64_t #include <stdio.h> static bool is_negative_zero(f_t x) { return x == 0 and 1/x < 0; } static inline f_t range(f_t low, f_t f, f_t hi) { return fmax(low, fmin(f, hi)); } static const f_t END = 0./0.; #define TOSTR(f, fmin, fmax, ff) ((f) == (fmin) ? "min" : \ ((f) == (fmax) ? "max" : \ (is_negative_zero(ff) ? "-0.": \ ((f) == (ff) ? "f" : #f)))) static int test(f_t p[], f_t fmin, f_t fmax, f_t (*fun)(f_t, f_t, f_t)) { assert(isnan(END)); int failed_count = 0; for ( ; ; ++p) { const f_t clipped = fun(*p, fmin, fmax), expected = range(fmin, *p, fmax); if(clipped != expected and not (isnan(clipped) and isnan(expected))) { failed_count++; fprintf(stderr, "error: got: %s, expected: %s\t(min=%g, max=%g, f=%g)\n", TOSTR(clipped, fmin, fmax, *p), TOSTR(expected, fmin, fmax, *p), fmin, fmax, *p); } if (isnan(*p)) break; } return failed_count; } int main(void) { int failed_count = 0; f_t arr[] = { -0., -1./0., 0., 1./0., 1., -1., 2, 2.1, -2.1, -0.1, END}; f_t minmax[][2] = { -1, 1, // min, max 0, 2, }; for (int i = 0; i < (sizeof(minmax) / sizeof(*minmax)); ++i) failed_count += test(arr, minmax[i][0], minmax[i][1], clip_f_t); return failed_count & 0xFF; }` `

` `\$ gcc -std=c99 -fno-strict-aliasing -O2 -lm *.c -o clip_double && ./clip_double` `

` `error: got: min, expected: -0. (min=-1, max=1, f=0) error: got: f, expected: min (min=-1, max=1, f=-1.#INF) error: got: f, expected: min (min=-1, max=1, f=-2.1) error: got: min, expected: f (min=-1, max=1, f=-0.1)` `

` `clampedA = (a > MY_MAX)? MY_MAX : (a < MY_MIN)? MY_MIN : a;` `

` `int b = (int)a; clampedA = (b > MY_MAX)? (double)MY_MAX : (b < MY_MIN)? (double)MY_MIN : a;` `

` `clamped = 0.5*(fabs(x)-fabs(x-1.0f) + 1.0f);` `

（我把代码稍微简化了一下。）我们可以把它看作两个值的平均，其中一个经过反射后 ≥ 0：

` `fabs(x)` `

` `1.0-fabs(x-1.0)` `

// Clamp val against lo and hi: cap it with std::min(hi, val), then apply the
// lower bound with std::max.
template <typename T>
inline T clamp(T val, T lo, T hi)
{
    const T& capped = std::min(hi, val);
    return std::max(lo, capped);
}