定点math最好的方法是什么？

我需要加速一个没有FPU的任天堂DS的程序，所以我需要将浮点math（模拟和缓慢）改为定点。

我是如何开始的，我将浮点数转换为整数，当我需要转换它们时，我使用x >> 8将定点variablesx转换为实际数值，将x << 8转换为定点。很快我发现不可能跟踪需要转换的东西，而且我也意识到要改变数字的精度是很困难的（在这种情况下是8）。

我的问题是，我应该如何让这个更容易，更快？我应该做一个FixedPoint类，或者只是一个FixedPoint8 typedef或结构与一些函数/macros来转换它们，或者别的什么？我应该把什么东西放在variables名称来显示它的定点？

你可以试试我的定点课程（最新版本@ https://github.com/eteran/cpp-utilities ）

// From: https://github.com/eteran/cpp-utilities/edit/master/Fixed.h // See also: http://stackoverflow.com/questions/79677/whats-the-best-way-to-do-fixed-point-math /* * The MIT License (MIT) * * Copyright (c) 2015 Evan Teran * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef FIXED_H_ #define FIXED_H_ #include <ostream> #include <exception> #include <cstddef> // for size_t #include <cstdint> #include <type_traits> #include <boost/operators.hpp> namespace numeric { template <size_t I, size_t F> class Fixed; namespace detail { // helper templates to make magic with types :) // these allow us to determine resonable types from // a desired size, they also let us infer the next largest type // from a type which is nice for the division op template <size_t T> struct type_from_size { static const bool is_specialized = false; typedef void value_type; }; #if defined(__GNUC__) && defined(__x86_64__) template <> struct type_from_size<128> { static const bool is_specialized = true; static const size_t size = 128; typedef __int128 value_type; typedef unsigned __int128 unsigned_type; typedef __int128 signed_type; typedef type_from_size<256> next_size; }; #endif template <> struct type_from_size<64> { static const bool is_specialized = true; static const size_t size = 64; typedef int64_t value_type; typedef uint64_t unsigned_type; typedef int64_t signed_type; typedef type_from_size<128> next_size; }; template <> struct type_from_size<32> { static const bool is_specialized = true; static const size_t size = 32; typedef int32_t value_type; typedef uint32_t unsigned_type; typedef int32_t signed_type; typedef type_from_size<64> next_size; }; template <> struct type_from_size<16> { static const bool is_specialized = true; static const size_t size = 16; typedef int16_t value_type; typedef uint16_t unsigned_type; typedef int16_t signed_type; typedef type_from_size<32> next_size; }; template <> struct type_from_size<8> { static const bool is_specialized = true; static const size_t size = 8; typedef int8_t value_type; typedef uint8_t unsigned_type; typedef int8_t signed_type; typedef type_from_size<16> next_size; }; // this is to assist in adding support for non-native base // types (for adding big-int support), this should be fine // unless your bit-int class doesn't nicely support casting template <class B, class N> B next_to_base(const N& rhs) { return static_cast<B>(rhs); } struct divide_by_zero : std::exception { }; template <size_t I, size_t F> Fixed<I,F> divide(const Fixed<I,F> &numerator, const Fixed<I,F> &denominator, Fixed<I,F> &remainder, typename std::enable_if<type_from_size<I+F>::next_size::is_specialized>::type* = 0) { typedef typename Fixed<I,F>::next_type next_type; typedef typename Fixed<I,F>::base_type base_type; static const size_t fractional_bits = Fixed<I,F>::fractional_bits; next_type t(numerator.to_raw()); t <<= fractional_bits; Fixed<I,F> quotient; quotient = Fixed<I,F>::from_base(next_to_base<base_type>(t / denominator.to_raw())); remainder = Fixed<I,F>::from_base(next_to_base<base_type>(t % denominator.to_raw())); return quotient; } template <size_t I, size_t F> Fixed<I,F> divide(Fixed<I,F> numerator, Fixed<I,F> denominator, Fixed<I,F> &remainder, typename std::enable_if<!type_from_size<I+F>::next_size::is_specialized>::type* = 0) { // NOTE(eteran): division is broken for large types :-( // especially when dealing with negative quantities typedef typename Fixed<I,F>::base_type base_type; typedef typename Fixed<I,F>::unsigned_type unsigned_type; static const int bits = Fixed<I,F>::total_bits; if(denominator == 0) { throw divide_by_zero(); } else { int sign = 0; Fixed<I,F> quotient; if(numerator < 0) { sign ^= 1; numerator = -numerator; } if(denominator < 0) { sign ^= 1; denominator = -denominator; } base_type n = numerator.to_raw(); base_type d = denominator.to_raw(); base_type x = 1; base_type answer = 0; // egyptian division algorithm while((n >= d) && (((d >> (bits - 1)) & 1) == 0)) { x <<= 1; d <<= 1; } while(x != 0) { if(n >= d) { n -= d; answer += x; } x >>= 1; d >>= 1; } unsigned_type l1 = n; unsigned_type l2 = denominator.to_raw(); // calculate the lower bits (needs to be unsigned) // unfortunately for many fractions this overflows the type still :-/ const unsigned_type lo = (static_cast<unsigned_type>(n) << F) / denominator.to_raw(); quotient = Fixed<I,F>::from_base((answer << F) | lo); remainder = n; if(sign) { quotient = -quotient; } return quotient; } } // this is the usual implementation of multiplication template <size_t I, size_t F> void multiply(const Fixed<I,F> &lhs, const Fixed<I,F> &rhs, Fixed<I,F> &result, typename std::enable_if<type_from_size<I+F>::next_size::is_specialized>::type* = 0) { typedef typename Fixed<I,F>::next_type next_type; typedef typename Fixed<I,F>::base_type base_type; static const size_t fractional_bits = Fixed<I,F>::fractional_bits; next_type t(static_cast<next_type>(lhs.to_raw()) * static_cast<next_type>(rhs.to_raw())); t >>= fractional_bits; result = Fixed<I,F>::from_base(next_to_base<base_type>(t)); } // this is the fall back version we use when we don't have a next size // it is slightly slower, but is more robust since it doesn't // require and upgraded type template <size_t I, size_t F> void multiply(const Fixed<I,F> &lhs, const Fixed<I,F> &rhs, Fixed<I,F> &result, typename std::enable_if<!type_from_size<I+F>::next_size::is_specialized>::type* = 0) { typedef typename Fixed<I,F>::base_type base_type; static const size_t fractional_bits = Fixed<I,F>::fractional_bits; static const size_t integer_mask = Fixed<I,F>::integer_mask; static const size_t fractional_mask = Fixed<I,F>::fractional_mask; // more costly but doesn't need a larger type const base_type a_hi = (lhs.to_raw() & integer_mask) >> fractional_bits; const base_type b_hi = (rhs.to_raw() & integer_mask) >> fractional_bits; const base_type a_lo = (lhs.to_raw() & fractional_mask); const base_type b_lo = (rhs.to_raw() & fractional_mask); const base_type x1 = a_hi * b_hi; const base_type x2 = a_hi * b_lo; const base_type x3 = a_lo * b_hi; const base_type x4 = a_lo * b_lo; result = Fixed<I,F>::from_base((x1 << fractional_bits) + (x3 + x2) + (x4 >> fractional_bits)); } } /* * inheriting from boost::operators enables us to be a drop in replacement for base types * without having to specify all the different versions of operators manually */ template <size_t I, size_t F> class Fixed : boost::operators<Fixed<I,F>> { static_assert(detail::type_from_size<I + F>::is_specialized, "invalid combination of sizes"); public: static const size_t fractional_bits = F; static const size_t integer_bits = I; static const size_t total_bits = I + F; typedef detail::type_from_size<total_bits> base_type_info; typedef typename base_type_info::value_type base_type; typedef typename base_type_info::next_size::value_type next_type; typedef typename base_type_info::unsigned_type unsigned_type; public: static const size_t base_size = base_type_info::size; static const base_type fractional_mask = ~((~base_type(0)) << fractional_bits); static const base_type integer_mask = ~fractional_mask; public: static const base_type one = base_type(1) << fractional_bits; public: // constructors Fixed() : data_(0) { } Fixed(long n) : data_(base_type(n) << fractional_bits) { // TODO(eteran): assert in range! } Fixed(unsigned long n) : data_(base_type(n) << fractional_bits) { // TODO(eteran): assert in range! } Fixed(int n) : data_(base_type(n) << fractional_bits) { // TODO(eteran): assert in range! } Fixed(unsigned int n) : data_(base_type(n) << fractional_bits) { // TODO(eteran): assert in range! } Fixed(float n) : data_(static_cast<base_type>(n * one)) { // TODO(eteran): assert in range! } Fixed(double n) : data_(static_cast<base_type>(n * one)) { // TODO(eteran): assert in range! } Fixed(const Fixed &o) : data_(o.data_) { } Fixed& operator=(const Fixed &o) { data_ = o.data_; return *this; } private: // this makes it simpler to create a fixed point object from // a native type without scaling // use "Fixed::from_base" in order to perform this. struct NoScale {}; Fixed(base_type n, const NoScale &) : data_(n) { } public: static Fixed from_base(base_type n) { return Fixed(n, NoScale()); } public: // comparison operators bool operator==(const Fixed &o) const { return data_ == o.data_; } bool operator<(const Fixed &o) const { return data_ < o.data_; } public: // unary operators bool operator!() const { return !data_; } Fixed operator~() const { Fixed t(*this); t.data_ = ~t.data_; return t; } Fixed operator-() const { Fixed t(*this); t.data_ = -t.data_; return t; } Fixed operator+() const { return *this; } Fixed& operator++() { data_ += one; return *this; } Fixed& operator--() { data_ -= one; return *this; } public: // basic math operators Fixed& operator+=(const Fixed &n) { data_ += n.data_; return *this; } Fixed& operator-=(const Fixed &n) { data_ -= n.data_; return *this; } Fixed& operator&=(const Fixed &n) { data_ &= n.data_; return *this; } Fixed& operator|=(const Fixed &n) { data_ |= n.data_; return *this; } Fixed& operator^=(const Fixed &n) { data_ ^= n.data_; return *this; } Fixed& operator*=(const Fixed &n) { detail::multiply(*this, n, *this); return *this; } Fixed& operator/=(const Fixed &n) { Fixed temp; *this = detail::divide(*this, n, temp); return *this; } Fixed& operator>>=(const Fixed &n) { data_ >>= n.to_int(); return *this; } Fixed& operator<<=(const Fixed &n) { data_ <<= n.to_int(); return *this; } public: // conversion to basic types int to_int() const { return (data_ & integer_mask) >> fractional_bits; } unsigned int to_uint() const { return (data_ & integer_mask) >> fractional_bits; } float to_float() const { return static_cast<float>(data_) / Fixed::one; } double to_double() const { return static_cast<double>(data_) / Fixed::one; } base_type to_raw() const { return data_; } public: void swap(Fixed &rhs) { using std::swap; swap(data_, rhs.data_); } public: base_type data_; }; // if we have the same fractional portion, but differing integer portions, we trivially upgrade the smaller type template <size_t I1, size_t I2, size_t F> typename std::conditional<I1 >= I2, Fixed<I1,F>, Fixed<I2,F>>::type operator+(const Fixed<I1,F> &lhs, const Fixed<I2,F> &rhs) { typedef typename std::conditional< I1 >= I2, Fixed<I1,F>, Fixed<I2,F> >::type T; const T l = T::from_base(lhs.to_raw()); const T r = T::from_base(rhs.to_raw()); return l + r; } template <size_t I1, size_t I2, size_t F> typename std::conditional<I1 >= I2, Fixed<I1,F>, Fixed<I2,F>>::type operator-(const Fixed<I1,F> &lhs, const Fixed<I2,F> &rhs) { typedef typename std::conditional< I1 >= I2, Fixed<I1,F>, Fixed<I2,F> >::type T; const T l = T::from_base(lhs.to_raw()); const T r = T::from_base(rhs.to_raw()); return l - r; } template <size_t I1, size_t I2, size_t F> typename std::conditional<I1 >= I2, Fixed<I1,F>, Fixed<I2,F>>::type operator*(const Fixed<I1,F> &lhs, const Fixed<I2,F> &rhs) { typedef typename std::conditional< I1 >= I2, Fixed<I1,F>, Fixed<I2,F> >::type T; const T l = T::from_base(lhs.to_raw()); const T r = T::from_base(rhs.to_raw()); return l * r; } template <size_t I1, size_t I2, size_t F> typename std::conditional<I1 >= I2, Fixed<I1,F>, Fixed<I2,F>>::type operator/(const Fixed<I1,F> &lhs, const Fixed<I2,F> &rhs) { typedef typename std::conditional< I1 >= I2, Fixed<I1,F>, Fixed<I2,F> >::type T; const T l = T::from_base(lhs.to_raw()); const T r = T::from_base(rhs.to_raw()); return l / r; } template <size_t I, size_t F> std::ostream &operator<<(std::ostream &os, const Fixed<I,F> &f) { os << f.to_double(); return os; } template <size_t I, size_t F> const size_t Fixed<I,F>::fractional_bits; template <size_t I, size_t F> const size_t Fixed<I,F>::integer_bits; template <size_t I, size_t F> const size_t Fixed<I,F>::total_bits; } #endif

它的devise是一个接近下降更换漂浮/双打，并有一个可select的精度。它确实使用boost来添加所有必要的math运算符重载，所以您也将需要（我相信这只是一个头依赖，而不是库依赖）。

顺便说一句，常见的用法可能是这样的：

 using namespace numeric; typedef Fixed<16, 16> fixed; fixed f;

唯一真正的规则是这个数字必须加到系统的本地大小，比如8,16,32,64。

在现代C ++实现中，使用简单和精简的抽象（例如具体类）将不会有性能损失。定点计算恰恰是使用正确devise的类的地方，可以避免大量的错误。

因此， 你应该写一个FixedPoint8类 。彻底testing和debugging。如果你必须说服自己的performance，而不是使用普通的整数，那么测量它。

通过将定点计算的复杂性移到一个地方，它将为您节省很多麻烦。

如果你喜欢，你可以进一步增加你的课程的效用，使它成为一个模板，并用typedef FixedPoint<short, 8> FixedPoint8;replace旧的typedef FixedPoint<short, 8> FixedPoint8; 但是在目标架构上，这可能不是必须的，所以首先要避免模板的复杂性。

在互联网的某个地方可能有一个很好的定点课程 – 我会从Boost图书馆开始寻找。

你的浮点代码实际上是使用小数点吗？如果是这样：

首先，您必须阅读Randy Yates关于“定点math介绍”的论文： http : //www.digitalsignallabs.com/fp.pdf

然后，您需要对浮点代码进行“剖析”，以找出代码中“关键”点所需的定点值的适当范围，例如左边的U（5,3）= 5位，3位向右，未签名。

在这一点上，你可以应用上面提到的论文中的算术规则; 规则指定了如何解释算术运算产生的位。您可以编写macros或函数来执行操作。

为了比较浮点和固定点的结果，保持浮点版本是很方便的。

改变的定点表示通常被称为“缩放”。

如果你能在一个没有性能损失的class上做到这一点，那么这就是要走的路。它很大程度上取决于编译器以及它如何内联。如果使用类的性能损失，那么你需要一个更传统的C风格的方法。 OOP方法将为您提供编译器强制执行的types安全性，而传统的实现方式只能近似于此。

@cibyr有一个很好的OOP实现。现在更传统的一个。

要跟踪哪些variables被缩放，您需要使用一致的约定。在每个variables名的末尾做一个表示法来表示是否缩放值，然后写入扩展为x >> 8和x << 8的macrosSCALE（）和UNSCALE（）。

 #define SCALE(x) (x>>8) #define UNSCALE(x) (x<<8) xPositionUnscaled = UNSCALE(10); xPositionScaled = SCALE(xPositionUnscaled);

使用这么多的符号可能看起来像是额外的工作，但注意如何在不查看其他行的情况下一目了然地知道任何行是正确的。例如：

 xPositionScaled = SCALE(xPositionScaled);

显然是错误的，通过检查。

这是乔尔在这篇文章中提到的应用匈牙利观念的一个变种。

我不会在没有特殊硬件的CPU上使用浮点来处理它。我的build议是把所有的数字都按整数换算成一个特定的因子。例如，所有货币价值都以美分为整数，而不是美元作为浮动。例如，0.72被表示为整数72。

那么加法和减法就是一个非常简单的整数运算，例如（0.72 + 1变成72 + 100变成172变成1.72）。

乘法稍微复杂一点，因为它需要一个整数乘，接着是一个缩小比例（0.72 * 2变为72 * 200变成14400变成144（缩小）变成1.44）。

这可能需要特殊的function来执行更复杂的math运算（正弦，余弦等），但即使这些可以通过使用查找表加快。例如：由于您使用的是固定2表示，所以范围（0.0,1）（0-99）中只有100个值，而在此范围之外的正弦/余弦重复，因此您只需要一个100整数查找表。

干杯，伙计。

当我第一次遇到定点数时，我发现Joe Lemieux的文章C中的定点math非常有帮助，它的确提供了一种表示定点数值的方法。

尽pipe我没有用他的工会代表来定点数字。我大多在C语言中有定点的经验，所以我还没有select使用类。不过，大部分情况下，我认为在一个macros中定义你的小数位数，并使用描述性的variables名称可以很容易地处理。另外，我发现最好有用于乘法和特别除法的macros或函数，否则你很快就会得到不可读的代码。

例如，具有24.8个值：

  #include "stdio.h" /* Declarations for fixed point stuff */ typedef int int_fixed; #define FRACT_BITS 8 #define FIXED_POINT_ONE (1 << FRACT_BITS) #define MAKE_INT_FIXED(x) ((x) << FRACT_BITS) #define MAKE_FLOAT_FIXED(x) ((int_fixed)((x) * FIXED_POINT_ONE)) #define MAKE_FIXED_INT(x) ((x) >> FRACT_BITS) #define MAKE_FIXED_FLOAT(x) (((float)(x)) / FIXED_POINT_ONE) #define FIXED_MULT(x, y) ((x)*(y) >> FRACT_BITS) #define FIXED_DIV(x, y) (((x)<<FRACT_BITS) / (y)) /* tests */ int main() { int_fixed fixed_x = MAKE_FLOAT_FIXED( 4.5f ); int_fixed fixed_y = MAKE_INT_FIXED( 2 ); int_fixed fixed_result = FIXED_MULT( fixed_x, fixed_y ); printf( "%.1f\n", MAKE_FIXED_FLOAT( fixed_result ) ); fixed_result = FIXED_DIV( fixed_result, fixed_y ); printf( "%.1f\n", MAKE_FIXED_FLOAT( fixed_result ) ); return 0; }

写出来

 9
 4.5

请注意，这些macros有各种各样的整数溢出问题，我只是想保持macros简单。这只是一个快速和肮脏的例子，我已经在C中做了这样的事情。在C ++中，使用操作符重载可以使事情变得更清晰。其实，你可以很容易地使C代码更漂亮

我想这是一个冗长的说法：我认为可以使用typedef和macros观方法。只要你清楚哪些variables包含了定点值就不难保持，但它可能不会像C ++类那么漂亮。

如果我在你的位置上，我会尝试获得一些分析数字来显示瓶颈在哪里。如果它们相对较less，则使用typedef和macros。如果你决定需要用定点math来全面replace所有的浮点数，那么在课堂上你可能会变得更好。

游戏编程大师的技巧的原始版本有一整章实施定点math。

无论你决定去哪里（我会倾向于一个typedef和一些CPPmacros转换），你需要小心来回转换一些纪律。

你可能会发现你永远不需要来回转换。想象一下整个系统中的一切都是x256。

 template <int precision = 8> class FixedPoint { private: int val_; public: inline FixedPoint(int val) : val_ (val << precision) {}; inline operator int() { return val_ >> precision; } // Other operators... };