`xrange(2 ** 100)` – > OverflowError:long int太大而无法转换为int

xrange函数不适用于大整数:

 >>> N = 10**100 >>> xrange(N) Traceback (most recent call last): ... OverflowError: long int too large to convert to int >>> xrange(N, N+10) Traceback (most recent call last): ... OverflowError: long int too large to convert to int 

Python 3.x:

 >>> N = 10**100 >>> r = range(N) >>> r = range(N, N+10) >>> len(r) 10 

Python 2.x中有py3k builtin range()函数的后端吗?

编辑

我正在寻找一个完整的“lazy” range() ,而不仅仅是它的一些function的部分实现。

好的,这是一个更全面的重新实现。

 class MyXRange(object): def __init__(self, a1, a2=None, step=1): if step == 0: raise ValueError("arg 3 must not be 0") if a2 is None: a1, a2 = 0, a1 if (a2 - a1) % step != 0: a2 += step - (a2 - a1) % step if cmp(a1, a2) != cmp(0, step): a2 = a1 self.start, self.stop, self.step = a1, a2, step def __iter__(self): n = self.start while cmp(n, self.stop) == cmp(0, self.step): yield n n += self.step def __repr__(self): return "MyXRange(%d,%d,%d)" % (self.start, self.stop, self.step) # NB: len(self) will convert this to an int, and may fail def __len__(self): return (self.stop - self.start)//(self.step) def __getitem__(self, key): if key < 0: key = self.__len__() + key if key < 0: raise IndexError("list index out of range") return self[key] n = self.start + self.step*key if cmp(n, self.stop) != cmp(0, self.step): raise IndexError("list index out of range") return n def __reversed__(self): return MyXRange(self.stop-self.step, self.start-self.step, -self.step) def __contains__(self, val): if val == self.start: return cmp(0, self.step) == cmp(self.start, self.stop) if cmp(self.start, val) != cmp(0, self.step): return False if cmp(val, self.stop) != cmp(0, self.step): return False return (val - self.start) % self.step == 0 

还有一些testing:

 def testMyXRange(testsize=10): def normexcept(f,args): try: r = [f(args)] except Exception, e: r = type(e) return r for i in range(-testsize,testsize+1): for j in range(-testsize,testsize+1): print i, j for k in range(-9, 10, 2): r, mr = range(i,j,k), MyXRange(i,j,k) if r != list(mr): print "iter fail: %d, %d, %d" % (i,j,k) if list(reversed(r)) != list(reversed(mr)): print "reversed fail: %d, %d, %d" % (i,j,k) if len(r) != len(mr): print "len fail: %d, %d, %d" % (i,j,k) z = [m for m in range(-testsize*2,testsize*2+1) if (m in r) != (m in mr)] if z != []: print "contains fail: %d, %d, %d, %s" % (i,j,k,(z+["..."])[:10]) z = [m for m in range(-testsize*2, testsize*2+1) if normexcept(r.__getitem__, m) != normexcept(mr.__getitem__, m)] if z != []: print "getitem fail: %d, %d, %d, %s" % (i,j,k,(z+["..."])[:10]) 

我相信没有回报(Py 3彻底删除了int / long的区别,毕竟,但是在2. *它在这里保持;-)但是不难破解你自己的,例如…:

 import operator def wowrange(start, stop, step=1): if step == 0: raise ValueError('step must be != 0') elif step < 0: proceed = operator.gt else: proceed = operator.lt while proceed(start, stop): yield start start += step 

编辑它似乎OP不只是想要循环(xrange的正常目的,并在Py3范围),而且还有lenin运算符(后者在上述生成器上工作,但慢慢优化是可能的)。 对于这样的丰富一个class级更好…:

 import operator class wowrange(object): def __init__(self, start, stop=None, step=1): if step == 0: raise ValueError('step must be != 0') if stop is None: start, stop = 0, start if step < 0: self.proceed = operator.gt self.l = (stop-start+step+1)//step else: self.proceed = operator.lt self.l = (stop-start+step-1)//step self.lo = min(start, stop) self.start, self.stop, self.step = start, stop, step def __iter__(self): start = self.start while self.proceed(start, self.stop): yield start start += self.step def __len__(self): return self.l def __contains__(self, x): if x == self.stop: return False if self.proceed(x, self.start): return False if self.proceed(self.stop, x): return False return (x-self.lo) % self.step == 0 

如果潜伏在这里,我不会感到惊讶,但是,我希望这有助于!

再次编辑 :我看到索引也是必需的。 编写自己的__getitem__太难了吗? 我想是的,所以这里也是,在银盘上…:

  def __getitem__(self, i): if i < 0: i += self.l if i < 0: raise IndexError elif if i >= self.l: raise IndexError return self.start + i * self.step 

我不知道3.0 range支持切片(在最近的xrange版本中xrange不是 – 它曾经用过,但是因为复杂性很可笑,容易出错),但是我想我必须画出一个在某处的沙子线,所以我不会添加它;-)。

从文档:

注意

xrange()旨在简单而快速。 实现可能会施加限制来实现这一点。 Python的C实现将所有参数限制为本地C long(“short”Python整数),并且还要求元素的数量适合本地C long。 如果需要更大的范围,则可以使用itertools模块制作替代版本:islice(count(start,step),(stop-start + step-1)// step)。

或者使用生成器重新实现xrange:

 def myxrange(a1, a2=None, step=1): if a2 is None: start, last = 0, a1 else: start, last = a1, a2 while cmp(start, last) == cmp(0, step): yield start start += step 

 N = 10**100 len(list(myxrange(N, N+10))) 

编辑

问题1546078:在Python问题跟踪器上的“xrange支持longs等等”包含由Neal Norwitz(nnorwitz)编写的C patch和纯Python的无限xrange实现。 请参阅xrange.py

编辑

最新版本的irange (改名为lrange )在github上 。


基于py3k的rangeobject.c实现

irange.py

 """Define `irange.irange` class `xrange`, py3k's `range` analog for large integers See help(irange.irange) >>> r = irange(2**100, 2**101, 2**100) >>> len(r) 1 >>> for i in r: ... print i, 1267650600228229401496703205376 >>> for i in r: ... print i, 1267650600228229401496703205376 >>> 2**100 in r True >>> r[0], r[-1] (1267650600228229401496703205376L, 1267650600228229401496703205376L) >>> L = list(r) >>> L2 = [1, 2, 3] >>> L2[:] = r >>> L == L2 == [2**100] True """ def toindex(arg): """Convert `arg` to integer type that could be used as an index. """ if not any(isinstance(arg, cls) for cls in (long, int, bool)): raise TypeError("'%s' object cannot be interpreted as an integer" % ( type(arg).__name__,)) return int(arg) class irange(object): """irange([start,] stop[, step]) -> irange object Return an iterator that generates the numbers in the range on demand. Return `xrange` for small integers Pure Python implementation of py3k's `range()`. (Ie it supports large integers) If `xrange` and py3k `range()` differ then prefer `xrange`'s behaviour Based on `[1]`_ .. [1] http://svn.python.org/view/python/branches/py3k/Objects/rangeobject.c?view=markup >>> # on Python 2.6 >>> N = 10**80 >>> len(range(N, N+3)) 3 >>> len(xrange(N, N+3)) Traceback (most recent call last): ... OverflowError: long int too large to convert to int >>> len(irange(N, N+3)) 3 >>> xrange(N) Traceback (most recent call last): ... OverflowError: long int too large to convert to int >>> irange(N).length() == N True """ def __new__(cls, *args): try: return xrange(*args) # use `xrange` for small integers except OverflowError: pass nargs = len(args) if nargs == 1: stop = toindex(args[0]) start = 0 step = 1 elif nargs in (2, 3): start = toindex(args[0]) stop = toindex(args[1]) if nargs == 3: step = args[2] if step is None: step = 1 step = toindex(step) if step == 0: raise ValueError("irange() arg 3 must not be zero") else: step = 1 else: raise ValueError("irange(): wrong number of arguments," + " got %s" % args) r = super(irange, cls).__new__(cls) r._start, r._stop, r._step = start, stop, step return r def length(self): """len(self) might throw OverflowError, this method shouldn't.""" if self._step > 0: lo, hi = self._start, self._stop step = self._step else: hi, lo = self._start, self._stop step = -self._step assert step if lo >= hi: return 0 else: return (hi - lo - 1) // step + 1 __len__ = length def __getitem__(self, i): # for L[:] = irange(..) if i < 0: i = i + self.length() if i < 0 or i >= self.length(): raise IndexError("irange object index out of range") return self._start + i * self._step def __repr__(self): if self._step == 1: return "irange(%r, %r)" % (self._start, self._stop) else: return "irange(%r, %r, %r)" % ( self._start, self._stop, self._step) def __contains__(self, ob): if type(ob) not in (int, long, bool): # mimic py3k # perform iterative search return any(i == ob for i in self) # if long or bool if self._step > 0: inrange = self._start <= ob < self._stop else: assert self._step inrange = self._stop < ob <= self._start if not inrange: return False else: return ((ob - self._start) % self._step) == 0 def __iter__(self): len_ = self.length() i = 0 while i < len_: yield self._start + i * self._step i += 1 def __reversed__(self): len_ = self.length() new_start = self._start + (len_ - 1) * self._step new_stop = self._start if self._step > 0: new_stop -= 1 else: new_stop += 1 return irange(new_start, new_stop, -self._step) 

test_irange.py

 """Unit-tests for irange.irange class. Usage: $ python -W error test_irange.py --with-doctest --doctest-tests """ import sys from nose.tools import raises from irange import irange def eq_irange(a, b): """Assert that `a` equals `b`. Where `a`, `b` are `irange` objects """ try: assert a.length() == b.length() assert a._start == b._start assert a._stop == b._stop assert a._step == b._step if a.length() < 100: assert list(a) == list(b) try: assert list(a) == range(a._start, a._stop, a._step) except OverflowError: pass except AttributeError: if type(a) == xrange: assert len(a) == len(b) if len(a) == 0: # empty xrange return if len(a) > 0: assert a[0] == b[0] if len(a) > 1: a = irange(a[0], a[-1], a[1] - a[0]) b = irange(b[0], b[-1], b[1] - b[0]) eq_irange(a, b) else: raise def _get_short_iranges_args(): # perl -E'local $,= q/ /; $n=100; for (1..20) # > { say map {int(-$n + 2*$n*rand)} 0..int(3*rand) }' input_args = """\ 67 -11 51 -36 -15 38 19 43 -58 79 -91 -71 -56 3 51 -23 -63 -80 13 -30 24 -14 49 10 73 31 38 66 -22 20 -81 79 5 84 44 40 49 """ return [[int(arg) for arg in line.split()] for line in input_args.splitlines() if line.strip()] def _get_iranges_args(): N = 2**100 return [(start, stop, step) for start in range(-2*N, 2*N, N//2+1) for stop in range(-4*N, 10*N, N+1) for step in range(-N//2, N, N//8+1)] def _get_short_iranges(): return [irange(*args) for args in _get_short_iranges_args()] def _get_iranges(): return (_get_short_iranges() + [irange(*args) for args in _get_iranges_args()]) @raises(TypeError) def test_kwarg(): irange(stop=10) @raises(TypeError, DeprecationWarning) def test_float_stop(): irange(1.0) @raises(TypeError, DeprecationWarning) def test_float_step2(): irange(-1, 2, 1.0) @raises(TypeError, DeprecationWarning) def test_float_start(): irange(1.0, 2) @raises(TypeError, DeprecationWarning) def test_float_step(): irange(1, 2, 1.0) @raises(TypeError) def test_empty_args(): irange() def test_empty_range(): for args in ( "-3", "1 3 -1", "1 1", "1 1 1", "-3 -4", "-3 -2 -1", "-3 -3 -1", "-3 -3", ): r = irange(*[int(a) for a in args.split()]) assert len(r) == 0 L = list(r) assert len(L) == 0 def test_small_ints(): for args in _get_short_iranges_args(): ir, r = irange(*args), xrange(*args) assert len(ir) == len(r) assert list(ir) == list(r) def test_big_ints(): N = 10**100 for args, len_ in [ [(N,), N], [(N, N+10), 10], [(N, N-10, -2), 5], ]: try: xrange(*args) assert 0 except OverflowError: pass ir = irange(*args) assert ir.length() == len_ try: assert ir.length() == len(ir) except OverflowError: pass # ir[ir.length()-1] # if len(args) >= 2: r = range(*args) assert list(ir) == r assert ir[ir.length()-1] == r[-1] assert list(reversed(ir)) == list(reversed(r)) # def test_negative_index(): assert irange(10)[-1] == 9 assert irange(2**100+1)[-1] == 2**100 def test_reversed(): for r in _get_iranges(): if type(r) == xrange: continue # known not to work for xrange if r.length() > 1000: continue # skip long assert list(reversed(reversed(r))) == list(r) assert list(r) == range(r._start, r._stop, r._step) def test_pickle(): import pickle for r in _get_iranges(): rp = pickle.loads(pickle.dumps(r)) eq_irange(rp, r) def test_equility(): for args in _get_iranges_args(): a, b = irange(*args), irange(*args) assert a is not b assert a != b eq_irange(a, b) def test_contains(): class IntSubclass(int): pass r10 = irange(10) for i in range(10): assert i in r10 assert IntSubclass(i) in r10 assert 10 not in r10 assert -1 not in r10 assert IntSubclass(10) not in r10 assert IntSubclass(-1) not in r10 def test_repr(): for r in _get_iranges(): eq_irange(eval(repr(r)), r) def test_new(): assert repr(irange(True)) == repr(irange(1)) def test_overflow(): lo, hi = sys.maxint-2, sys.maxint+3 assert list(irange(lo, hi)) == list(range(lo, hi)) def test_getitem(): r = irange(sys.maxint-2, sys.maxint+3) L = [] L[:] = r assert len(L) == len(r) assert L == list(r) if __name__ == "__main__": import nose nose.main() 

即使有回报,也可能需要修改。 这里的底层问题是,在Python 2.x中, intlong是独立的数据types,即使int会根据需要自动上传到long s。 但是,这不一定发生在用C编写的函数中,这取决于它们是如何写的。