最快的方式列出N以下的所有素数

这是我能想到的最好的algorithm。

def get_primes(n): numbers = set(range(n, 1, -1)) primes = [] while numbers: p = numbers.pop() primes.append(p) numbers.difference_update(set(range(p*2, n+1, p))) return primes >>> timeit.Timer(stmt='get_primes.get_primes(1000000)', setup='import get_primes').timeit(1) 1.1499958793645562 

它可以做得更快吗?

这个代码有一个缺陷:因为numbers是一个无序的集合,不能保证numbers.pop()将从集合中删除最低的数字。 尽pipe如此,对于一些input数字来说(至less对我来说)起作用:

 >>> sum(get_primes(2000000)) 142913828922L #That's the correct sum of all numbers below 2 million >>> 529 in get_primes(1000) False >>> 529 in get_primes(530) True 

警告:由于硬件或Python版本的差异, timeit结果可能会有所不同。

下面是一个脚本比较了一些实现:

  • ambi_sieve_plain,
  • rwh_primes ,
  • rwh_primes1 ,
  • rwh_primes2 ,
  • sieveOfAtkin ,
  • sieveOfEratosthenes ,
  • sundaram3 ,
  • sieve_wheel_30 ,
  • ambi_sieve (需要numpy)
  • primesfrom3 (要求numpy)
  • primesfrom2to (需要numpy)

非常感谢stephan把sieve_wheel_30引起了我的注意。 值得一提的是罗伯特·威廉·汉克斯 ( Robert William Hanks)提供的素数从2到3的素数,rwh_primes,rwh_primes1和rwh_primes2。

psycotesting的普通Python方法中,对于n = 1000000, rwh_primes1是最快的testing。

 +---------------------+-------+ | Method | ms | +---------------------+-------+ | rwh_primes1 | 43.0 | | sieveOfAtkin | 46.4 | | rwh_primes | 57.4 | | sieve_wheel_30 | 63.0 | | rwh_primes2 | 67.8 | | sieveOfEratosthenes | 147.0 | | ambi_sieve_plain | 152.0 | | sundaram3 | 194.0 | +---------------------+-------+ 

在testing过的普通Python方法中, 没有psyco ,对于n = 1000000, rwh_primes2是最快的。

 +---------------------+-------+ | Method | ms | +---------------------+-------+ | rwh_primes2 | 68.1 | | rwh_primes1 | 93.7 | | rwh_primes | 94.6 | | sieve_wheel_30 | 97.4 | | sieveOfEratosthenes | 178.0 | | ambi_sieve_plain | 286.0 | | sieveOfAtkin | 314.0 | | sundaram3 | 416.0 | +---------------------+-------+ 

在所有testing的方法中, 允许numpy ,n = 1000000, primesfrom2to是最快的testing。

 +---------------------+-------+ | Method | ms | +---------------------+-------+ | primesfrom2to | 15.9 | | primesfrom3to | 18.4 | | ambi_sieve | 29.3 | +---------------------+-------+ 

时间使用命令测量:

 python -mtimeit -s"import primes" "primes.{method}(1000000)" 

{method}replace每个方法名称。

primes.py:

 #!/usr/bin/env python import psyco; psyco.full() from math import sqrt, ceil import numpy as np def rwh_primes(n): # https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188 """ Returns a list of primes < n """ sieve = [True] * n for i in xrange(3,int(n**0.5)+1,2): if sieve[i]: sieve[i*i::2*i]=[False]*((ni*i-1)/(2*i)+1) return [2] + [i for i in xrange(3,n,2) if sieve[i]] def rwh_primes1(n): # https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188 """ Returns a list of primes < n """ sieve = [True] * (n/2) for i in xrange(3,int(n**0.5)+1,2): if sieve[i/2]: sieve[i*i/2::i] = [False] * ((ni*i-1)/(2*i)+1) return [2] + [2*i+1 for i in xrange(1,n/2) if sieve[i]] def rwh_primes2(n): # https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188 """ Input n>=6, Returns a list of primes, 2 <= p < n """ correction = (n%6>1) n = {0:n,1:n-1,2:n+4,3:n+3,4:n+2,5:n+1}[n%6] sieve = [True] * (n/3) sieve[0] = False for i in xrange(int(n**0.5)/3+1): if sieve[i]: k=3*i+1|1 sieve[ ((k*k)/3) ::2*k]=[False]*((n/6-(k*k)/6-1)/k+1) sieve[(k*k+4*k-2*k*(i&1))/3::2*k]=[False]*((n/6-(k*k+4*k-2*k*(i&1))/6-1)/k+1) return [2,3] + [3*i+1|1 for i in xrange(1,n/3-correction) if sieve[i]] def sieve_wheel_30(N): # http://zerovolt.com/?p=88 ''' Returns a list of primes <= N using wheel criterion 2*3*5 = 30 Copyright 2009 by zerovolt.com This code is free for non-commercial purposes, in which case you can just leave this comment as a credit for my work. If you need this code for commercial purposes, please contact me by sending an email to: info [at] zerovolt [dot] com.''' __smallp = ( 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997) wheel = (2, 3, 5) const = 30 if N < 2: return [] if N <= const: pos = 0 while __smallp[pos] <= N: pos += 1 return list(__smallp[:pos]) # make the offsets list offsets = (7, 11, 13, 17, 19, 23, 29, 1) # prepare the list p = [2, 3, 5] dim = 2 + N // const tk1 = [True] * dim tk7 = [True] * dim tk11 = [True] * dim tk13 = [True] * dim tk17 = [True] * dim tk19 = [True] * dim tk23 = [True] * dim tk29 = [True] * dim tk1[0] = False # help dictionary d # d[a , b] = c ==> if I want to find the smallest useful multiple of (30*pos)+a # on tkc, then I need the index given by the product of [(30*pos)+a][(30*pos)+b] # in general. If b < a, I need [(30*pos)+a][(30*(pos+1))+b] d = {} for x in offsets: for y in offsets: res = (x*y) % const if res in offsets: d[(x, res)] = y # another help dictionary: gives tkx calling tmptk[x] tmptk = {1:tk1, 7:tk7, 11:tk11, 13:tk13, 17:tk17, 19:tk19, 23:tk23, 29:tk29} pos, prime, lastadded, stop = 0, 0, 0, int(ceil(sqrt(N))) # inner functions definition def del_mult(tk, start, step): for k in xrange(start, len(tk), step): tk[k] = False # end of inner functions definition cpos = const * pos while prime < stop: # 30k + 7 if tk7[pos]: prime = cpos + 7 p.append(prime) lastadded = 7 for off in offsets: tmp = d[(7, off)] start = (pos + prime) if off == 7 else (prime * (const * (pos + 1 if tmp < 7 else 0) + tmp) )//const del_mult(tmptk[off], start, prime) # 30k + 11 if tk11[pos]: prime = cpos + 11 p.append(prime) lastadded = 11 for off in offsets: tmp = d[(11, off)] start = (pos + prime) if off == 11 else (prime * (const * (pos + 1 if tmp < 11 else 0) + tmp) )//const del_mult(tmptk[off], start, prime) # 30k + 13 if tk13[pos]: prime = cpos + 13 p.append(prime) lastadded = 13 for off in offsets: tmp = d[(13, off)] start = (pos + prime) if off == 13 else (prime * (const * (pos + 1 if tmp < 13 else 0) + tmp) )//const del_mult(tmptk[off], start, prime) # 30k + 17 if tk17[pos]: prime = cpos + 17 p.append(prime) lastadded = 17 for off in offsets: tmp = d[(17, off)] start = (pos + prime) if off == 17 else (prime * (const * (pos + 1 if tmp < 17 else 0) + tmp) )//const del_mult(tmptk[off], start, prime) # 30k + 19 if tk19[pos]: prime = cpos + 19 p.append(prime) lastadded = 19 for off in offsets: tmp = d[(19, off)] start = (pos + prime) if off == 19 else (prime * (const * (pos + 1 if tmp < 19 else 0) + tmp) )//const del_mult(tmptk[off], start, prime) # 30k + 23 if tk23[pos]: prime = cpos + 23 p.append(prime) lastadded = 23 for off in offsets: tmp = d[(23, off)] start = (pos + prime) if off == 23 else (prime * (const * (pos + 1 if tmp < 23 else 0) + tmp) )//const del_mult(tmptk[off], start, prime) # 30k + 29 if tk29[pos]: prime = cpos + 29 p.append(prime) lastadded = 29 for off in offsets: tmp = d[(29, off)] start = (pos + prime) if off == 29 else (prime * (const * (pos + 1 if tmp < 29 else 0) + tmp) )//const del_mult(tmptk[off], start, prime) # now we go back to top tk1, so we need to increase pos by 1 pos += 1 cpos = const * pos # 30k + 1 if tk1[pos]: prime = cpos + 1 p.append(prime) lastadded = 1 for off in offsets: tmp = d[(1, off)] start = (pos + prime) if off == 1 else (prime * (const * pos + tmp) )//const del_mult(tmptk[off], start, prime) # time to add remaining primes # if lastadded == 1, remove last element and start adding them from tk1 # this way we don't need an "if" within the last while if lastadded == 1: p.pop() # now complete for every other possible prime while pos < len(tk1): cpos = const * pos if tk1[pos]: p.append(cpos + 1) if tk7[pos]: p.append(cpos + 7) if tk11[pos]: p.append(cpos + 11) if tk13[pos]: p.append(cpos + 13) if tk17[pos]: p.append(cpos + 17) if tk19[pos]: p.append(cpos + 19) if tk23[pos]: p.append(cpos + 23) if tk29[pos]: p.append(cpos + 29) pos += 1 # remove exceeding if present pos = len(p) - 1 while p[pos] > N: pos -= 1 if pos < len(p) - 1: del p[pos+1:] # return p list return p def sieveOfEratosthenes(n): """sieveOfEratosthenes(n): return the list of the primes < n.""" # Code from: <dickinsm@gmail.com>, Nov 30 2006 # http://groups.google.com/group/comp.lang.python/msg/f1f10ced88c68c2d if n <= 2: return [] sieve = range(3, n, 2) top = len(sieve) for si in sieve: if si: bottom = (si*si - 3) // 2 if bottom >= top: break sieve[bottom::si] = [0] * -((bottom - top) // si) return [2] + [el for el in sieve if el] def sieveOfAtkin(end): """sieveOfAtkin(end): return a list of all the prime numbers <end using the Sieve of Atkin.""" # Code by Steve Krenzel, <Sgk284@gmail.com>, improved # Code: https://web.archive.org/web/20080324064651/http://krenzel.info/?p=83 # Info: http://en.wikipedia.org/wiki/Sieve_of_Atkin assert end > 0 lng = ((end-1) // 2) sieve = [False] * (lng + 1) x_max, x2, xd = int(sqrt((end-1)/4.0)), 0, 4 for xd in xrange(4, 8*x_max + 2, 8): x2 += xd y_max = int(sqrt(end-x2)) n, n_diff = x2 + y_max*y_max, (y_max << 1) - 1 if not (n & 1): n -= n_diff n_diff -= 2 for d in xrange((n_diff - 1) << 1, -1, -8): m = n % 12 if m == 1 or m == 5: m = n >> 1 sieve[m] = not sieve[m] n -= d x_max, x2, xd = int(sqrt((end-1) / 3.0)), 0, 3 for xd in xrange(3, 6 * x_max + 2, 6): x2 += xd y_max = int(sqrt(end-x2)) n, n_diff = x2 + y_max*y_max, (y_max << 1) - 1 if not(n & 1): n -= n_diff n_diff -= 2 for d in xrange((n_diff - 1) << 1, -1, -8): if n % 12 == 7: m = n >> 1 sieve[m] = not sieve[m] n -= d x_max, y_min, x2, xd = int((2 + sqrt(4-8*(1-end)))/4), -1, 0, 3 for x in xrange(1, x_max + 1): x2 += xd xd += 6 if x2 >= end: y_min = (((int(ceil(sqrt(x2 - end))) - 1) << 1) - 2) << 1 n, n_diff = ((x*x + x) << 1) - 1, (((x-1) << 1) - 2) << 1 for d in xrange(n_diff, y_min, -8): if n % 12 == 11: m = n >> 1 sieve[m] = not sieve[m] n += d primes = [2, 3] if end <= 3: return primes[:max(0,end-2)] for n in xrange(5 >> 1, (int(sqrt(end))+1) >> 1): if sieve[n]: primes.append((n << 1) + 1) aux = (n << 1) + 1 aux *= aux for k in xrange(aux, end, 2 * aux): sieve[k >> 1] = False s = int(sqrt(end)) + 1 if s % 2 == 0: s += 1 primes.extend([i for i in xrange(s, end, 2) if sieve[i >> 1]]) return primes def ambi_sieve_plain(n): s = range(3, n, 2) for m in xrange(3, int(n**0.5)+1, 2): if s[(m-3)/2]: for t in xrange((m*m-3)/2,(n>>1)-1,m): s[t]=0 return [2]+[t for t in s if t>0] def sundaram3(max_n): # https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/2073279#2073279 numbers = range(3, max_n+1, 2) half = (max_n)//2 initial = 4 for step in xrange(3, max_n+1, 2): for i in xrange(initial, half, step): numbers[i-1] = 0 initial += 2*(step+1) if initial > half: return [2] + filter(None, numbers) ################################################################################ # Using Numpy: def ambi_sieve(n): # http://tommih.blogspot.com/2009/04/fast-prime-number-generator.html s = np.arange(3, n, 2) for m in xrange(3, int(n ** 0.5)+1, 2): if s[(m-3)/2]: s[(m*m-3)/2::m]=0 return np.r_[2, s[s>0]] def primesfrom3to(n): # https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188 """ Returns a array of primes, p < n """ assert n>=2 sieve = np.ones(n/2, dtype=np.bool) for i in xrange(3,int(n**0.5)+1,2): if sieve[i/2]: sieve[i*i/2::i] = False return np.r_[2, 2*np.nonzero(sieve)[0][1::]+1] def primesfrom2to(n): # https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188 """ Input n>=6, Returns a array of primes, 2 <= p < n """ sieve = np.ones(n/3 + (n%6==2), dtype=np.bool) sieve[0] = False for i in xrange(int(n**0.5)/3+1): if sieve[i]: k=3*i+1|1 sieve[ ((k*k)/3) ::2*k] = False sieve[(k*k+4*k-2*k*(i&1))/3::2*k] = False return np.r_[2,3,((3*np.nonzero(sieve)[0]+1)|1)] if __name__=='__main__': import itertools import sys def test(f1,f2,num): print('Testing {f1} and {f2} return same results'.format( f1=f1.func_name, f2=f2.func_name)) if not all([a==b for a,b in itertools.izip_longest(f1(num),f2(num))]): sys.exit("Error: %s(%s) != %s(%s)"%(f1.func_name,num,f2.func_name,num)) n=1000000 test(sieveOfAtkin,sieveOfEratosthenes,n) test(sieveOfAtkin,ambi_sieve,n) test(sieveOfAtkin,ambi_sieve_plain,n) test(sieveOfAtkin,sundaram3,n) test(sieveOfAtkin,sieve_wheel_30,n) test(sieveOfAtkin,primesfrom3to,n) test(sieveOfAtkin,primesfrom2to,n) test(sieveOfAtkin,rwh_primes,n) test(sieveOfAtkin,rwh_primes1,n) test(sieveOfAtkin,rwh_primes2,n) 

运行脚本testing所有实现都会得到相同的结果。

相关问题(涉及素数生成器和包括基准):
在Python中加速位串/位操作?

更快和更具记忆力的纯Python代码:

 def primes(n): """ Returns a list of primes < n """ sieve = [True] * n for i in xrange(3,int(n**0.5)+1,2): if sieve[i]: sieve[i*i::2*i]=[False]*((ni*i-1)/(2*i)+1) return [2] + [i for i in xrange(3,n,2) if sieve[i]] 

或者以半筛子开始

 def primes1(n): """ Returns a list of primes < n """ sieve = [True] * (n/2) for i in xrange(3,int(n**0.5)+1,2): if sieve[i/2]: sieve[i*i/2::i] = [False] * ((ni*i-1)/(2*i)+1) return [2] + [2*i+1 for i in xrange(1,n/2) if sieve[i]] 

更快和更具记忆力的numpy代码:

 import numpy def primesfrom3to(n): """ Returns a array of primes, 3 <= p < n """ sieve = numpy.ones(n/2, dtype=numpy.bool) for i in xrange(3,int(n**0.5)+1,2): if sieve[i/2]: sieve[i*i/2::i] = False return 2*numpy.nonzero(sieve)[0][1::]+1 

以三分之一的筛子开始更快的变化:

 import numpy def primesfrom2to(n): """ Input n>=6, Returns a array of primes, 2 <= p < n """ sieve = numpy.ones(n/3 + (n%6==2), dtype=numpy.bool) for i in xrange(1,int(n**0.5)/3+1): if sieve[i]: k=3*i+1|1 sieve[ k*k/3 ::2*k] = False sieve[k*(k-2*(i&1)+4)/3::2*k] = False return numpy.r_[2,3,((3*numpy.nonzero(sieve)[0][1:]+1)|1)] 

上面代码的一个(难以编码的)纯python版本是:

 def primes2(n): """ Input n>=6, Returns a list of primes, 2 <= p < n """ n, correction = nn%6+6, 2-(n%6>1) sieve = [True] * (n/3) for i in xrange(1,int(n**0.5)/3+1): if sieve[i]: k=3*i+1|1 sieve[ k*k/3 ::2*k] = [False] * ((n/6-k*k/6-1)/k+1) sieve[k*(k-2*(i&1)+4)/3::2*k] = [False] * ((n/6-k*(k-2*(i&1)+4)/6-1)/k+1) return [2,3] + [3*i+1|1 for i in xrange(1,n/3-correction) if sieve[i]] 

不幸的是,纯python不采用更简单,更快速的numpy方式来执行Assignment,并且在循环内调用len(),如[False]*len(sieve[((k*k)/3)::2*k])太慢了。 所以我不得不即兴纠正input(并避免更多的math),并做一些极端(和痛苦的)math魔法。
就个人而言,我认为这是一个耻辱(这是如此广泛使用)是不是Python标准库的一部分(在Python 3发布后2年和没有numpy兼容性),语法和速度的改善似乎被完全忽略python开发者。

Python Cookbook中有一个非常简洁的示例 – 在该URL上提出的最快版本是:

 import itertools def erat2( ): D = { } yield 2 for q in itertools.islice(itertools.count(3), 0, None, 2): p = D.pop(q, None) if p is None: D[q*q] = q yield q else: x = p + q while x in D or not (x&1): x += p D[x] = p 

所以,会给

 def get_primes_erat(n): return list(itertools.takewhile(lambda p: p<n, erat2())) 

使用pri.py中的代码在shell提示符下进行testing(我喜欢这样做),我观察到:

 $ python2.5 -mtimeit -s'import pri' 'pri.get_primes(1000000)' 10 loops, best of 3: 1.69 sec per loop $ python2.5 -mtimeit -s'import pri' 'pri.get_primes_erat(1000000)' 10 loops, best of 3: 673 msec per loop 

所以看起来菜谱解决scheme的速度要快两倍。

使用Sundaram的Sieve ,我想我打破了纯Python的logging:

 def sundaram3(max_n): numbers = range(3, max_n+1, 2) half = (max_n)//2 initial = 4 for step in xrange(3, max_n+1, 2): for i in xrange(initial, half, step): numbers[i-1] = 0 initial += 2*(step+1) if initial > half: return [2] + filter(None, numbers) 

比较讨论:

 C:\USERS>python -m timeit -n10 -s "import get_primes" "get_primes.get_primes_erat(1000000)" 10 loops, best of 3: 710 msec per loop C:\USERS>python -m timeit -n10 -s "import get_primes" "get_primes.daniel_sieve_2(1000000)" 10 loops, best of 3: 435 msec per loop C:\USERS>python -m timeit -n10 -s "import get_primes" "get_primes.sundaram3(1000000)" 10 loops, best of 3: 327 msec per loop 

该algorithm速度快,但存在严重缺陷:

 >>> sorted(get_primes(530)) [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 527, 529] >>> 17*31 527 >>> 23*23 529 

你假设numbers.pop()会返回集合中最小的数字,但是这是不能保证的。 集合是无序的, pop()移除并返回一个任意的元素,所以它不能用来从其余的数字中select下一个素数。

对于具有足够大的N的真正最快的解决scheme,将下载预先计算的素数列表 ,将其存储为元组,并执行如下操作:

 for pos,i in enumerate(primes): if i > N: print primes[:pos] 

如果N > primes[-1] 计算更多的素数,并将新的列表保存在您的代码中,那么下一次同样快。

总是在箱子外面思考。

如果你不想重新发明轮子,你可以安装符号math库sympy (是的,它是Python 3兼容)

 pip install sympy 

并使用primerange函数

 from sympy import sieve primes = list(sieve.primerange(1, 10**6)) 

编写自己的主要search代码是有益的,但是手头上有一个快速可靠的库也是有用的。 我在C ++库primesieve上编写了一个包装器,命名为primesieve-python

试试看, pip install primesieve

 import primesieve primes = primesieve.generate_primes(10**8) 

我很好奇看到比较的速度。

如果你接受itertools而不是numpy,这里是适用于Python 3的rwh_primes2,它在我的机器上的运行速度是它的两倍。 唯一的实质性变化是使用bytearray而不是布尔的列表,并使用compress来代替list comprehension来构build最终列表。 (如果可以的话,我会添加这个作为像moarningsun的评论。)

 import itertools izip = itertools.zip_longest chain = itertools.chain.from_iterable compress = itertools.compress def rwh_primes2_python3(n): """ Input n>=6, Returns a list of primes, 2 <= p < n """ zero = bytearray([False]) size = n//3 + (n % 6 == 2) sieve = bytearray([True]) * size sieve[0] = False for i in range(int(n**0.5)//3+1): if sieve[i]: k=3*i+1|1 start = (k*k+4*k-2*k*(i&1))//3 sieve[(k*k)//3::2*k]=zero*((size - (k*k)//3 - 1) // (2 * k) + 1) sieve[ start ::2*k]=zero*((size - start - 1) // (2 * k) + 1) ans = [2,3] poss = chain(izip(*[range(i, n, 6) for i in (1,5)])) ans.extend(compress(poss, sieve)) return ans 

比较:

 >>> timeit.timeit('primes.rwh_primes2(10**6)', setup='import primes', number=1) 0.0652179726976101 >>> timeit.timeit('primes.rwh_primes2_python3(10**6)', setup='import primes', number=1) 0.03267321276325674 

 >>> timeit.timeit('primes.rwh_primes2(10**8)', setup='import primes', number=1) 6.394284538007014 >>> timeit.timeit('primes.rwh_primes2_python3(10**8)', setup='import primes', number=1) 3.833829450302801 

如果你对N有控制权,列出所有素数的最快方法是预先计算它们。 认真。 预计算是忽视优化的一种方式。

以下是我通常用来在Python中生成素数的代码:

 $ python -mtimeit -s'import sieve' 'sieve.sieve(1000000)' 10 loops, best of 3: 445 msec per loop $ cat sieve.py from math import sqrt def sieve(size): prime=[True]*size rng=xrange limit=int(sqrt(size)) for i in rng(3,limit+1,+2): if prime[i]: prime[i*i::+i]=[False]*len(prime[i*i::+i]) return [2]+[i for i in rng(3,size,+2) if prime[i]] if __name__=='__main__': print sieve(100) 

它不能与这里发布的更快的解决scheme竞争,但至less它是纯粹的Python。

感谢您发布这个问题。 我今天学到了很多东西。

对于最快的代码,numpy解决scheme是最好的。 但是,纯粹由于学术原因,我发布了我的纯Python版本,比上面发布的食谱版本less了50%。 由于我在记忆中列出了整个列表,所以需要足够的空间来容纳所有的东西,但似乎相当好。

 def daniel_sieve_2(maxNumber): """ Given a number, returns all numbers less than or equal to that number which are prime. """ allNumbers = range(3, maxNumber+1, 2) for mIndex, number in enumerate(xrange(3, maxNumber+1, 2)): if allNumbers[mIndex] == 0: continue # now set all multiples to 0 for index in xrange(mIndex+number, (maxNumber-3)/2+1, number): allNumbers[index] = 0 return [2] + filter(lambda n: n!=0, allNumbers) 

结果是:

 >>>mine = timeit.Timer("daniel_sieve_2(1000000)", ... "from sieves import daniel_sieve_2") >>>prev = timeit.Timer("get_primes_erat(1000000)", ... "from sieves import get_primes_erat") >>>print "Mine: {0:0.4f} ms".format(min(mine.repeat(3, 1))*1000) Mine: 428.9446 ms >>>print "Previous Best {0:0.4f} ms".format(min(prev.repeat(3, 1))*1000) Previous Best 621.3581 ms 

在N <9,080,191的假设下确定性地实施Miller-Rabin的Primality检验

 import sys import random def miller_rabin_pass(a, n): d = n - 1 s = 0 while d % 2 == 0: d >>= 1 s += 1 a_to_power = pow(a, d, n) if a_to_power == 1: return True for i in xrange(s-1): if a_to_power == n - 1: return True a_to_power = (a_to_power * a_to_power) % n return a_to_power == n - 1 def miller_rabin(n): for a in [2, 3, 37, 73]: if not miller_rabin_pass(a, n): return False return True n = int(sys.argv[1]) primes = [2] for p in range(3,n,2): if miller_rabin(p): primes.append(p) print len(primes) 

根据维基百科( http://en.wikipedia.org/wiki/Miller-Rabin_primality_test )上的文章,对于a = 2,3,37和73,N <9,080,191就足以决定N是否合成。

我修改了原始Miller-Rabintesting的概率实现的源代码: http : //en.literateprograms.org/Miller-Rabin_primality_test_(Python)

使用Numpy的半筛的稍微不同的实现:

http://rebrained.com/?p=458

导入math
importnumpy
 def prime6(upto):
    素数= numpy.arange(3,高达+ 1,2)
     isprime = numpy.ones((高达-1)/ 2,D型细胞=布尔)
    for factor in primes[:int(math.sqrt(upto))]:
        if isprime[(factor-2)/2]: isprime[(factor*3-2)/2:(upto-1)/2:factor]=0
    return numpy.insert(primes[isprime],0,2)

Can someone compare this with the other timings? On my machine it seems pretty comparable to the other Numpy half-sieve.

First time using python, so some of the methods I use in this might seem a bit cumbersome. I just straight converted my c++ code to python and this is what I have (albeit a tad bit slowww in python)

 #!/usr/bin/env python import time def GetPrimes(n): Sieve = [1 for x in xrange(n)] Done = False w = 3 while not Done: for q in xrange (3, n, 2): Prod = w*q if Prod < n: Sieve[Prod] = 0 else: break if w > (n/2): Done = True w += 2 return Sieve start = time.clock() d = 10000000 Primes = GetPrimes(d) count = 1 #This is for 2 for x in xrange (3, d, 2): if Primes[x]: count+=1 elapsed = (time.clock() - start) print "\nFound", count, "primes in", elapsed, "seconds!\n" 

pythonw Primes.py

Found 664579 primes in 12.799119 seconds!

 #!/usr/bin/env python import time def GetPrimes2(n): Sieve = [1 for x in xrange(n)] for q in xrange (3, n, 2): k = q for y in xrange(k*3, n, k*2): Sieve[y] = 0 return Sieve start = time.clock() d = 10000000 Primes = GetPrimes2(d) count = 1 #This is for 2 for x in xrange (3, d, 2): if Primes[x]: count+=1 elapsed = (time.clock() - start) print "\nFound", count, "primes in", elapsed, "seconds!\n" 

pythonw Primes2.py

Found 664579 primes in 10.230172 seconds!

 #!/usr/bin/env python import time def GetPrimes3(n): Sieve = [1 for x in xrange(n)] for q in xrange (3, n, 2): k = q for y in xrange(k*k, n, k << 1): Sieve[y] = 0 return Sieve start = time.clock() d = 10000000 Primes = GetPrimes3(d) count = 1 #This is for 2 for x in xrange (3, d, 2): if Primes[x]: count+=1 elapsed = (time.clock() - start) print "\nFound", count, "primes in", elapsed, "seconds!\n" 

python Primes2.py

Found 664579 primes in 7.113776 seconds!

I know the competition is closed for some years. …

Nonetheless this is my suggestion for a pure python prime sieve, based on omitting the multiples of 2, 3 and 5 by using appropriate steps while processing the sieve forward. Nonetheless it is actually slower for N<10^9 than @Robert William Hanks superior solutions rwh_primes2 and rwh_primes1. By using a ctypes.c_ushort sieve array above 1.5* 10^8 it is somehow adaptive to memory limits.

10^6

$ python -mtimeit -s"import primeSieveSpeedComp" "primeSieveSpeedComp.primeSieveSeq(1000000)" 10 loops, best of 3: 46.7 msec per loop

to compare:$ python -mtimeit -s"import primeSieveSpeedComp" "primeSieveSpeedComp.rwh_primes1(1000000)" 10 loops, best of 3: 43.2 msec per loop to compare: $ python -m timeit -s"import primeSieveSpeedComp" "primeSieveSpeedComp.rwh_primes2(1000000)" 10 loops, best of 3: 34.5 msec per loop

10^7

$ python -mtimeit -s"import primeSieveSpeedComp" "primeSieveSpeedComp.primeSieveSeq(10000000)" 10 loops, best of 3: 530 msec per loop

to compare:$ python -mtimeit -s"import primeSieveSpeedComp" "primeSieveSpeedComp.rwh_primes1(10000000)" 10 loops, best of 3: 494 msec per loop to compare: $ python -m timeit -s"import primeSieveSpeedComp" "primeSieveSpeedComp.rwh_primes2(10000000)" 10 loops, best of 3: 375 msec per loop

10^8

$ python -mtimeit -s"import primeSieveSpeedComp" "primeSieveSpeedComp.primeSieveSeq(100000000)" 10 loops, best of 3: 5.55 sec per loop

to compare: $ python -mtimeit -s"import primeSieveSpeedComp" "primeSieveSpeedComp.rwh_primes1(100000000)" 10 loops, best of 3: 5.33 sec per loop to compare: $ python -m timeit -s"import primeSieveSpeedComp" "primeSieveSpeedComp.rwh_primes2(100000000)" 10 loops, best of 3: 3.95 sec per loop

10^9

$ python -mtimeit -s"import primeSieveSpeedComp" "primeSieveSpeedComp.primeSieveSeq(1000000000)" 10 loops, best of 3: 61.2 sec per loop

to compare: $ python -mtimeit -n 3 -s"import primeSieveSpeedComp" "primeSieveSpeedComp.rwh_primes1(1000000000)" 3 loops, best of 3: 97.8 sec per loop

to compare: $ python -m timeit -s"import primeSieveSpeedComp" "primeSieveSpeedComp.rwh_primes2(1000000000)" 10 loops, best of 3: 41.9 sec per loop

You may copy the code below into ubuntus primeSieveSpeedComp to review this tests.

 def primeSieveSeq(MAX_Int): if MAX_Int > 5*10**8: import ctypes int16Array = ctypes.c_ushort * (MAX_Int >> 1) sieve = int16Array() #print 'uses ctypes "unsigned short int Array"' else: sieve = (MAX_Int >> 1) * [False] #print 'uses python list() of long long int' if MAX_Int < 10**8: sieve[4::3] = [True]*((MAX_Int - 8)/6+1) sieve[12::5] = [True]*((MAX_Int - 24)/10+1) r = [2, 3, 5] n = 0 for i in xrange(int(MAX_Int**0.5)/30+1): n += 3 if not sieve[n]: n2 = (n << 1) + 1 r.append(n2) n2q = (n2**2) >> 1 sieve[n2q::n2] = [True]*(((MAX_Int >> 1) - n2q - 1) / n2 + 1) n += 2 if not sieve[n]: n2 = (n << 1) + 1 r.append(n2) n2q = (n2**2) >> 1 sieve[n2q::n2] = [True]*(((MAX_Int >> 1) - n2q - 1) / n2 + 1) n += 1 if not sieve[n]: n2 = (n << 1) + 1 r.append(n2) n2q = (n2**2) >> 1 sieve[n2q::n2] = [True]*(((MAX_Int >> 1) - n2q - 1) / n2 + 1) n += 2 if not sieve[n]: n2 = (n << 1) + 1 r.append(n2) n2q = (n2**2) >> 1 sieve[n2q::n2] = [True]*(((MAX_Int >> 1) - n2q - 1) / n2 + 1) n += 1 if not sieve[n]: n2 = (n << 1) + 1 r.append(n2) n2q = (n2**2) >> 1 sieve[n2q::n2] = [True]*(((MAX_Int >> 1) - n2q - 1) / n2 + 1) n += 2 if not sieve[n]: n2 = (n << 1) + 1 r.append(n2) n2q = (n2**2) >> 1 sieve[n2q::n2] = [True]*(((MAX_Int >> 1) - n2q - 1) / n2 + 1) n += 3 if not sieve[n]: n2 = (n << 1) + 1 r.append(n2) n2q = (n2**2) >> 1 sieve[n2q::n2] = [True]*(((MAX_Int >> 1) - n2q - 1) / n2 + 1) n += 1 if not sieve[n]: n2 = (n << 1) + 1 r.append(n2) n2q = (n2**2) >> 1 sieve[n2q::n2] = [True]*(((MAX_Int >> 1) - n2q - 1) / n2 + 1) if MAX_Int < 10**8: return [2, 3, 5]+[(p << 1) + 1 for p in [n for n in xrange(3, MAX_Int >> 1) if not sieve[n]]] n = n >> 1 try: for i in xrange((MAX_Int-2*n)/30 + 1): n += 3 if not sieve[n]: r.append((n << 1) + 1) n += 2 if not sieve[n]: r.append((n << 1) + 1) n += 1 if not sieve[n]: r.append((n << 1) + 1) n += 2 if not sieve[n]: r.append((n << 1) + 1) n += 1 if not sieve[n]: r.append((n << 1) + 1) n += 2 if not sieve[n]: r.append((n << 1) + 1) n += 3 if not sieve[n]: r.append((n << 1) + 1) n += 1 if not sieve[n]: r.append((n << 1) + 1) except: pass return r 

It's all written and tested. So there is no need to reinvent the wheel.

 python -m timeit -r10 -s"from sympy import sieve" "primes = list(sieve.primerange(1, 10**6))" 

gives us a record breaking 12.2 msec !

 10 loops, best of 10: 12.2 msec per loop 

If this is not fast enough, you can try PyPy:

 pypy -m timeit -r10 -s"from sympy import sieve" "primes = list(sieve.primerange(1, 10**6))" 

这导致:

 10 loops, best of 10: 2.03 msec per loop 

The answer with 247 up-votes lists 15.9 ms for the best solution. Compare this!!!

I tested some unutbu's functions , i computed it with hungred millions number

The winners are the functions that use numpy library,

Note : It would also interesting make a memory utilization test 🙂

Computation time result

Sample code

Complete code on my github repository

 #!/usr/bin/env python import lib import timeit import sys import math import datetime import prettyplotlib as ppl import numpy as np import matplotlib.pyplot as plt from prettyplotlib import brewer2mpl primenumbers_gen = [ 'sieveOfEratosthenes', 'ambi_sieve', 'ambi_sieve_plain', 'sundaram3', 'sieve_wheel_30', 'primesfrom3to', 'primesfrom2to', 'rwh_primes', 'rwh_primes1', 'rwh_primes2', ] def human_format(num): # https://stackoverflow.com/questions/579310/formatting-long-numbers-as-strings-in-python?answertab=active#tab-top magnitude = 0 while abs(num) >= 1000: magnitude += 1 num /= 1000.0 # add more suffixes if you need them return '%.2f%s' % (num, ['', 'K', 'M', 'G', 'T', 'P'][magnitude]) if __name__=='__main__': # Vars n = 10000000 # number itereration generator nbcol = 5 # For decompose prime number generator nb_benchloop = 3 # Eliminate false positive value during the test (bench average time) datetimeformat = '%Y-%m-%d %H:%M:%S.%f' config = 'from __main__ import n; import lib' primenumbers_gen = { 'sieveOfEratosthenes': {'color': 'b'}, 'ambi_sieve': {'color': 'b'}, 'ambi_sieve_plain': {'color': 'b'}, 'sundaram3': {'color': 'b'}, 'sieve_wheel_30': {'color': 'b'}, # # # 'primesfrom2to': {'color': 'b'}, 'primesfrom3to': {'color': 'b'}, # 'rwh_primes': {'color': 'b'}, # 'rwh_primes1': {'color': 'b'}, 'rwh_primes2': {'color': 'b'}, } # Get n in command line if len(sys.argv)>1: n = int(sys.argv[1]) step = int(math.ceil(n / float(nbcol))) nbs = np.array([i * step for i in range(1, int(nbcol) + 1)]) set2 = brewer2mpl.get_map('Paired', 'qualitative', 12).mpl_colors print datetime.datetime.now().strftime(datetimeformat) print("Compute prime number to %(n)s" % locals()) print("") results = dict() for pgen in primenumbers_gen: results[pgen] = dict() benchtimes = list() for n in nbs: t = timeit.Timer("lib.%(pgen)s(n)" % locals(), setup=config) execute_times = t.repeat(repeat=nb_benchloop,number=1) benchtime = np.mean(execute_times) benchtimes.append(benchtime) results[pgen] = {'benchtimes':np.array(benchtimes)} fig, ax = plt.subplots(1) plt.ylabel('Computation time (in second)') plt.xlabel('Numbers computed') i = 0 for pgen in primenumbers_gen: bench = results[pgen]['benchtimes'] avgs = np.divide(bench,nbs) avg = np.average(bench, weights=nbs) # Compute linear regression A = np.vstack([nbs, np.ones(len(nbs))]).T a, b = np.linalg.lstsq(A, nbs*avgs)[0] # Plot i += 1 #label="%(pgen)s" % locals() #ppl.plot(nbs, nbs*avgs, label=label, lw=1, linestyle='--', color=set2[i % 12]) label="%(pgen)s avg" % locals() ppl.plot(nbs, a * nbs + b, label=label, lw=2, color=set2[i % 12]) print datetime.datetime.now().strftime(datetimeformat) ppl.legend(ax, loc='upper left', ncol=4) # Change x axis label ax.get_xaxis().get_major_formatter().set_scientific(False) fig.canvas.draw() labels = [human_format(int(item.get_text())) for item in ax.get_xticklabels()] ax.set_xticklabels(labels) ax = plt.gca() plt.show() 

In general if you need fast number computation python is not the best choice. Today there are a lot of faster (and complex) algorithm. For example on my computer I got 2.2 second for your code, with Mathematica I got 0.088005.

First of all: do you need set?

The easiest optimization to implement is that if you want to check whether n is prime, you only have to check to see if n is divisible by a number up to square_root(n). Is this for Project Euler?

My guess is that the fastest of all ways is to hard code the primes in your code.

So why not just write a slow script that generates another source file that has all numbers hardwired in it, and then import that source file when you run your actual program.

Of course, this works only if you know the upper bound of N at compile time, but thus is the case for (almost) all project Euler problems.

PS: I might be wrong though iff parsing the source with hard-wired primes is slower than computing them in the first place, but as far I know Python runs from compiled .pyc files so reading a binary array with all primes up to N should be bloody fast in that case.

Sorry to bother but erat2() has a serious flaw in the algorithm.

While searching for the next composite, we need to test odd numbers only. q,p both are odd; then q+p is even and doesn't need to be tested, but q+2*p is always odd. This eliminates the "if even" test in the while loop condition and saves about 30% of the runtime.

While we're at it: instead of the elegant 'D.pop(q,None)' get and delete method use 'if q in D: p=D[q],del D[q]' which is twice as fast! At least on my machine (P3-1Ghz). So I suggest this implementation of this clever algorithm:

 def erat3( ): from itertools import islice, count # q is the running integer that's checked for primeness. # yield 2 and no other even number thereafter yield 2 D = {} # no need to mark D[4] as we will test odd numbers only for q in islice(count(3),0,None,2): if q in D: # is composite p = D[q] del D[q] # q is composite. p=D[q] is the first prime that # divides it. Since we've reached q, we no longer # need it in the map, but we'll mark the next # multiple of its witnesses to prepare for larger # numbers. x = q + p+p # next odd(!) multiple while x in D: # skip composites x += p+p D[x] = p else: # is prime # q is a new prime. # Yield it and mark its first multiple that isn't # already marked in previous iterations. D[q*q] = q yield q 

The fastest method I've tried so far is based on the Python cookbook erat2 function:

 import itertools as it def erat2a( ): D = { } yield 2 for q in it.islice(it.count(3), 0, None, 2): p = D.pop(q, None) if p is None: D[q*q] = q yield q else: x = q + 2*p while x in D: x += 2*p D[x] = p 

See this answer for an explanation of the speeding-up.

I may be late to the party but will have to add my own code for this. It uses approximately n/2 in space because we don't need to store even numbers and I also make use of the bitarray python module, further draStically cutting down on memory consumption and enabling computing all primes up to 1,000,000,000

 from bitarray import bitarray def primes_to(n): size = n//2 sieve = bitarray(size) sieve.setall(1) limit = int(n**0.5) for i in range(1,limit): if sieve[i]: val = 2*i+1 sieve[(i+i*val)::val] = 0 return [2] + [2*i+1 for i, v in enumerate(sieve) if v and i > 0] python -m timeit -n10 -s "import euler" "euler.primes_to(1000000000)" 10 loops, best of 3: 46.5 sec per loop 

This was run on a 64bit 2.4GHZ MAC OSX 10.8.3

I collected several prime number sieves over time. The fastest on my computer is this:

 from time import time # 175 ms for all the primes up to the value 10**6 def primes_sieve(limit): a = [True] * limit a[0] = a[1] = False #a[2] = True for n in xrange(4, limit, 2): a[n] = False root_limit = int(limit**.5)+1 for i in xrange(3,root_limit): if a[i]: for n in xrange(i*i, limit, 2*i): a[n] = False return a LIMIT = 10**6 s=time() primes = primes_sieve(LIMIT) print time()-s 

I'm slow responding to this question but it seemed like a fun exercise. I'm using numpy which might be cheating and I doubt this method is the fastest but it should be clear. It sieves a Boolean array referring to its indices only and elicits prime numbers from the indices of all True values. No modulo needed.

 import numpy as np def ajs_primes3a(upto): mat = np.ones((upto), dtype=bool) mat[0] = False mat[1] = False mat[4::2] = False for idx in range(3, int(upto ** 0.5)+1, 2): mat[idx*2::idx] = False return np.where(mat == True)[0] 

对于Python 3

 def rwh_primes2(n): correction = (n%6>1) n = {0:n,1:n-1,2:n+4,3:n+3,4:n+2,5:n+1}[n%6] sieve = [True] * (n//3) sieve[0] = False for i in range(int(n**0.5)//3+1): if sieve[i]: k=3*i+1|1 sieve[ ((k*k)//3) ::2*k]=[False]*((n//6-(k*k)//6-1)//k+1) sieve[(k*k+4*k-2*k*(i&1))//3::2*k]=[False]*((n//6-(k*k+4*k-2*k*(i&1))//6-1)//k+1) return [2,3] + [3*i+1|1 for i in range(1,n//3-correction) if sieve[i]] 

Here is two updated (pure Python 3.6) versions of one of the fastest functions,

 from itertools import compress def rwh_primes1v1(n): """ Returns a list of primes < n for n > 2 """ sieve = bytearray([True]) * (n//2) for i in range(3,int(n**0.5)+1,2): if sieve[i//2]: sieve[i*i//2::i] = bytearray((ni*i-1)//(2*i)+1) return [2,*compress(range(3,n,2), sieve[1:])] def rwh_primes1v2(n): """ Returns a list of primes < n for n > 2 """ sieve = bytearray([True]) * (n//2+1) for i in range(1,int(n**0.5)//2+1): if sieve[i]: sieve[2*i*(i+1)::2*i+1] = bytearray((n//2-2*i*(i+1))//(2*i+1)+1) return [2,*compress(range(3,n,2), sieve[1:])] 

This is an elegant and simpler solution to find primes using a stored list. Starts with a 4 variables, you only have to test odd primes for divisors, and you only have to test up to a half of what number you are testing as a prime (no point in testing whether 9, 11, 13 divide into 17). It tests previously stored primes as divisors.`

  # Program to calculate Primes primes = [1,3,5,7] for n in range(9,100000,2): for x in range(1,(len(primes)/2)): if n % primes[x] == 0: break else: primes.append(n) print primes 

This is the way you can compare with others.

 # You have to list primes upto n nums = xrange(2, n) for i in range(2, 10): nums = filter(lambda s: s==i or s%i, nums) print nums 

So simple…