# 查找 numpy 数组中相同值序列的长度（游程编码，run-length encoding）

import numpy as np


def count_true_runs(flags):
    """Return the lengths of the runs of consecutive truthy items in *flags*.

    This is the plain-Python reference loop.  The earlier index-based version
    looked at ``b[i-1]`` when ``i == 0`` (which wraps around to the last
    element) and appended a stale counter when the data ended in a false
    value; iterating over the items directly avoids both problems.

    Args:
        flags: iterable of booleans (for example a boolean NumPy array).

    Returns:
        A list of ints, one per run, in order of appearance.
    """
    lengths = []
    current = 0
    for flag in flags:
        if flag:
            current += 1
        elif current:
            # A false item closes the run that was in progress.
            lengths.append(current)
            current = 0
    if current:
        # The data ended while a run was still open.
        lengths.append(current)
    return lengths


if __name__ == "__main__":
    threshold = 7
    print('.')
    d = 10 * np.random.rand(10000000)
    print('.')
    b = d < threshold
    print('.')
    durations = count_true_runs(b)
    print('.')

虽然 `itertools` 函数不是 numpy 的原生函数，但通常速度非常快，所以不妨试一试（当然，也请实际测量包括这个在内的各种解决方案的耗时）：

def runs_of_ones(bits):
    """Lazily yield the length of each run of ones in *bits*, in order.

    Consecutive equal items are grouped with ``itertools.groupby``; groups
    whose value is falsy are skipped.  The length of a kept group is the sum
    of its items, so it is a true count only when the truthy items are
    1/``True``.
    """
    for value, run in itertools.groupby(bits):
        if not value:
            continue
        yield sum(run)

def runs_of_ones_list(bits):
    """Return a list with the length of each run of ones in *bits*.

    Items are grouped into runs of equal values with ``itertools.groupby``.
    Runs of falsy values are dropped; for every other run the sum of its
    items is recorded (equal to its length when the items are 1/``True``).
    """
    lengths = []
    for value, run in itertools.groupby(bits):
        if value:
            lengths.append(sum(run))
    return lengths

def runs_of_ones_array(bits):
    """Return an array with the length of each run of ones in *bits*.

    *bits* is expected to be a one-dimensional sequence of 0/1 values.
    """
    # Pad with a zero on each side so that a run touching either edge still
    # has both a rising and a falling transition.
    padded = numpy.hstack(([0], bits, [0]))
    # For 0/1 input the differences are +1 where a run begins and -1 just
    # past where it ends.
    steps = numpy.diff(padded)
    starts = numpy.where(steps > 0)[0]
    stops = numpy.where(steps < 0)[0]
    return stops - starts

import numpy as np


def rle(inarray):
    """Run-length encode a one-dimensional sequence.

    Partial credit to R's ``rle`` function.  Any 1-D sequence that NumPy can
    convert to an array is accepted (lists of bools, numbers, strings, or an
    existing ndarray).

    Args:
        inarray: the sequence to encode.

    Returns:
        A tuple ``(runlengths, startpositions, values)`` of equally long
        arrays, or ``(None, None, None)`` when the input is empty.
    """
    # asarray avoids copying data that is already an ndarray; nothing below
    # modifies it and the returned arrays are all freshly allocated.
    ia = np.asarray(inarray)
    n = len(ia)
    if n == 0:
        return (None, None, None)

    # True wherever an element differs from its successor (string safe).
    changed = np.array(ia[1:] != ia[:-1])
    # Index of the last element of every run; the final element always
    # closes a run, so it must be included explicitly.
    run_ends = np.append(np.where(changed)[0], n - 1)
    # Distance between consecutive run ends, measured from a virtual end
    # at index -1, gives the run lengths.
    run_lengths = np.diff(np.append(-1, run_ends))
    # Each run starts where the lengths of the preceding runs add up to.
    run_starts = np.cumsum(np.append(0, run_lengths))[:-1]
    return (run_lengths, run_starts, ia[run_ends])

` `xx = np.random.randint(0, 5, 1000000) %timeit yy = rle(xx) 100 loops, best of 3: 18.6 ms per loop` `

` `rle([True, True, True, False, True, False, False]) Out[8]: (array([3, 1, 1, 2]), array([0, 3, 4, 5]), array([ True, False, True, False], dtype=bool)) rle(np.array([5, 4, 4, 4, 4, 0, 0])) Out[9]: (array([1, 4, 2]), array([0, 1, 5]), array([5, 4, 0])) rle(["hello", "hello", "my", "friend", "okay", "okay", "bye"]) Out[10]: (array([2, 1, 1, 2, 1]), array([0, 2, 3, 4, 6]), array(['hello', 'my', 'friend', 'okay', 'bye'], dtype='|S6'))` `

` `xx = np.random.randint(0, 2, 20) xx Out[60]: array([1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1]) am = runs_of_ones_array(xx) tb = rle(xx) am Out[63]: array([4, 5, 2, 5]) tb[0][tb[2] == 1] Out[64]: array([4, 5, 2, 5]) %timeit runs_of_ones_array(xx) 10000 loops, best of 3: 28.5 µs per loop %timeit rle(xx) 10000 loops, best of 3: 38.2 µs per loop` `

% Lengths of the runs of consecutive samples that fall below the threshold.
threshold = 7;
d = 10 * rand(1, 100000);  % Sample data
% Pad the mask with false on both sides so every run has a start and an end;
% the difference is then +1 where a run starts and -1 just after it ends.
b = diff([false, d < threshold, false]);
durations = find(b < 0) - find(b > 0);

` `>>> from numpy import array, arange >>> b = array([0,0,0,1,1,1,0,0,0,1,1,1,1,0,0], dtype=bool) >>> sw = (b[:-1] ^ b[1:]); print sw [False False True False False True False False True False False False True False] >>> isw = arange(len(sw))[sw]; print isw [ 2 5 8 12] >>> lens = isw[1::2] - isw[::2]; print lens [3 4]` `

`sw` 在发生切换（相邻两个值不同）的位置为 True，`isw` 将这些位置转换为索引。然后在 `lens` 中将 `isw` 的元素两两相减。

import numpy as np


def count_adjacent_true(arr):
    """Locate the runs of consecutive ``True`` values in a 1-D boolean array.

    Args:
        arr: one-dimensional NumPy array with boolean dtype.

    Returns:
        A tuple ``(starts, lengths)`` of integer arrays: the index at which
        each run of ``True`` begins and the number of elements in that run.
        Both arrays are empty when *arr* is empty or contains no ``True``.

    Raises:
        AssertionError: if *arr* is not one-dimensional or not boolean
            (the checks are skipped when Python runs with ``-O``).
    """
    assert arr.ndim == 1
    # ``np.bool_`` rather than ``np.bool``: the latter was removed from
    # NumPy for several releases and raises AttributeError there.
    assert arr.dtype == np.bool_
    if arr.size == 0:
        return np.empty(0, dtype=int), np.empty(0, dtype=int)

    # True wherever the value changes between neighbours, with an extra True
    # forced at the very beginning and the very end so that runs touching
    # either edge are delimited as well.
    sw = np.insert(arr[1:] ^ arr[:-1], [0, arr.shape[0] - 1], values=True)
    swi = np.arange(sw.shape[0])[sw]
    # The boundaries alternate between "a run of True starts" and "a run of
    # True ends"; when the array begins with False the first boundary starts
    # a run of False and has to be skipped.
    offset = 0 if arr[0] else 1
    starts = swi[offset:-1:2]
    lengths = swi[offset + 1::2] - starts
    return starts, lengths

已针对不同的一维布尔数组进行了测试（空数组；单个/多个元素；偶数/奇数长度；以 `True` / `False` 开头；仅含 `True` / `False` 元素）。

# Collect the length of every run of consecutive truthy entries in ``b``.
# ``b`` is expected to be an iterable of booleans defined before this
# snippet runs (e.g. ``b = d < threshold``).
durations = []
counter = 0
# The loop variable is deliberately not called ``bool``: binding that name
# at module level would hide the builtin for all code that runs afterwards.
for flag in b:
    if flag:
        counter += 1
    elif counter > 0:
        # A false entry closes the run that was in progress.
        durations.append(counter)
        counter = 0
# A run that reaches the end of ``b`` is not closed inside the loop.
if counter > 0:
    durations.append(counter)