# 如何在保持顺序的同时从列表中删除重复项？

def uniq(input):
    """Return the items of ``input`` without duplicates, in first-seen order.

    Membership is tested against the output list with ``in``, so items only
    need to support ``==``; unhashable items such as lists are accepted.
    The price is a linear scan of the output for every input item.

    The parameter name shadows the ``input`` builtin; it is kept so that
    existing keyword callers keep working.
    """
    output = []
    for x in input:
        if x not in output:
            output.append(x)
    return output

（感谢 unwind 提供这个代码示例。）

def f7(seq):
    """Return a list of the items in ``seq`` without duplicates, in first-seen order.

    Items are tracked in a set, so they must be hashable.
    """
    seen = set()
    # Bind the method once so the comprehension skips an attribute lookup per item.
    seen_add = seen.add
    # ``seen_add(x)`` returns None (falsy): it runs only for unseen items,
    # records them, and still lets the filter accept them.
    return [x for x in seq if not (x in seen or seen_add(x))]

插入、删除和成员检查的每次操作都是 O(1)。

` `>>> from more_itertools import unique_everseen >>> items = [1, 2, 0, 1, 3, 2] >>> list(unique_everseen(items)) [1, 2, 0, 3]` `

def unique_everseen(iterable, key=None):
    """List unique elements, preserving order. Remember all elements ever seen.

    Args:
        iterable: The items to filter. The items (or their keys, when ``key``
            is given) are stored in a set and must therefore be hashable.
        key: Optional one-argument function; when given, uniqueness is
            decided on ``key(element)`` instead of on the element itself.

    Yields:
        Each element whose value (or key) has not been seen before.
    """
    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
    # unique_everseen('ABBCcAD', str.lower) --> A B C D
    from itertools import filterfalse

    seen = set()
    seen_add = seen.add
    if key is None:
        # filterfalse drops already-seen elements without a Python-level test.
        for element in filterfalse(seen.__contains__, iterable):
            seen_add(element)
            yield element
    else:
        for element in iterable:
            k = key(element)
            if k not in seen:
                seen_add(k)
                yield element

` `>>> from collections import OrderedDict >>> items = [1, 2, 0, 1, 3, 2] >>> list(OrderedDict.fromkeys(items)) [1, 2, 0, 3]` `

` `seen = set() [x for x in seq if x not in seen and not seen.add(x)]` `

` `not seen.add(x)` `

` `sequence = ['1', '2', '3', '3', '6', '4', '5', '6'] unique = [] [unique.append(item) for item in sequence if item not in unique]` `

` `from itertools import groupby [ key for key,_ in groupby(sortedList)]` `

` `>>> from collections import OrderedDict >>> list(OrderedDict.fromkeys('abracadabra')) ['a', 'b', 'r', 'c', 'd']` `

[更新] 从 CPython 3.6 开始，字典是紧凑且有序的。虽然这种保持插入顺序的行为到目前为止还不是语言层面的保证，但在 Python 3.6 中不会改变。所以，你可以使用 `list(dict.fromkeys('abracadabra'))`。

## 你可以试试这个：

` `list1 = ['b','c','d','b','c','a','a'] list2 = list(set(list1)) list2.sort(key=list1.index) print list2` `

## 或者类似的你可以这样做：

` `list1 = ['b','c','d','b','c','a','a'] list2 = sorted(set(list1),key=list1.index) print list2` `

## 你也可以这样做：

` `list1 = ['b','c','d','b','c','a','a'] list2 = [] for i in list1: if not i in list2: list2.append(i)` print list2` `

## 它也可以这样写：

` `list1 = ['b','c','d','b','c','a','a'] list2 = [] [list2.append(i) for i in list1 if not i in list2] print list2` `

`itertools` 文档的“食谱”（recipes）部分有一个函数，同样使用了 `seen` 集合（set）的技巧，但是：

• 支持标准的 `key` 函数。
• 不使用任何不得体的取巧手法（hack）。
• 通过预先绑定 `seen.add` 来优化循环，而不是查找它 N 次。（`f7` 也是这样做的，但有些版本没有。）
• 通过使用 `ifilterfalse` 优化循环，这样在 Python 层面只需要遍历不重复的元素，而不是所有元素。（当然，`ifilterfalse` 内部仍然会遍历所有元素，但那是用 C 实现的，要快得多。）

def unique(iterable):
    """Yield the items of ``iterable`` in order, skipping items already yielded.

    Items are tracked in a set, so they must be hashable.
    """
    import itertools

    # Python 3 renamed ``itertools.ifilterfalse`` to ``filterfalse``; accept either.
    filterfalse = getattr(itertools, "filterfalse", None) or itertools.ifilterfalse

    seen = set()
    seen_add = seen.add
    for element in filterfalse(seen.__contains__, iterable):
        seen_add(element)
        yield element

def f7_noHash(seq):
    """Return the items of ``seq`` without duplicates, compared by ``str(x)``.

    Using the string form lets unhashable items (e.g. lists) be tracked in a
    set. Consequently two different items with the same ``str()`` output
    (such as ``1`` and ``'1'``) are treated as duplicates; the first wins.
    """
    seen = set()
    result = []
    for x in seq:
        marker = str(x)  # computed once per item
        if marker not in seen:
            seen.add(marker)
            result.append(x)
    return result

使用 reduce 的变体，速度快 5 倍，但更复杂

` `>>> l = [5, 6, 6, 1, 1, 2, 2, 3, 4] >>> reduce(lambda r, v: v in r[1] and r or (r[0].append(v) or r[1].add(v)) or r, l, ([], set()))[0] [5, 6, 1, 2, 3, 4]` `

# Accumulator layout: (ordered unique items, set of items already seen).
# The list keeps the order; the set makes the membership lookup fast.
# NOTE: this tuple holds mutable containers that ``reducer`` modifies in
# place. Reusing ``default`` for a second reduce() call carries over the
# items of the first one -- pass a fresh ``([], set())`` per call instead.
default = (list(), set())


def reducer(result, item):
    """Fold step: record ``item`` in ``result`` unless it was seen before.

    Args:
        result: ``(items, seen)`` pair -- a list and a set, updated in place.
        item: The next input value; must be hashable.

    Returns:
        The same ``result`` pair, so the function can be used with reduce().
    """
    if item not in result[1]:
        result[0].append(item)
        result[1].add(item)
    return result


# Example (``reduce`` is a builtin on Python 2 and ``functools.reduce`` on Python 3):
# >>> l = [5, 6, 6, 1, 1, 2, 2, 3, 4]
# >>> reduce(reducer, l, ([], set()))[0]
# [5, 6, 1, 2, 3, 4]

def unique(my_list):
    """Return the items of ``my_list`` without duplicates, in first-seen order.

    The original one-liner looked up the list under construction through
    ``locals()['_[1]']``, an undocumented detail of old CPython 2 list
    comprehensions that raises ``KeyError`` on current interpreters. The
    partial result is now an ordinary local list.
    """
    result = []
    for x in my_list:
        if x not in result:
            result.append(x)
    return result

` `l1 = [1, 2, 3, 4, 1, 2, 3, 4, 5] l2 = [x for x in l1 if x not in locals()['_[1]']] print l2` `

` `[1, 2, 3, 4, 5]` `

` `>>> from iteration_utilities import unique_everseen >>> lst = [1,1,1,2,3,2,2,2,1,3,4] >>> list(unique_everseen(lst)) [1, 2, 3, 4]` `

# 计时

` `from iteration_utilities import unique_everseen from collections import OrderedDict from more_itertools import unique_everseen as more_itertools_unique_everseen def f7(seq): seen = set() seen_add = seen.add return [x for x in seq if not (x in seen or seen_add(x))] import random # no duplicates lst = [random.random() for _ in range(10000)] # ordered by: fastest to slowest %timeit list(unique_everseen(lst)) # 100 loops, best of 3: 4.51 ms per loop %timeit f7(lst) # 100 loops, best of 3: 8.68 ms per loop %timeit list(more_itertools_unique_everseen(lst)) # 100 loops, best of 3: 10 ms per loop %timeit list(OrderedDict.fromkeys(lst)) # 100 loops, best of 3: 11.7 ms per loop # more duplicates lst = [random.randint(0, 500) for _ in range(10000)] %timeit list(unique_everseen(lst)) # 1000 loops, best of 3: 1.38 ms per loop %timeit f7(lst) # 100 loops, best of 3: 1.96 ms per loop %timeit list(OrderedDict.fromkeys(lst)) # 1000 loops, best of 3: 1.98 ms per loop %timeit list(more_itertools_unique_everseen(lst)) # 100 loops, best of 3: 2.29 ms per loop` `

` `>>> lst = [{1}, {1}, {2}, {1}, {3}] >>> list(unique_everseen(lst)) [{1}, {2}, {3}]` `

¹ 免责声明：我是该软件包的作者。

` `import pandas as pd my_list = range(5) + range(5) # [0, 1, 2, 3, 4, 0, 1, 2, 3, 4] >>> pd.Series(my_list).drop_duplicates().tolist() # Output: # [0, 1, 2, 3, 4]` `

def unique(lst):
    """Return the distinct items of ``lst`` as a list, in first-seen order.

    Items are compared with ``!=`` only, so they do not need to be hashable.

    The original recursive form fed the result of ``filter()`` back into
    itself; on Python 3 that is an iterator, which neither equals ``[]`` nor
    supports slicing. This version loops instead and materialises the
    remaining items as a list on every pass.
    """
    remaining = list(lst)
    result = []
    while remaining:
        head = remaining[0]
        result.append(head)
        # Drop every later occurrence of the item that was just kept.
        remaining = [x for x in remaining[1:] if x != head]
    return result

` `In [118]: unique([1,5,1,1,4,3,4]) Out[118]: [1, 5, 4, 3]` `

` `In [122]: %timeit unique(np.random.randint(5, size=(1))) 10000 loops, best of 3: 25.3 us per loop In [123]: %timeit unique(np.random.randint(5, size=(10))) 10000 loops, best of 3: 42.9 us per loop In [124]: %timeit unique(np.random.randint(5, size=(100))) 10000 loops, best of 3: 132 us per loop In [125]: %timeit unique(np.random.randint(5, size=(1000))) 1000 loops, best of 3: 1.05 ms per loop In [126]: %timeit unique(np.random.randint(5, size=(10000))) 100 loops, best of 3: 11 ms per loop` `

import operator


def unique(lst, cmp_op=operator.ne):
    """Return the items of ``lst`` that are pairwise distinct under ``cmp_op``.

    Args:
        lst: The items to filter.
        cmp_op: Two-argument predicate called as ``cmp_op(candidate, kept)``;
            it returns true when the two items count as *different*.
            Defaults to ``operator.ne``.

    Returns:
        A list holding, in order, each item that differs from every item
        kept before it.

    The original recursive form fed the result of ``filter()`` back into
    itself; on Python 3 that is an iterator, which neither equals ``[]`` nor
    supports slicing. This version loops instead.
    """
    remaining = list(lst)
    result = []
    while remaining:
        head = remaining[0]
        result.append(head)
        # Keep only the later items that differ from the one just kept.
        remaining = [x for x in remaining[1:] if cmp_op(x, head)]
    return result

` `def test_round(x,y): return round(x) != round(y)` `

` `In [6]: unique([1.2, 5, 1.9, 1.1, 4.2, 3, 4.8], test_round) Out[6]: [1.2, 5, 1.9, 4.2, 3]` `

` `[l[i] for i in range(len(l)) if l.index(l[i]) == i]` `
` `l = [1,2,2,3,3,...] n = [] n.extend(ele for ele in l if ele not in set(n))` `

MizardX的答案提供了多种方法的好集合。

` `mylist = [x for i,x in enumerate(mylist) if x not in mylist[i+1:]]` `

对于**已排序**的 `numpy` 数组，一个比较高效的方法：

` `b = np.array([1,3,3, 8, 12, 12,12]) numpy.hstack([b[0], [x[0] for x in zip(b[1:], b[:-1]) if x[0]!=x[1]]])` `

` `array([ 1, 3, 8, 12])` `

def uniquefy_list(a):
    """Return the items of ``a`` without duplicates, keeping each value's LAST occurrence.

    An item is kept only when it does not appear again later in ``a``, so
    ``[1, 2, 1, 3]`` becomes ``[2, 1, 3]``. This matches the original
    recursive expression, which additionally raised ``IndexError`` for an
    empty list and recursed once per element.
    """
    result = []
    for index, item in enumerate(a):
        if item not in a[index + 1:]:
            result.append(item)
    return result

` `reduce(lambda x, y: x + y if y[0] not in x else x, map(lambda x: [x],lst))` `

……应该可行，如果我错了请指正。

` `l = list(set(l))` `

…如果你的列表项不可哈希，那么它就不起作用。

` `l = reduce(lambda x, y: x if y in x else x + [y], l, [])` `

` `>>> list1 = [ 1,1,2,2,3,3 ] >>> [ list1.pop(i) for i in range(len(list1))[::-1] if list1.count(list1[i]) > 1 ] [1, 2, 3]` `

` `list1 = [0, 2, 4, 9] for x in range(0, 7): list1.append(x)` `

` `list1 = [0, 2, 4, 9] for x in range(0, 7) if x not in list1: list1.append(x)` `

`.get(True)` XOR `.setdefault(False)`

# Explanation of d.get(x, True) != d.setdefault(x, False)
#
#   x in d | d[x]  | A = d.get(x, True) | x in d | B = d.setdefault(x, False) | x in d | d[x]  | A xor B
#   False  | None  | True  (1)          | False  | False (2)                  | True   | False | True
#   True   | False | False (3)          | True   | False (4)                  | True   | False | False
#
# Notes
# (1) x is not in the dictionary, so get(x, <default>) returns True but does
#     __not__ add the value to the dictionary
# (2) x is not in the dictionary, so setdefault(x, <default>) adds {x: False}
#     and returns False
# (3) since x is in the dictionary, the <default> argument is ignored, and the
#     value of the key is returned, which was set to False in (2)
# (4) since the key is already in the dictionary, its value is returned
#     directly and the argument is ignored
#
# A != B is how to do boolean XOR in Python
#
def sort_with_order(s):
    """Return the items of ``s`` without duplicates, in first-seen order.

    Despite the name nothing is sorted. Items are used as dictionary keys
    and must be hashable.
    """
    d = dict()
    # Both calls run left to right for every item: the comparison is True
    # only the first time ``x`` is met (see the table above).
    return [x for x in s if d.get(x, True) != d.setdefault(x, False)]

**重写 `__missing__`**（受此答案启发）

class Tracker(dict):
    """Dict whose lookup ``t[key]`` is True the first time a key is used, False afterwards."""

    # returns True if missing, otherwise sets the value to False
    # so next time d[key] is called, the value False will be returned
    # and __missing__ will not be called again
    def __missing__(self, key):
        self[key] = False
        return True


# Usage (``samples`` stands for any iterable of hashable items):
#     t = Tracker()
#     unique_with_order = [x for x in samples if t[x]]

2.5 版本新增功能：如果 dict 的子类定义了 `__missing__()` 方法，那么当键不存在时，`d[key]` 操作会以该键作为参数调用这个方法。此时，`d[key]` 操作会返回或抛出 `__missing__(key)` 调用所返回或抛出的内容。其他任何操作或方法都不会调用 `__missing__()`。如果没有定义 `__missing__()`，则抛出 `KeyError`。`__missing__()` 必须是一个方法，不能是实例变量。有关示例，请参阅 `collections.defaultdict`。

def uniquify(s):
    """Return the items of list ``s`` without duplicates, in first-seen order.

    Sequences with fewer than two items are returned unchanged (the same
    object, not a copy). Items are compared with ``in`` against the result
    list, so they do not need to be hashable.

    The original recursed once per element and therefore hit
    ``RecursionError`` on long lists; this version is a plain loop.
    """
    if len(s) < 2:
        return s
    result = []
    for item in s:
        if item not in result:
            result.append(item)
    return result

import numpy as np
import pandas as pd


def uniquifier(alist):
    """Return the items of ``alist`` without duplicates, in first-seen order, via pandas."""
    return pd.Series(alist).drop_duplicates().tolist()


# from the chosen answer
def f7(seq):
    """Return a list of the hashable items in ``seq`` without duplicates, in first-seen order."""
    seen = set()
    seen_add = seen.add
    # ``seen_add(x)`` returns None, so it records unseen items without rejecting them.
    return [x for x in seq if not (x in seen or seen_add(x))]


alist = np.random.randint(low=0, high=1000, size=10000).tolist()

# Parenthesised so the line is valid on Python 3 (and still prints the same on Python 2).
print(uniquifier(alist) == f7(alist))  # True

` ` In [104]: %timeit f7(alist) 1000 loops, best of 3: 1.3 ms per loop In [110]: %timeit uniquifier(alist) 100 loops, best of 3: 4.39 ms per loop` `

def deduplicate(l):
    """Return the distinct items of list ``l`` in first-seen order.

    The unique items are compacted to the front of ``l`` in place, so the
    input list IS modified: its first ``k`` slots hold the result and the
    remaining slots keep whatever was there before. The return value is a
    new list with those first ``k`` items. Items must be hashable.
    """
    seen = set()
    write = 0
    # ``write`` never runs ahead of the read position, so overwriting
    # ``l[write]`` cannot disturb items that are still to be read.
    for item in l:
        if item in seen:
            continue
        seen.add(item)
        l[write] = item
        write += 1
    return l[:write]

` `text = "ask not what your country can do for you ask what you can do for your country" sentence = text.split(" ") noduplicates = [(sentence[i]) for i in range (0,len(sentence)) if sentence[i] not in sentence[:i]] print(noduplicates)` `

` `['ask', 'not', 'what', 'your', 'country', 'can', 'do', 'for', 'you']` `

def unique(nums):
    """Return the items of ``nums`` without duplicates, in first-seen order.

    Membership is checked against the result list, so items do not need to
    be hashable.
    """
    # Named ``result`` so the local no longer shadows the function itself.
    result = []
    for n in nums:
        if n not in result:
            result.append(n)
    return result

` `a_list = ["a", "b", "a", "c"] sorted_list = [x[0] for x in (sorted({x:a_list.index(x) for x in set(a_list)}.items(), key=lambda x: x[1]))] print sorted_list` `

` `>>> l = [3, 4, 3, 6, 4, 1, 4, 8] >>> l = [l[i] for i in range(len(l)) if i == l.index(l[i])] >>> l = [3, 4, 6, 1, 8]` `