获取两个列表之间的区别

我有两个Python列表，如下所示：

temp1 = ['One', 'Two', 'Three', 'Four'] temp2 = ['One', 'Two']

我需要创建第三个列表，其中包含第一个列表中有而第二个列表中没有的项目。对于上面的例子，我需要得到：

 temp3 = ['Three', 'Four']

有没有不用循环和逐项检查的快速方法？

 In [5]: list(set(temp1) - set(temp2)) Out[5]: ['Four', 'Three']

当心

 In [5]: set([1, 2]) - set([2, 3]) Out[5]: set([1])

你可能期望/希望它等于set([1, 3]) 。如果你想set([1, 3])作为你的答案，你需要使用set([1, 2]).symmetric_difference(set([2, 3])) 。

现有的解决方案都只能做到以下两点之一：

比O（n * m）的性能更快。
保留输入列表的顺序。

但到目前为止，还没有哪个解决方案能同时做到这两点。如果两者都想要，试试这个：

 # Build the lookup set once so that every membership test below is a hash
 # lookup instead of a scan of temp2; iterating temp1 keeps its original order.
 s = set(temp2)
 temp3 = [x for x in temp1 if x not in s]

性能测试

 import timeit init = 'temp1 = list(range(100)); temp2 = [i * 2 for i in range(50)]' print timeit.timeit('list(set(temp1) - set(temp2))', init, number = 100000) print timeit.timeit('s = set(temp2);[x for x in temp1 if x not in s]', init, number = 100000) print timeit.timeit('[item for item in temp1 if item not in temp2]', init, number = 100000)

结果：

 4.34620224079 # ars' answer 4.2770634955 # This answer 30.7715615392 # matt b's answer

我提出的方法不仅保留了顺序，而且比集合减法（略）快，因为它不需要构造多余的集合。如果第一个列表比第二个列表长得多，并且哈希计算的开销较大，性能差异会更加明显。下面的第二个测试证明了这一点：

 init = ''' temp1 = [str(i) for i in range(100000)] temp2 = [str(i * 2) for i in range(50)] '''

结果：

 11.3836875916 # ars' answer 3.63890368748 # this answer (3 times faster!) 37.7445402279 # matt b's answer

 temp3 = [item for item in temp1 if item not in temp2]

如果你想要递归地比较差异，我写了一个Python包： https://github.com/seperman/deepdiff

安装

从PyPi安装：

 pip install deepdiff

用法示例

输入

 >>> from deepdiff import DeepDiff >>> from pprint import pprint >>> from __future__ import print_function # In case running on Python 2

相同的对象返回空

 >>> t1 = {1:1, 2:2, 3:3} >>> t2 = t1 >>> print(DeepDiff(t1, t2)) {}

项目的类型已更改

 >>> t1 = {1:1, 2:2, 3:3} >>> t2 = {1:1, 2:"2", 3:3} >>> pprint(DeepDiff(t1, t2), indent=2) { 'type_changes': { 'root[2]': { 'newtype': <class 'str'>, 'newvalue': '2', 'oldtype': <class 'int'>, 'oldvalue': 2}}}

一个项目的价值已经改变

 >>> t1 = {1:1, 2:2, 3:3} >>> t2 = {1:1, 2:4, 3:3} >>> pprint(DeepDiff(t1, t2), indent=2) {'values_changed': {'root[2]': {'newvalue': 4, 'oldvalue': 2}}}

项目添加和/或删除

 >>> t1 = {1:1, 2:2, 3:3, 4:4} >>> t2 = {1:1, 2:4, 3:3, 5:5, 6:6} >>> ddiff = DeepDiff(t1, t2) >>> pprint (ddiff) {'dic_item_added': ['root[5]', 'root[6]'], 'dic_item_removed': ['root[4]'], 'values_changed': {'root[2]': {'newvalue': 4, 'oldvalue': 2}}}

字符串差异

 >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world"}} >>> t2 = {1:1, 2:4, 3:3, 4:{"a":"hello", "b":"world!"}} >>> ddiff = DeepDiff(t1, t2) >>> pprint (ddiff, indent = 2) { 'values_changed': { 'root[2]': {'newvalue': 4, 'oldvalue': 2}, "root[4]['b']": { 'newvalue': 'world!', 'oldvalue': 'world'}}}

字符串差异2

 >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world!\nGoodbye!\n1\n2\nEnd"}} >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n1\n2\nEnd"}} >>> ddiff = DeepDiff(t1, t2) >>> pprint (ddiff, indent = 2) { 'values_changed': { "root[4]['b']": { 'diff': '--- \n' '+++ \n' '@@ -1,5 +1,4 @@\n' '-world!\n' '-Goodbye!\n' '+world\n' ' 1\n' ' 2\n' ' End', 'newvalue': 'world\n1\n2\nEnd', 'oldvalue': 'world!\n' 'Goodbye!\n' '1\n' '2\n' 'End'}}} >>> >>> print (ddiff['values_changed']["root[4]['b']"]["diff"]) --- +++ @@ -1,5 +1,4 @@ -world! -Goodbye! +world 1 2 End

类型更改

 >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n\n\nEnd"}} >>> ddiff = DeepDiff(t1, t2) >>> pprint (ddiff, indent = 2) { 'type_changes': { "root[4]['b']": { 'newtype': <class 'str'>, 'newvalue': 'world\n\n\nEnd', 'oldtype': <class 'list'>, 'oldvalue': [1, 2, 3]}}}

列表差异

 >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} >>> ddiff = DeepDiff(t1, t2) >>> pprint (ddiff, indent = 2) {'iterable_item_removed': {"root[4]['b'][2]": 3, "root[4]['b'][3]": 4}}

清单差异2：

 >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} >>> ddiff = DeepDiff(t1, t2) >>> pprint (ddiff, indent = 2) { 'iterable_item_added': {"root[4]['b'][3]": 3}, 'values_changed': { "root[4]['b'][1]": {'newvalue': 3, 'oldvalue': 2}, "root[4]['b'][2]": {'newvalue': 2, 'oldvalue': 3}}}

列表差异忽略顺序或重复:(与上面相同的字典）

 >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} >>> ddiff = DeepDiff(t1, t2, ignore_order=True) >>> print (ddiff) {}

包含词典的列表：

 >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} >>> ddiff = DeepDiff(t1, t2) >>> pprint (ddiff, indent = 2) { 'dic_item_removed': ["root[4]['b'][2][2]"], 'values_changed': {"root[4]['b'][2][1]": {'newvalue': 3, 'oldvalue': 1}}}

集：

 >>> t1 = {1, 2, 8} >>> t2 = {1, 2, 3, 5} >>> ddiff = DeepDiff(t1, t2) >>> pprint (DeepDiff(t1, t2)) {'set_item_added': ['root[3]', 'root[5]'], 'set_item_removed': ['root[8]']}

命名的元组：

 >>> from collections import namedtuple >>> Point = namedtuple('Point', ['x', 'y']) >>> t1 = Point(x=11, y=22) >>> t2 = Point(x=11, y=23) >>> pprint (DeepDiff(t1, t2)) {'values_changed': {'root.y': {'newvalue': 23, 'oldvalue': 22}}}

自定义对象：

 >>> class ClassA(object): ... a = 1 ... def __init__(self, b): ... self.b = b ... >>> t1 = ClassA(1) >>> t2 = ClassA(2) >>> >>> pprint(DeepDiff(t1, t2)) {'values_changed': {'root.b': {'newvalue': 2, 'oldvalue': 1}}}

添加对象属性：

 >>> t2.c = "new attribute" >>> pprint(DeepDiff(t1, t2)) {'attribute_added': ['root.c'], 'values_changed': {'root.b': {'newvalue': 2, 'oldvalue': 1}}}

如果你真的在看性能，那就用numpy吧！

这里是完整的notebook，以GitHub gist的形式发布，其中比较了列表、numpy和pandas的做法。

https://gist.github.com/denfromufa/2821ff59b02e9482be15d27f2bbd4451

在这里输入图像描述

两个列表（如list1和list2）之间的区别可以使用下面的简单函数找到。

 def diff(list1, list2):
     """Return the items that appear in exactly one of the two lists.

     The result is the union of both inputs minus their intersection, so
     swapping the arguments yields the same elements. Duplicates are
     collapsed and the order of the returned list is whatever set iteration
     produces. Items must be hashable.
     """
     # Convert each input once and reuse the sets for both operations.
     first, second = set(list1), set(list2)
     c = first.union(second)  # or c = first | second
     d = first.intersection(second)  # or d = first & second
     return list(c - d)

要么

 def diff(list1, list2):
     """Return a list of the items found in only one of list1 and list2.

     Both inputs are converted to sets, so duplicates collapse and the order
     of the returned list follows set iteration order.
     """
     first = set(list1)
     second = set(list2)
     # `^` on two sets is the operator spelling of
     # first.symmetric_difference(second).
     return list(first ^ second)

使用上述函数时，调用diff(temp2, temp1)或diff(temp1, temp2)都可以求出差异。两者都会给出结果['Four', 'Three'] 。您不必担心列表的顺序或先传入哪个列表。

Python文档参考

我也来补充一个，因为目前的解决方案都没有产生元组：

 temp3 = tuple(set(temp1) - set(temp2))

或者：

 # edited using @Mark Byers idea. If you accept this one as answer, just accept his instead.
 # The set is built once, outside the generator expression: writing
 # `x not in set(temp2)` inside it would construct a new set for every
 # element of temp1.
 temp2_set = set(temp2)
 temp3 = tuple(x for x in temp1 if x not in temp2_set)

与这一思路下其他不产生元组的答案一样，它保留了顺序

可以使用python XOR操作符来完成。

这将删除每个列表中的重复项
这将显示temp1相对于temp2以及temp2相对于temp1的差异。

 set(temp1) ^ set(temp2)

尝试这个：

 temp3 = set(temp1) - set(temp2)

这可能比Mark的列表理解还要快：

 filterfalse(set(temp2).__contains__, temp1)

我想要一个接受两个列表、能像bash中的diff那样工作的东西。由于搜索“python比较两个列表”时这个问题最先出现，而且问得并不十分具体，我就把我想到的方案贴出来。

使用difflib的SequenceMatcher可以像diff那样比较两个列表。其他答案都不会告诉你差异发生的位置，而这个答案可以。有些答案只给出单方向的差异，有些会打乱元素的顺序，有些不能处理重复元素。而这个解决方案给出的是两个列表之间真正的差异：

 from difflib import SequenceMatcher

 a = 'A quick fox jumps the lazy dog'.split()
 b = 'A quick brown mouse jumps over the dog'.split()

 # Each opcode is (tag, i, j, k, l): a[i:j] is the affected slice of the first
 # list and b[k:l] the corresponding slice of the second one.
 for tag, i, j, k, l in SequenceMatcher(None, a, b).get_opcodes():
     if tag == 'equal':
         print('both have', a[i:j])
     # A 'replace' opcode touches both lists, so it is reported twice:
     # once for what the first list had and once for what the second has.
     if tag in ('delete', 'replace'):
         print(' 1st has', a[i:j])
     if tag in ('insert', 'replace'):
         print(' 2nd has', b[k:l])

这输出：

 both have ['A', 'quick'] 1st has ['fox'] 2nd has ['brown', 'mouse'] both have ['jumps'] 2nd has ['over'] both have ['the'] 1st has ['lazy'] both have ['dog']

当然，如果你的应用程序与其他答案做了相同的假设，那么那些答案对你最有用。但是，如果你要找的是真正的diff功能，那么这是唯一的办法。

例如，没有其他答案可以处理：

 a = [1,2,3,4,5] b = [5,4,3,2,1]

但是这个答案可以：

  2nd has [5, 4, 3, 2] both have [1] 1st has [2, 3, 4, 5]

如果碰到TypeError: unhashable type: 'list'你需要将列表或集合转换为元组，例如：

 set(map(tuple, list_of_lists1)).symmetric_difference(set(map(tuple, list_of_lists2)))

另请参见如何比较Python中的列表/集列表？

如果difflist的元素已经排序并且没有重复（即相当于集合），则可以使用朴素的方法。（注意：下面的切片写法只有在list2恰好是list1的前缀时才会给出正确结果。）

 list1=[1,2,3,4,5] list2=[1,2,3] print list1[len(list2):]

或使用本机设置的方法：

 subset=set(list1).difference(list2) print subset import timeit init = 'temp1 = list(range(100)); temp2 = [i * 2 for i in range(50)]' print "Naive solution: ", timeit.timeit('temp1[len(temp2):]', init, number = 100000) print "Native set solution: ", timeit.timeit('set(temp1).difference(temp2)', init, number = 100000)

天真的解决方案：0.0787101593292

本机解决方案：0.998837615564

如果你想要更像一个变更集…可以使用计数器

 from collections import Counter


 def diff(a, b):
     """Return a Counter describing how to turn ``a`` into ``b``.

     More verbose than needs to be, for clarity. A positive count means that
     many copies of the item must be added to ``a``; a negative count means
     that many copies must be removed. Items whose counts already match do
     not appear in the result.
     """
     ca, cb = Counter(a), Counter(b)
     # Counter subtraction keeps only positive counts, so each side holds
     # just the surplus of one input over the other.
     to_add = cb - ca
     to_remove = ca - cb
     changes = Counter(to_add)
     # subtract() (unlike `-`) keeps negative counts, which mark removals.
     changes.subtract(to_remove)
     return changes


 lista = ['one', 'three', 'four', 'four', 'one']
 listb = ['one', 'two', 'three']

 # In [127]: diff(lista, listb)
 # Out[127]: Counter({'two': 1, 'one': -1, 'four': -2})
 # in order to go from lista to list b, you need to add a "two", remove a "one", and remove two "four"s

 # In [128]: diff(listb, lista)
 # Out[128]: Counter({'four': 2, 'one': 1, 'two': -1})
 # in order to go from listb to lista, you must add two "four"s, add a "one", and remove a "two"

这是最简单的情况的答案。

这比上面那个做双向差异的答案更短，因为它只做问题所要求的事：生成在第一个列表中而不在第二个列表中的元素的列表。

 from collections import Counter

 lst1 = ['One', 'Two', 'Three', 'Four']
 lst2 = ['One', 'Two']

 c1 = Counter(lst1)
 c2 = Counter(lst2)
 # c1 - c2 keeps only the items whose count in lst1 exceeds their count in
 # lst2; elements() expands each remaining item by its leftover count.
 diff = list((c1 - c2).elements())

另外，根据您的可读性偏好，这也是一个不错的选择：

 diff = list((Counter(lst1) - Counter(lst2)).elements())

输出：

 ['Three', 'Four']

请注意，如果您只是遍历它，您可以删除list(...)调用。

因为这个解决方案使用计数器，所以与许多基于集合的答案不同，它可以正确处理元素的数量。例如在这个输入上：

 lst1 = ['One', 'Two', 'Two', 'Two', 'Three', 'Three', 'Four'] lst2 = ['One', 'Two']

输出是：

 ['Two', 'Two', 'Three', 'Three', 'Four']

这是另一个解决方案：

 def diff(a, b):
     """Return the distinct items that occur in only one of ``a`` and ``b``.

     The items unique to ``a`` come first, followed by the items unique to
     ``b``; within each group the order is set iteration order. Items must
     be hashable.
     """
     # Build each set once and test membership against the sets rather than
     # the input lists, so every lookup is a hash probe instead of a scan.
     set_a, set_b = set(a), set(b)
     xa = [i for i in set_a if i not in set_b]
     xb = [i for i in set_b if i not in set_a]
     return xa + xb

arulmr解决方案的单线版本

 def diff(listA, listB):
     """Return the set of items that occur in only one of the two lists.

     The result is the union of both one-way differences, so the argument
     order does not matter. Items must be hashable.
     """
     set_a, set_b = set(listA), set(listB)
     # The second operand must be taken from listB's side; repeating
     # `set_a - set_b` on both sides would drop the items unique to listB.
     return (set_a - set_b) | (set_b - set_a)

我们可以计算两个列表的并集减去它们的交集：

 temp1 = ['One', 'Two', 'Three', 'Four'] temp2 = ['One', 'Two', 'Five'] set(temp1+temp2)-(set(temp1)&set(temp2)) Out: set(['Four', 'Five', 'Three'])

这可以用一行来解决。问题是给出两个列表（temp1和temp2）在第三个列表（temp3）中返回它们的差异。

 temp3 = list(set(temp1).difference(set(temp2)))

 (list(set(a)-set(b))+list(set(b)-set(a)))

我来得有点晚，不过你可以用下面的代码比较上面提到的一些方案的性能，其中最快的两个是：

 list(set(x).symmetric_difference(set(y))) list(set(x) ^ set(y))

代码写得比较初级，敬请见谅。

 import random
 import time
 from itertools import filterfalse

 # 1 - performance (time taken)
 # 2 - correctness (answer - 1,4,5,6)
 # set performance
 performance = 1
 numberoftests = 7


 def answer(x, y, z):
     """Run list-difference strategy number ``z`` on ``x`` and ``y`` and time it.

     Returns a ``(lists, times)`` pair. ``times`` is a label of the form
     ``"<z + 1> = <elapsed seconds>"``. The shape of ``lists`` depends on the
     strategy:

     * 0, 1, 2 and any ``z`` above 5 compute the two-way difference and
       return it as ``str(list)``.
     * 3 returns a lazy ``filterfalse`` iterator over the items of ``x``
       that are not in ``y``. Nothing is consumed inside the timed region,
       so its timing covers only the construction of the iterator.
     * 4 returns a tuple and 5 a list of the items of ``x`` not in ``y``.

     ``time.perf_counter()`` is used for timing because ``time.clock()`` was
     removed in Python 3.8.
     """
     if z == 0:
         start = time.perf_counter()
         # Second operand is y minus x; y minus y would always be empty and
         # silently turn this into a one-way difference.
         lists = (str(list(set(x)-set(y))+list(set(y)-set(x))))
         times = ("1 = " + str(time.perf_counter() - start))
         return (lists,times)

     elif z == 1:
         start = time.perf_counter()
         lists = (str(list(set(x).symmetric_difference(set(y)))))
         times = ("2 = " + str(time.perf_counter() - start))
         return (lists,times)

     elif z == 2:
         start = time.perf_counter()
         lists = (str(list(set(x) ^ set(y))))
         times = ("3 = " + str(time.perf_counter() - start))
         return (lists,times)

     elif z == 3:
         start = time.perf_counter()
         lists = (filterfalse(set(y).__contains__, x))
         times = ("4 = " + str(time.perf_counter() - start))
         return (lists,times)

     elif z == 4:
         start = time.perf_counter()
         lists = (tuple(set(x) - set(y)))
         times = ("5 = " + str(time.perf_counter() - start))
         return (lists,times)

     elif z == 5:
         start = time.perf_counter()
         lists = ([tt for tt in x if tt not in y])
         times = ("6 = " + str(time.perf_counter() - start))
         return (lists,times)

     else:
         start = time.perf_counter()
         Xarray = [iDa for iDa in x if iDa not in y]
         Yarray = [iDb for iDb in y if iDb not in x]
         lists = (str(Xarray + Yarray))
         times = ("7 = " + str(time.perf_counter() - start))
         return (lists,times)


 # Run the driver only when executed as a script, so that importing this
 # module to reuse answer() does not start the benchmark.
 if __name__ == "__main__":
     n = numberoftests

     if performance == 2:
         a = [1,2,3,4,5]
         b = [3,2,6]
         for c in range(0,n):
             d = answer(a,b,c)
             print(d[0])

     elif performance == 1:
         for tests in range(0,10):
             print("Test Number" + str(tests + 1))
             a = random.sample(range(1, 900000), 9999)
             b = random.sample(range(1, 900000), 9999)
             for c in range(0,n):
                 #if c not in (1,4,5,6):
                 d = answer(a,b,c)
                 print(d[1])

获取两个列表之间的区别

安装

用法示例

那里有没有重复的List实现？

Python列表片语法没有明显的原因使用

查找不在列表中的元素

有任何haskell函数来连接列表与分隔符？

Pythonic的方式来打印列表项目

List.of和Arrays.asList有什么区别？

列表通过ref传递 – 帮我解释一下这个行为

根据内容过滤string列表

在Python中深入复制列表

检查平面列表中的重复项