# Python 是否有内置的字符串自然排序函数？

`['elm0', 'elm1', 'Elm2', 'elm9', 'elm10', 'Elm11', 'Elm12', 'elm13']`

`['Elm11', 'Elm12', 'Elm2', 'elm0', 'elm1', 'elm10', 'elm13', 'elm9']`

```python
>>> from natsort import natsorted, ns
>>> x = ['Elm11', 'Elm12', 'Elm2', 'elm0', 'elm1', 'elm10', 'elm13', 'elm9']
>>> natsorted(x, key=lambda y: y.lower())
['elm0', 'elm1', 'Elm2', 'elm9', 'elm10', 'Elm11', 'Elm12', 'elm13']
>>> natsorted(x, alg=ns.IGNORECASE)  # or alg=ns.IC
['elm0', 'elm1', 'Elm2', 'elm9', 'elm10', 'Elm11', 'Elm12', 'elm13']
```

```python
>>> from natsort import natsort_keygen, ns
>>> l1 = ['elm0', 'elm1', 'Elm2', 'elm9', 'elm10', 'Elm11', 'Elm12', 'elm13']
>>> l2 = l1[:]
>>> natsort_key1 = natsort_keygen(key=lambda y: y.lower())
>>> l1.sort(key=natsort_key1)
>>> l1
['elm0', 'elm1', 'Elm2', 'elm9', 'elm10', 'Elm11', 'Elm12', 'elm13']
>>> natsort_key2 = natsort_keygen(alg=ns.IGNORECASE)
>>> l2.sort(key=natsort_key2)
>>> l2
['elm0', 'elm1', 'Elm2', 'elm9', 'elm10', 'Elm11', 'Elm12', 'elm13']
```

```python
import re

def natural_sort(l):
    convert = lambda text: int(text) if text.isdigit() else text.lower()
    alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ]
    return sorted(l, key = alphanum_key)
```

` `['elm0', 'elm1', 'Elm2', 'elm9', 'elm10', 'Elm11', 'Elm12', 'elm13']` `

```python
import re

def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
    return [int(text) if text.isdigit() else text.lower()
            for text in re.split(_nsre, s)]
```

` `lambda s: [int(t) if t.isdigit() else t.lower() for t in re.split('(\d+)', s)]` `

```python
import re

def natural_sort(list, key=lambda s:s):
    """
    Sort the list into natural alphanumeric order.
    """
    def get_alphanum_key_func(key):
        convert = lambda text: int(text) if text.isdigit() else text
        return lambda s: [convert(c) for c in re.split('([0-9]+)', key(s))]
    sort_key = get_alphanum_key_func(key)
    list.sort(key=sort_key)
```

```python
my_list = [{'name':'b'}, {'name':'10'}, {'name':'a'}, {'name':'1'}, {'name':'9'}]
natural_sort(my_list, key=lambda x: x['name'])
print my_list
[{'name': '1'}, {'name': '9'}, {'name': '10'}, {'name': 'a'}, {'name': 'b'}]
```
` `data = ['elm13', 'elm9', 'elm0', 'elm1', 'Elm11', 'Elm2', 'elm10']` `

` `data.sort(key=lambda x: '{0:0>8}'.format(x).lower())` `

```python
print(data)
>>> ['elm0', 'elm1', 'Elm2', 'elm9', 'elm10', 'Elm11', 'elm13']
```

```python
for elm in data:
    print('{0:0>8}'.format(elm).lower())

>>>
0000elm0
0000elm1
0000elm2
0000elm9
000elm10
000elm11
000elm13
```

` `data=['Elm11', 'Elm12', 'Elm2', 'elm0', 'elm1', 'elm10', 'elm13', 'elm9']` `

` `data.sort(key=lambda x : int(x[3:]))` `

` `sorted_data=sorted(data, key=lambda x : int(x[3:]))` `

• 使用 Python（3.5.1）进行了测试
• 包括一个额外的列表，用来演示数字位于字符串中间时的工作方式
• 未经测试，但我假设：如果列表相当大，事先编译正则表达式会更有效率
• 如果这是一个错误的假设，我相信会有人纠正我

Quicky

```python
from re import compile, split

dre = compile(r'(\d+)')
mylist.sort(key=lambda l: [int(s) if s.isdigit() else s.lower() for s in split(dre, l)])
```

```python
#!/usr/bin/python3
# coding=utf-8
"""
Natural-Sort Test
"""

from re import compile, split

dre = compile(r'(\d+)')
mylist = ['elm0', 'elm1', 'Elm2', 'elm9', 'elm10', 'Elm11', 'Elm12', 'elm13', 'elm']
mylist2 = ['e0lm', 'e1lm', 'E2lm', 'e9lm', 'e10lm', 'E12lm', 'e13lm', 'elm', 'e01lm']

mylist.sort(key=lambda l: [int(s) if s.isdigit() else s.lower() for s in split(dre, l)])
mylist2.sort(key=lambda l: [int(s) if s.isdigit() else s.lower() for s in split(dre, l)])

print(mylist)
# ['elm', 'elm0', 'elm1', 'Elm2', 'elm9', 'elm10', 'Elm11', 'Elm12', 'elm13']
print(mylist2)
# ['e0lm', 'e1lm', 'e01lm', 'E2lm', 'e9lm', 'e10lm', 'E12lm', 'e13lm', 'elm']
```

• `from os.path import split`
• 你需要区分这两个同名的 `split` 导入（`re.split` 与 `os.path.split`）

• Python 文档 – 排序指南（Sorting HOW TO）
• 人类排序：自然排序
• 人类排序
• 本帖及所引用帖子的贡献者/评论者

```python
alist=["something1", "something12", "something17", "something2", "something25and_then_33", "something25and_then_34", "something29", "beta1.1", "beta2.3.0", "beta2.33.1", "a001", "a2", "z002", "z1"]

def key(k):
    nums=set(list("0123456789"))
    chars=set(list(k))
    chars=chars-nums
    for i in range(len(k)):
        for c in chars:
            k=k.replace(c+"0",c)
    l=list(k)
    base=10
    j=0
    for i in range(len(l)-1,-1,-1):
        try:
            l[i]=int(l[i])*base**j
            j+=1
        except:
            j=0
    l=tuple(l)
    print l
    return l

print sorted(alist,key=key)
```

` `('s', 'o', 'm', 'e', 't', 'h', 'i', 'n', 'g', 1) ('s', 'o', 'm', 'e', 't', 'h', 'i', 'n', 'g', 10, 2) ('s', 'o', 'm', 'e', 't', 'h', 'i', 'n', 'g', 10, 7) ('s', 'o', 'm', 'e', 't', 'h', 'i', 'n', 'g', 2) ('s', 'o', 'm', 'e', 't', 'h', 'i', 'n', 'g', 20, 5, 'a', 'n', 'd', '_', 't', 'h', 'e', 'n', '_', 30, 3) ('s', 'o', 'm', 'e', 't', 'h', 'i', 'n', 'g', 20, 5, 'a', 'n', 'd', '_', 't', 'h', 'e', 'n', '_', 30, 4) ('s', 'o', 'm', 'e', 't', 'h', 'i', 'n', 'g', 20, 9) ('b', 'e', 't', 'a', 1, '.', 1) ('b', 'e', 't', 'a', 2, '.', 3, '.') ('b', 'e', 't', 'a', 2, '.', 30, 3, '.', 1) ('a', 1) ('a', 2) ('z', 2) ('z', 1) ['a001', 'a2', 'beta1.1', 'beta2.3.0', 'beta2.33.1', 'something1', 'something2', 'something12', 'something17', 'something25and_then_33', 'something25and_then_34', 'something29', 'z1', 'z002']` `

```python
def natural_sort_key(string_or_number):
    """
    by Scott S. Lawton <scott@ProductArchitect.com> 2014-12-11; public domain and/or CC0 license

    handles cases where simple 'int' approach fails, eg
        ['0.501', '0.55'] floating point with different number of significant digits
        [0.01, 0.1, 1]    already numeric so regex and other string functions won't work (and aren't required)
        ['elm1', 'Elm2']  ASCII vs. letters (not case sensitive)
    """

    def try_float(astring):
        try:
            return float(astring)
        except:
            return astring

    if isinstance(string_or_number, basestring):
        string_or_number = string_or_number.lower()

        if len(re.findall('[.]\d', string_or_number)) <= 1:
            # assume a floating point value, eg to correctly sort ['0.501', '0.55']
            # '.' for decimal is locale-specific, eg correct for the Anglosphere and Asia but not continental Europe
            return [try_float(s) for s in re.split(r'([\d.]+)', string_or_number)]
        else:
            # assume distinct fields, eg IP address, phone number with '.', etc.
            # caveat: might want to first split by whitespace
            # TBD: for unicode, replace isdigit with isdecimal
            return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_or_number)]
    else:
        # consider: add code to recurse for lists/tuples and perhaps other iterables
        return string_or_number
```

测试代码和几个链接（StackOverflow 站内和站外）在这里： http://productarchitect.com/code/better-natural-sort.py

`functools.cmp_to_key()` 很可能与 Python 排序的底层实现紧密相关。此外，cmp 参数是遗留特性。现代的做法是将输入项转换为支持所需富比较操作的对象。

```python
Python 2.7.12 (default, Sep 29 2016, 13:30:34)
>>> (0,"foo") < ("foo",0)
True
```
```python
Python 3.5.2 (default, Oct 14 2016, 12:54:53)
>>> (0,"foo") < ("foo",0)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: unorderable types: int() < str()
```

对字符进行复制会强制按大小写排序，而大小写互换会强制小写字母排在前面；这是“自然排序”的典型定义。我无法决定采用哪种分组方式；有些人可能更喜欢下面的写法，它还带来显著的性能提升：

` `d = lambda s: s.lower()+s.swapcase()` `

` `import functools import itertools @functools.total_ordering class NaturalStringA(str): def __repr__(self): return "{}({})".format\ ( type(self).__name__ , super().__repr__() ) d = lambda c, s: [ c.NaturalStringPart("".join(v)) for k,v in itertools.groupby(s, c.isdigit) ] d = classmethod(d) @functools.total_ordering class NaturalStringPart(str): d = lambda s: "".join(c.lower()+c.swapcase() for c in s) d = staticmethod(d) def __lt__(self, other): if not isinstance(self, type(other)): return NotImplemented try: return int(self) < int(other) except ValueError: if self.isdigit(): return True elif other.isdigit(): return False else: return self.d(self) < self.d(other) def __eq__(self, other): if not isinstance(self, type(other)): return NotImplemented try: return int(self) == int(other) except ValueError: if self.isdigit() or other.isdigit(): return False else: return self.d(self) == self.d(other) __le__ = object.__le__ __ne__ = object.__ne__ __gt__ = object.__gt__ __ge__ = object.__ge__ def __lt__(self, other): return self.d(self) < self.d(other) def __eq__(self, other): return self.d(self) == self.d(other) __le__ = object.__le__ __ne__ = object.__ne__ __gt__ = object.__gt__ __ge__ = object.__ge__` `
` `import functools import itertools @functools.total_ordering class NaturalStringB(str): def __repr__(self): return "{}({})".format\ ( type(self).__name__ , super().__repr__() ) d = lambda s: "".join(c.lower()+c.swapcase() for c in s) d = staticmethod(d) def __lt__(self, other): if not isinstance(self, type(other)): return NotImplemented groups = map(lambda i: itertools.groupby(i, type(self).isdigit), (self, other)) zipped = itertools.zip_longest(*groups) for s,o in zipped: if s is None: return True if o is None: return False s_k, s_v = s[0], "".join(s[1]) o_k, o_v = o[0], "".join(o[1]) if s_k and o_k: s_v, o_v = int(s_v), int(o_v) if s_v == o_v: continue return s_v < o_v elif s_k: return True elif o_k: return False else: s_v, o_v = self.d(s_v), self.d(o_v) if s_v == o_v: continue return s_v < o_v return False def __eq__(self, other): if not isinstance(self, type(other)): return NotImplemented groups = map(lambda i: itertools.groupby(i, type(self).isdigit), (self, other)) zipped = itertools.zip_longest(*groups) for s,o in zipped: if s is None or o is None: return False s_k, s_v = s[0], "".join(s[1]) o_k, o_v = o[0], "".join(o[1]) if s_k and o_k: s_v, o_v = int(s_v), int(o_v) if s_v == o_v: continue return False elif s_k or o_k: return False else: s_v, o_v = self.d(s_v), self.d(o_v) if s_v == o_v: continue return False return True __le__ = object.__le__ __ne__ = object.__ne__ __gt__ = object.__gt__ __ge__ = object.__ge__` `
` `import functools import itertools import enum class OrderingType(enum.Enum): PerWordSwapCase = lambda s: s.lower()+s.swapcase() PerCharacterSwapCase = lambda s: "".join(c.lower()+c.swapcase() for c in s) class NaturalOrdering: @classmethod def by(cls, ordering): def wrapper(string): return cls(string, ordering) return wrapper def __init__(self, string, ordering=OrderingType.PerCharacterSwapCase): self.string = string self.groups = [ (k,int("".join(v))) if k else (k,ordering("".join(v))) for k,v in itertools.groupby(string, str.isdigit) ] def __repr__(self): return "{}({})".format\ ( type(self).__name__ , self.string ) def __lesser(self, other, default): if not isinstance(self, type(other)): return NotImplemented for s,o in itertools.zip_longest(self.groups, other.groups): if s is None: return True if o is None: return False s_k, s_v = s o_k, o_v = o if s_k and o_k: if s_v == o_v: continue return s_v < o_v elif s_k: return True elif o_k: return False else: if s_v == o_v: continue return s_v < o_v return default def __lt__(self, other): return self.__lesser(other, default=False) def __le__(self, other): return self.__lesser(other, default=True) def __eq__(self, other): if not isinstance(self, type(other)): return NotImplemented for s,o in itertools.zip_longest(self.groups, other.groups): if s is None or o is None: return False s_k, s_v = s o_k, o_v = o if s_k and o_k: if s_v == o_v: continue return False elif s_k or o_k: return False else: if s_v == o_v: continue return False return True # functools.total_ordering doesn't create single-call wrappers if both # __le__ and __lt__ exist, so do it manually. def __gt__(self, other): op_result = self.__le__(other) if op_result is NotImplemented: return op_result return not op_result def __ge__(self, other): op_result = self.__lt__(other) if op_result is NotImplemented: return op_result return not op_result # __ne__ is the only implied ordering relationship, it automatically # delegates to __eq__` `
```python
>>> import natsort
>>> import timeit
>>> l1 = ['Apple', 'corn', 'apPlE', 'arbour', 'Corn', 'Banana', 'apple', 'banana']
>>> l2 = list(map(str, range(30)))
>>> l3 = ["{} {}".format(x,y) for x in l1 for y in l2]
>>> print(timeit.timeit('sorted(l3+["0"], key=NaturalStringA)', number=10000, globals=globals()))
362.4729259099986
>>> print(timeit.timeit('sorted(l3+["0"], key=NaturalStringB)', number=10000, globals=globals()))
189.7340817489967
>>> print(timeit.timeit('sorted(l3+["0"], key=NaturalOrdering.by(OrderingType.PerCharacterSwapCase))', number=10000, globals=globals()))
69.34636392899847
>>> print(timeit.timeit('natsort.natsorted(l3+["0"], alg=natsort.ns.GROUPLETTERS | natsort.ns.LOWERCASEFIRST)', number=10000, globals=globals()))
98.2531585780016
```

```python
to_order= [e2,E1,e5,E4,e3]
ordered= sorted(to_order, key= lambda x: x.lower())
# ordered should be [E1,e2,e3,E4,e5]
```
```python
>>> import re
>>> sorted(lst, key=lambda x: int(re.findall(r'\d+$', x)[0]))
['elm0', 'elm1', 'Elm2', 'elm9', 'elm10', 'Elm11', 'Elm12', 'elm13']
```