合并sorting链接列表

最近我在刷新一些基础知识,发现合并sorting链表是一个很好的挑战。 如果你有一个很好的实现,那么在这里展示它。

不知道为什么它应该是一个巨大的挑战,因为它是在这里陈述,这是一个简单的Java实现没有任何“聪明的技巧”。

//The main function public Node merge_sort(Node head) { if(head == null || head.next == null) { return head; } Node middle = getMiddle(head); //get the middle of the list Node sHalf = middle.next; middle.next = null; //split the list into two halfs return merge(merge_sort(head),merge_sort(sHalf)); //recurse on that } //Merge subroutine to merge two sorted lists public Node merge(Node a, Node b) { Node dummyHead, curr; dummyHead = new Node(); curr = dummyHead; while(a !=null && b!= null) { if(a.info <= b.info) { curr.next = a; a = a.next; } else { curr.next = b; b = b.next; } curr = curr.next; } curr.next = (a == null) ? b : a; return dummyHead.next; } //Finding the middle element of the list for splitting public Node getMiddle(Node head) { if(head == null) { return head; } Node slow, fast; slow = fast = head; while(fast.next != null && fast.next.next != null) { slow = slow.next; fast = fast.next.next; } return slow; } 

这里有更多的解释 – http://www.dontforgettothink.com/2011/11/23/merge-sort-of-linked-list

更简单/更清晰的实现可能是recursion实现,NLog(N)的执行时间更加清晰。

 typedef struct _aList { struct _aList* next; struct _aList* prev; // Optional. // some data } aList; aList* merge_sort_list_recursive(aList *list,int (*compare)(aList *one,aList *two)) { // Trivial case. if (!list || !list->next) return list; aList *right = list, *temp = list, *last = list, *result = 0, *next = 0, *tail = 0; // Find halfway through the list (by running two pointers, one at twice the speed of the other). while (temp && temp->next) { last = right; right = right->next; temp = temp->next->next; } // Break the list in two. (prev pointers are broken here, but we fix later) last->next = 0; // Recurse on the two smaller lists: list = merge_sort_list_recursive(list, compare); right = merge_sort_list_recursive(right, compare); // Merge: while (list || right) { // Take from empty lists, or compare: if (!right) { next = list; list = list->next; } else if (!list) { next = right; right = right->next; } else if (compare(list, right) < 0) { next = list; list = list->next; } else { next = right; right = right->next; } if (!result) { result=next; } else { tail->next=next; } next->prev = tail; // Optional. tail = next; } return result; } 

注意:这对recursion有一个Log(N)存储要求。 性能应该与我发布的其他策略大致相当。 通过运行合并循环while(list && right)和简单追加剩下的列表(因为我们不关心列表的末尾;知道它们被合并就足够了),这里有一个潜在的优化。

基于以下网站的优秀代码: http : //www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html

整理一下,整理一下:

 typedef struct _aList { struct _aList* next; struct _aList* prev; // Optional. // some data } aList; aList *merge_sort_list(aList *list,int (*compare)(aList *one,aList *two)) { int listSize=1,numMerges,leftSize,rightSize; aList *tail,*left,*right,*next; if (!list || !list->next) return list; // Trivial case do { // For each power of two<=list length numMerges=0,left=list;tail=list=0; // Start at the start while (left) { // Do this list_len/listSize times: numMerges++,right=left,leftSize=0,rightSize=listSize; // Cut list into two halves (but don't overrun) while (right && leftSize<listSize) leftSize++,right=right->next; // Run through the lists appending onto what we have so far. while (leftSize>0 || (rightSize>0 && right)) { // Left empty, take right OR Right empty, take left, OR compare. if (!leftSize) {next=right;right=right->next;rightSize--;} else if (!rightSize || !right) {next=left;left=left->next;leftSize--;} else if (compare(left,right)<0) {next=left;left=left->next;leftSize--;} else {next=right;right=right->next;rightSize--;} // Update pointers to keep track of where we are: if (tail) tail->next=next; else list=next; // Sort prev pointer next->prev=tail; // Optional. tail=next; } // Right is now AFTER the list we just sorted, so start the next sort there. left=right; } // Terminate the list, double the list-sort size. tail->next=0,listSize<<=1; } while (numMerges>1); // If we only did one merge, then we just sorted the whole list. return list; } 

NB:这是O(NLog(N))保证,并使用O(1)资源(不recursion,不堆栈,没有)。

一个有趣的方法是维护一个堆栈,并且只在堆栈上的列表具有相同数量的元素时才合并,否则推送列表,直到用完传入列表中的元素,然后合并堆栈。

最简单的就是Gonnet + Baeza Yatesalgorithm手册 。 你用你想要的sorting元素的数目来调用它,它recursion地被平分,直到它达到一个大小为1的列表的请求,然后你从原始列表的前面剥离。 这些全部合并成一个完整大小的sorting列表。

[请注意,在第一篇文章中基于酷的堆栈被称为Online Mergesort,它在Knuth Vol 3的练习中得到了最小的提及]

这是一个可选的recursion版本。 这不需要沿着列表进行分割:我们提供一个指向头元素(不是sorting的一部分)和长度的指针,recursion函数返回一个指向sorting列表结尾的指针。

 element* mergesort(element *head,long lengtho) { long count1=(lengtho/2), count2=(lengtho-count1); element *next1,*next2,*tail1,*tail2,*tail; if (lengtho<=1) return head->next; /* Trivial case. */ tail1 = mergesort(head,count1); tail2 = mergesort(tail1,count2); tail=head; next1 = head->next; next2 = tail1->next; tail1->next = tail2->next; /* in case this ends up as the tail */ while (1) { if(cmp(next1,next2)<=0) { tail->next=next1; tail=next1; if(--count1==0) { tail->next=next2; return tail2; } next1=next1->next; } else { tail->next=next2; tail=next2; if(--count2==0) { tail->next=next1; return tail1; } next2=next2->next; } } } 

我一直在着迷于优化这个algorithm的混乱,下面是我终于到达的。 互联网和StackOverflow上的很多其他代码是非常糟糕的。 有些人试图获得列表的中间点,做recursion,在节点上留下多个循环,维持很多东西 – 所有这些都是不必要的。 MergeSort自然适合于链表,并且algorithm可以是美丽而紧凑的,但是到达这个状态并不是微不足道的。

就我所知,下面的代码保持最小数量的variables并且具有algorithm所需的逻辑步骤的最小数量(即,不使代码不可维护/不可读)。 然而,我并没有尝试尽量减lessLOC,并保持必要的空白空间,以保持可读性。 我已经通过相当严格的unit testingtesting了这个代码。

请注意,这个答案结合了其他答案的一些技巧https://stackoverflow.com/a/3032462/207661 。 尽pipe代码是用C#编写的,但转换为C ++,Java等应该是微不足道的。

 SingleListNode<T> SortLinkedList<T>(SingleListNode<T> head) where T : IComparable<T> { int blockSize = 1, blockCount; do { //Maintain two lists pointing to two blocks, left and right SingleListNode<T> left = head, right = head, tail = null; head = null; //Start a new list blockCount = 0; //Walk through entire list in blocks of size blockCount while (left != null) { blockCount++; //Advance right to start of next block, measure size of left list while doing so int leftSize = 0, rightSize = blockSize; for (;leftSize < blockSize && right != null; ++leftSize) right = right.Next; //Merge two list until their individual ends bool leftEmpty = leftSize == 0, rightEmpty = rightSize == 0 || right == null; while (!leftEmpty || !rightEmpty) { SingleListNode<T> smaller; //Using <= instead of < gives us sort stability if (rightEmpty || (!leftEmpty && left.Value.CompareTo(right.Value) <= 0)) { smaller = left; left = left.Next; --leftSize; leftEmpty = leftSize == 0; } else { smaller = right; right = right.Next; --rightSize; rightEmpty = rightSize == 0 || right == null; } //Update new list if (tail != null) tail.Next = smaller; else head = smaller; tail = smaller; } //right now points to next block for left left = right; } //terminate new list, take care of case when input list is null if (tail != null) tail.Next = null; //Lg n iterations blockSize <<= 1; } while (blockCount > 1); return head; } 

兴趣点

  • 对于需要的1个列表等空列表的情况没有特殊的处理。 这些案件“只是起作用”。
  • 很多“标准”algorithm文本有两个循环来遍历剩余元素来处理一个列表比其他列表短的情况。 上面的代码消除了它的需要。
  • 我们确保sorting稳定
  • 作为热点的内部while循环每次迭代平均评估3个expression式,我认为这是最小可以做到的。

更新:@ ideasman42已经将上面的代码翻译成了C / C ++,并提供了修复注释的build议和更多的改进。 上面的代码现在是最新的这些。

我决定在这里testing这些例子,还有一个方法,最初由Jonathan Cunningham在Pop-11中写的。 我编写了C#中的所有方法,并与一系列不同列表大小进行了比较。 我比较了Raja R Harinath的Mono eglib方法,Shital Shah的C#代码,Jayadev的Java方法,David Gamble的recursion和非recursion版本,Ed Wynn的第一个C代码(这与我的样本数据集崩溃,我没有debugging),坎宁安的版本。 完整的代码在这里: https : //gist.github.com/314e572808f29adb0e41.git 。

Mono eglib基于与Cunningham类似的思想,具有相似的速度,除非列表已经被sorting,在这种情况下,Cunningham的方法要快得多(如果它的部分sorting,eglib稍快)。 eglib代码使用一个固定的表来保存合并sortingrecursion,而坎宁安的方法通过使用递增的recursion级别来工作 – 所以它开始不使用recursion,然后是1深度recursion,然后是2深度recursion等等。这种sorting需要多less步骤。 我发现Cunningham代码更容易遵循,并且没有任何猜测涉及recursion表的大小,所以得到了我的投票。 我从这个页面尝试的其他方法慢了两倍甚至更多。

这里是Pop-11sorting的C#端口:

 /// <summary> /// Sort a linked list in place. Returns the sorted list. /// Originally by Jonathan Cunningham in Pop-11, May 1981. /// Ported to C# by Jon Meyer. /// </summary> public class ListSorter<T> where T : IComparable<T> { SingleListNode<T> workNode = new SingleListNode<T>(default(T)); SingleListNode<T> list; /// <summary> /// Sorts a linked list. Returns the sorted list. /// </summary> public SingleListNode<T> Sort(SingleListNode<T> head) { if (head == null) throw new NullReferenceException("head"); list = head; var run = GetRun(); // get first run // As we progress, we increase the recursion depth. var n = 1; while (list != null) { var run2 = GetSequence(n); run = Merge(run, run2); n++; } return run; } // Get the longest run of ordered elements from list. // The run is returned, and list is updated to point to the // first out-of-order element. SingleListNode<T> GetRun() { var run = list; // the return result is the original list var prevNode = list; var prevItem = list.Value; list = list.Next; // advance to the next item while (list != null) { var comp = prevItem.CompareTo(list.Value); if (comp > 0) { // reached end of sequence prevNode.Next = null; break; } prevItem = list.Value; prevNode = list; list = list.Next; } return run; } // Generates a sequence of Merge and GetRun() operations. // If n is 1, returns GetRun() // If n is 2, returns Merge(GetRun(), GetRun()) // If n is 3, returns Merge(Merge(GetRun(), GetRun()), // Merge(GetRun(), GetRun())) // and so on. SingleListNode<T> GetSequence(int n) { if (n < 2) { return GetRun(); } else { n--; var run1 = GetSequence(n); if (list == null) return run1; var run2 = GetSequence(n); return Merge(run1, run2); } } // Given two ordered lists this returns a list that is the // result of merging the two lists in-place (modifying the pairs // in list1 and list2). SingleListNode<T> Merge(SingleListNode<T> list1, SingleListNode<T> list2) { // we reuse a single work node to hold the result. // Simplifies the number of test cases in the code below. var prevNode = workNode; while (true) { if (list1.Value.CompareTo(list2.Value) <= 0) { // list1 goes first prevNode.Next = list1; prevNode = list1; if ((list1 = list1.Next) == null) { // reached end of list1 - join list2 to prevNode prevNode.Next = list2; break; } } else { // same but for list2 prevNode.Next = list2; prevNode = list2; if ((list2 = list2.Next) == null) { prevNode.Next = list1; break; } } } // the result is in the back of the workNode return workNode.Next; } } 

这是我实现Knuth的“列表合并sorting”(TAOCP Vol.3第2版algorithm5.2.4L)。 我会在最后添加一些评论,但是这里有一个总结:

在随机input上,它比Simon Tatham的代码运行得更快(请参阅Dave Gamble的非recursion答案,带有链接),但比Dave Gamble的recursion代码慢一些。 这比任何一个都难以理解。 至less在我的实现中,它要求每个元素都有两个指向元素的指针。 (另一种select是一个指针和一个布尔标志。)所以,这可能不是一个有用的方法。 然而,一个独特之处在于,如果input已经sorting了很长时间,那么运行速度非常快。

 element *knuthsort(element *list) { /* This is my attempt at implementing Knuth's Algorithm 5.2.4L "List merge sort" from Vol.3 of TAOCP, 2nd ed. */ element *p, *pnext, *q, *qnext, head1, head2, *s, *t; if(!list) return NULL; L1: /* This is the clever L1 from exercise 12, p.167, solution p.647. */ head1.next=list; t=&head2; for(p=list, pnext=p->next; pnext; p=pnext, pnext=p->next) { if( cmp(p,pnext) > 0 ) { t->next=NULL; t->spare=pnext; t=p; } } t->next=NULL; t->spare=NULL; p->spare=NULL; head2.next=head2.spare; L2: /* begin a new pass: */ t=&head2; q=t->next; if(!q) return head1.next; s=&head1; p=s->next; L3: /* compare: */ if( cmp(p,q) > 0 ) goto L6; L4: /* add p onto the current end, s: */ if(s->next) s->next=p; else s->spare=p; s=p; if(p->next) { p=p->next; goto L3; } else p=p->spare; L5: /* complete the sublist by adding q and all its successors: */ s->next=q; s=t; for(qnext=q->next; qnext; q=qnext, qnext=q->next); t=q; q=q->spare; goto L8; L6: /* add q onto the current end, s: */ if(s->next) s->next=q; else s->spare=q; s=q; if(q->next) { q=q->next; goto L3; } else q=q->spare; L7: /* complete the sublist by adding p and all its successors: */ s->next=p; s=t; for(pnext=p->next; pnext; p=pnext, pnext=p->next); t=p; p=p->spare; L8: /* is this end of the pass? */ if(q) goto L3; if(s->next) s->next=p; else s->spare=p; t->next=NULL; t->spare=NULL; goto L2; } 

单声道eglib中有一个非recursion的链表。

基本思想是,各种合并的控制循环与二进制整数的按位递增并行。 有O(n)合并将n个节点“插入”到合并树中,这些合并的等级对应于递增的二进制数字。 使用这个类比,只有合并树的O(log n)个节点需要物化到临时存储arrays中。

链表的另一个非recursion合并sorting示例,其中函数不是类的一部分。 这个示例代码和HP / Microsoft std :: list :: sort都使用相同的基本algorithm。 自底向上,非recursion合并sorting,使用指向数组的第一个节点的小指针(26到32),其中array [i]为0或指向大小为2的列表。 在我的系统Intel 2600K 3.4ghz上,它可以在大约1秒内将400万个具有32位无符号整数的节点作为数据sorting。

 NODE * MergeLists(NODE *, NODE *); /* prototype */ /* sort a list using array of pointers to list */ /* aList[i] == NULL or ptr to list with 2^i nodes */ #define NUMLISTS 32 /* number of lists */ NODE * SortList(NODE *pList) { NODE * aList[NUMLISTS]; /* array of lists */ NODE * pNode; NODE * pNext; int i; if(pList == NULL) /* check for empty list */ return NULL; for(i = 0; i < NUMLISTS; i++) /* init array */ aList[i] = NULL; pNode = pList; /* merge nodes into array */ while(pNode != NULL){ pNext = pNode->next; pNode->next = NULL; for(i = 0; (i < NUMLISTS) && (aList[i] != NULL); i++){ pNode = MergeLists(aList[i], pNode); aList[i] = NULL; } if(i == NUMLISTS) /* don't go beyond end of array */ i--; aList[i] = pNode; pNode = pNext; } pNode = NULL; /* merge array into one list */ for(i = 0; i < NUMLISTS; i++) pNode = MergeLists(aList[i], pNode); return pNode; } /* merge two already sorted lists */ /* compare uses pSrc2 < pSrc1 to follow the STL rule */ /* of only using < and not <= */ NODE * MergeLists(NODE *pSrc1, NODE *pSrc2) { NODE *pDst = NULL; /* destination head ptr */ NODE **ppDst = &pDst; /* ptr to head or prev->next */ if(pSrc1 == NULL) return pSrc2; if(pSrc2 == NULL) return pSrc1; while(1){ if(pSrc2->data < pSrc1->data){ /* if src2 < src1 */ *ppDst = pSrc2; pSrc2 = *(ppDst = &(pSrc2->next)); if(pSrc2 == NULL){ *ppDst = pSrc1; break; } } else { /* src1 <= src2 */ *ppDst = pSrc1; pSrc1 = *(ppDst = &(pSrc1->next)); if(pSrc1 == NULL){ *ppDst = pSrc2; break; } } } return pDst; } 

这是整个代码片断,展示了如何在java中创build链接列表并使用合并sorting对其进行sorting。 我在MergeNode类中创build节点,还有另一个MergesortLinklist类,其中有分割和合并逻辑。

 class MergeNode { Object value; MergeNode next; MergeNode(Object val) { value = val; next = null; } MergeNode() { value = null; next = null; } public Object getValue() { return value; } public void setValue(Object value) { this.value = value; } public MergeNode getNext() { return next; } public void setNext(MergeNode next) { this.next = next; } @Override public String toString() { return "MergeNode [value=" + value + ", next=" + next + "]"; } } public class MergesortLinkList { MergeNode head; static int totalnode; public MergeNode getHead() { return head; } public void setHead(MergeNode head) { this.head = head; } MergeNode add(int i) { // TODO Auto-generated method stub if (head == null) { head = new MergeNode(i); // System.out.println("head value is "+head); return head; } MergeNode temp = head; while (temp.next != null) { temp = temp.next; } temp.next = new MergeNode(i); return head; } MergeNode mergesort(MergeNode nl1) { // TODO Auto-generated method stub if (nl1.next == null) { return nl1; } int counter = 0; MergeNode temp = nl1; while (temp != null) { counter++; temp = temp.next; } System.out.println("total nodes " + counter); int middle = (counter - 1) / 2; temp = nl1; MergeNode left = nl1, right = nl1; int leftindex = 0, rightindex = 0; if (middle == leftindex) { right = left.next; } while (leftindex < middle) { leftindex++; left = left.next; right = left.next; } left.next = null; left = nl1; System.out.println(left.toString()); System.out.println(right.toString()); MergeNode p1 = mergesort(left); MergeNode p2 = mergesort(right); MergeNode node = merge(p1, p2); return node; } MergeNode merge(MergeNode p1, MergeNode p2) { // TODO Auto-generated method stub MergeNode L = p1; MergeNode R = p2; int Lcount = 0, Rcount = 0; MergeNode tempnode = null; while (L != null && R != null) { int val1 = (int) L.value; int val2 = (int) R.value; if (val1 > val2) { if (tempnode == null) { tempnode = new MergeNode(val2); R = R.next; } else { MergeNode store = tempnode; while (store.next != null) { store = store.next; } store.next = new MergeNode(val2); R = R.next; } } else { if (tempnode == null) { tempnode = new MergeNode(val1); L = L.next; } else { MergeNode store = tempnode; while (store.next != null) { store = store.next; } store.next = new MergeNode(val1); L = L.next; } } } MergeNode handle = tempnode; while (L != null) { while (handle.next != null) { handle = handle.next; } handle.next = L; L = null; } // Copy remaining elements of L[] if any while (R != null) { while (handle.next != null) { handle = handle.next; } handle.next = R; R = null; } System.out.println("----------------sorted value-----------"); System.out.println(tempnode.toString()); return tempnode; } public static void main(String[] args) { MergesortLinkList objsort = new MergesortLinkList(); MergeNode n1 = objsort.add(9); MergeNode n2 = objsort.add(7); MergeNode n3 = objsort.add(6); MergeNode n4 = objsort.add(87); MergeNode n5 = objsort.add(16); MergeNode n6 = objsort.add(81); MergeNode n7 = objsort.add(21); MergeNode n8 = objsort.add(16); MergeNode n9 = objsort.add(99); MergeNode n10 = objsort.add(31); MergeNode val = objsort.mergesort(n1); System.out.println("===============sorted values====================="); while (val != null) { System.out.println(" value is " + val.value); val = val.next; } } } 

您可以使用该合并sorting的实现,并编写自己的函数来与链接列表进行交互,就像它是一个数组一样。

合并sorting的一个缺点是它使用了O(n)空间来存储数据。 即当你合并两个子列表For链表时,这可以通过不断改变列表节点中的下一个指针来避免。 最后的实施似乎很整洁,但没有考虑到它。

 public int[] msort(int[] a) { if (a.Length > 1) { int min = a.Length / 2; int max = min; int[] b = new int[min]; int[] c = new int[max]; // dividing main array into two half arrays for (int i = 0; i < min; i++) { b[i] = a[i]; } for (int i = min; i < min + max; i++) { c[i - min] = a[i]; } b = msort(b); c = msort(c); int x = 0; int y = 0; int z = 0; while (b.Length != y && c.Length != z) { if (b[y] < c[z]) { a[x] = b[y]; //r-- x++; y++; } else { a[x] = c[z]; x++; z++; } } while (b.Length != y) { a[x] = b[y]; x++; y++; } while (c.Length != z) { a[x] = c[z]; x++; z++; } } return a; }