findstring的通用前缀

我有4个string:

"h:/a/b/c" "h:/a/b/d" "h:/a/b/e" "h:/a/c" 

我想find这些string的通用前缀,即"h:/a" 。 如何find?

通常我会用分隔符'/'分隔string,并把它放在另一个列表中,依此类推。
有没有更好的方法来做到这一点?

 string[] xs = new[] { "h:/a/b/c", "h:/a/b/d", "h:/a/b/e", "h:/a/c" }; string x = string.Join("/", xs.Select(s => s.Split('/').AsEnumerable()) .Transpose() .TakeWhile(s => s.All(d => d == s.First())) .Select(s => s.First())); 

 public static IEnumerable<IEnumerable<T>> Transpose<T>( this IEnumerable<IEnumerable<T>> source) { var enumerators = source.Select(e => e.GetEnumerator()).ToArray(); try { while (enumerators.All(e => e.MoveNext())) { yield return enumerators.Select(e => e.Current).ToArray(); } } finally { Array.ForEach(enumerators, e => e.Dispose()); } } 

我的一个简短的LINQy解决scheme。

 var samples = new[] { "h:/a/b/c", "h:/a/b/d", "h:/a/b/e", "h:/a/e" }; var commonPrefix = new string( samples.First().Substring(0, samples.Min(s => s.Length)) .TakeWhile((c, i) => samples.All(s => s[i] == c)).ToArray()); 

只需循环最短string的字符,并将每个字符与其他string中相同位置的字符进行比较。 他们都匹配继续前进。 只要一个不匹配,那么直到当前位置-1的string就是答案。

像(伪代码)

 int count=0; foreach(char c in shortestString) { foreach(string s in otherStrings) { if (s[count]!=c) { return shortestString.SubString(0,count-1); //need to check count is not 0 } } count+=1; } return shortestString; 

基于Sam Holder解决scheme的工作代码(请注意,h:/ a / not h:/ a是问题中最长的公共初始子string):

 using System; namespace CommonPrefix { class Program { static void Main(string[] args) { Console.WriteLine(CommonPrefix(new[] { "h:/a/b/c", "h:/a/b/d", "h:/a/b/e", "h:/a/c" })); // "h:/a/" Console.WriteLine(CommonPrefix(new[] { "abc", "abc" })); // "abc" Console.WriteLine(CommonPrefix(new[] { "abc" })); // "abc" Console.WriteLine(CommonPrefix(new string[] { })); // "" Console.WriteLine(CommonPrefix(new[] { "a", "abc" })); // "a" Console.WriteLine(CommonPrefix(new[] { "abc", "a" })); // "a" Console.ReadKey(); } private static string CommonPrefix(string[] ss) { if (ss.Length == 0) { return ""; } if (ss.Length == 1) { return ss[0]; } int prefixLength = 0; foreach (char c in ss[0]) { foreach (string s in ss) { if (s.Length <= prefixLength || s[prefixLength] != c) { return ss[0].Substring(0, prefixLength); } } prefixLength++; } return ss[0]; // all strings identical up to length of ss[0] } } } 

这是最长的常见子string问题(虽然它是一个稍微专业的情况,因为你似乎只关心前缀)。 .NET平台中的algorithm没有库的实现,您可以直接调用,但是这里链接的文章充满了如何自己完成的步骤。

我想要一个通用的string前缀,除了我想包含任何字符(如/),我不想要一些性能/花哨的东西,我可以用testing阅读。 所以我有这个: https : //github.com/fschwiet/DreamNJasmine/commit/ad802611ceacc673f2d03c30f7c8199f552b586f

 public class CommonStringPrefix { public static string Of(IEnumerable<string> strings) { var commonPrefix = strings.FirstOrDefault() ?? ""; foreach(var s in strings) { var potentialMatchLength = Math.Min(s.Length, commonPrefix.Length); if (potentialMatchLength < commonPrefix.Length) commonPrefix = commonPrefix.Substring(0, potentialMatchLength); for(var i = 0; i < potentialMatchLength; i++) { if (s[i] != commonPrefix[i]) { commonPrefix = commonPrefix.Substring(0, i); break; } } } return commonPrefix; } } 

以下是c#中triealgorithm的自定义实现( http://en.wikipedia.org/wiki/Trie )。 它用来通过前缀执行索引string。 这个类有O(1)写和读叶节点。 对于前缀search,性能为O(log n),但前缀结果的计数为O(1)。

 using System; using System.Collections; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; public class StringIndex { private Dictionary<char, Item> _rootChars; public StringIndex() { _rootChars = new Dictionary<char, Item>(); } public void Add(string value, string data) { int level = 0; Dictionary<char, Item> currentChars = _rootChars; Item currentItem = null; foreach (char c in value) { if (currentChars.ContainsKey(c)) { currentItem = currentChars[c]; } else { currentItem = new Item() { Level = level, Letter = c }; currentChars.Add(c, currentItem); } currentChars = currentItem.Items; level++; } if (!currentItem.Values.Contains(data)) { currentItem.Values.Add(data); IncrementCount(value); } } private void IncrementCount(string value) { Dictionary<char, Item> currentChars = _rootChars; Item currentItem = null; foreach (char c in value) { currentItem = currentChars[c]; currentItem.Total++; currentChars = currentItem.Items; } } public void Remove(string value, string data) { Dictionary<char, Item> currentChars = _rootChars; Dictionary<char, Item> parentChars = null; Item currentItem = null; foreach (char c in value) { if (currentChars.ContainsKey(c)) { currentItem = currentChars[c]; parentChars = currentChars; currentChars = currentItem.Items; } else { return; // no matches found } } if (currentItem.Values.Contains(data)) { currentItem.Values.Remove(data); DecrementCount(value); if (currentItem.Total == 0) { parentChars.Remove(currentItem.Letter); } } } private void DecrementCount(string value) { Dictionary<char, Item> currentChars = _rootChars; Item currentItem = null; foreach (char c in value) { currentItem = currentChars[c]; currentItem.Total--; currentChars = currentItem.Items; } } public void Clear() { _rootChars.Clear(); } public int GetValuesByPrefixCount(string prefix) { int valuescount = 0; int level = 0; Dictionary<char, Item> currentChars = _rootChars; Item currentItem = null; foreach (char c in prefix) { if (currentChars.ContainsKey(c)) { currentItem = currentChars[c]; currentChars = currentItem.Items; } else { return valuescount; // no matches found } level++; } valuescount = currentItem.Total; return valuescount; } public HashSet<string> GetValuesByPrefixFlattened(string prefix) { var results = GetValuesByPrefix(prefix); return new HashSet<string>(results.SelectMany(x => x)); } public List<HashSet<string>> GetValuesByPrefix(string prefix) { var values = new List<HashSet<string>>(); int level = 0; Dictionary<char, Item> currentChars = _rootChars; Item currentItem = null; foreach (char c in prefix) { if (currentChars.ContainsKey(c)) { currentItem = currentChars[c]; currentChars = currentItem.Items; } else { return values; // no matches found } level++; } ExtractValues(values, currentItem); return values; } public void ExtractValues(List<HashSet<string>> values, Item item) { foreach (Item subitem in item.Items.Values) { ExtractValues(values, subitem); } values.Add(item.Values); } public class Item { public int Level { get; set; } public char Letter { get; set; } public int Total { get; set; } public HashSet<string> Values { get; set; } public Dictionary<char, Item> Items { get; set; } public Item() { Values = new HashSet<string>(); Items = new Dictionary<char, Item>(); } } } 

这里是如何使用这个类的unit testing和示例代码。

 using System; using Microsoft.VisualStudio.TestTools.UnitTesting; [TestClass] public class StringIndexTest { [TestMethod] public void AddAndSearchValues() { var si = new StringIndex(); si.Add("abcdef", "1"); si.Add("abcdeff", "2"); si.Add("abcdeffg", "3"); si.Add("bcdef", "4"); si.Add("bcdefg", "5"); si.Add("cdefg", "6"); si.Add("cdefgh", "7"); var output = si.GetValuesByPrefixFlattened("abc"); Assert.IsTrue(output.Contains("1") && output.Contains("2") && output.Contains("3")); } [TestMethod] public void RemoveAndSearch() { var si = new StringIndex(); si.Add("abcdef", "1"); si.Add("abcdeff", "2"); si.Add("abcdeffg", "3"); si.Add("bcdef", "4"); si.Add("bcdefg", "5"); si.Add("cdefg", "6"); si.Add("cdefgh", "7"); si.Remove("abcdef", "1"); var output = si.GetValuesByPrefixFlattened("abc"); Assert.IsTrue(!output.Contains("1") && output.Contains("2") && output.Contains("3")); } [TestMethod] public void Clear() { var si = new StringIndex(); si.Add("abcdef", "1"); si.Add("abcdeff", "2"); si.Add("abcdeffg", "3"); si.Add("bcdef", "4"); si.Add("bcdefg", "5"); si.Add("cdefg", "6"); si.Add("cdefgh", "7"); si.Clear(); var output = si.GetValuesByPrefix("abc"); Assert.IsTrue(output.Count == 0); } [TestMethod] public void AddAndSearchValuesCount() { var si = new StringIndex(); si.Add("abcdef", "1"); si.Add("abcdeff", "2"); si.Add("abcdeffg", "3"); si.Add("bcdef", "4"); si.Add("bcdefg", "5"); si.Add("cdefg", "6"); si.Add("cdefgh", "7"); si.Remove("cdefgh", "7"); var output1 = si.GetValuesByPrefixCount("abc"); var output2 = si.GetValuesByPrefixCount("b"); var output3 = si.GetValuesByPrefixCount("bc"); var output4 = si.GetValuesByPrefixCount("ca"); Assert.IsTrue(output1 == 3 && output2 == 2 && output3 == 2 && output4 == 0); } } 

有关如何改善这个类的任何build议,欢迎:)

我的方法是,取第一个string。 一封信一封信,另外一封信在同一个索引位置得到相同的字母,如果不匹配则停止。 删除最后一个字符,如果它是分隔符。

 var str_array = new string[]{"h:/a/b/c", "h:/a/b/d", "h:/a/b/e", "h:/a/c"}; var separator = '/'; // get longest common prefix (optinally use ToLowerInvariant) var ret = str_array.Any() ? str_array.First().TakeWhile((s,i) => str_array.All(e => Char.ToLowerInvariant(s) == Char.ToLowerInvariant(e.Skip(i).Take(1).SingleOrDefault()))) : String.Empty; // remove last character if it's a separator (optional) if (ret.LastOrDefault() == separator) ret = ret.Take(ret.Count() -1); string prefix = new String(ret.ToArray()); 

我需要寻找不同string中最长的共同前缀。 我想出了:

 private string FindCommonPrefix(List<string> list) { List<string> prefixes = null; for (int len = 1; ; ++len) { var x = list.Where(s => s.Length >= len) .GroupBy(c => c.Substring(0,len)) .Where(grp => grp.Count() > 1) .Select(grp => grp.Key) .ToList(); if (!x.Any()) { break; } // Copy last list prefixes = new List<string>(x); } return prefixes == null ? string.Empty : prefixes.First(); } 

如果有多个相同长度的前缀,则任意返回find的第一个前缀。 也是区分大小写的。 这两点都可以由读者解决!

我写了这个ICollection扩展,从一组url中找出最长的公共基Uri。

因为它只检查每个斜杠上的string集合,所以通用前缀例程(不包括我的低效algorithm!)会更快一些。 这是冗长的,但容易遵循…我最喜欢的代码types;-)

忽略“http://”和“https://”,以及大小写。

  /// <summary> /// Resolves a common base Uri from a list of Uri strings. Ignores case. Includes the last slash /// </summary> /// <param name="collectionOfUriStrings"></param> /// <returns>Common root in lowercase</returns> public static string GetCommonUri(this ICollection<string> collectionOfUriStrings) { //Check that collection contains entries if (!collectionOfUriStrings.Any()) return string.Empty; //Check that the first is no zero length var firstUri = collectionOfUriStrings.FirstOrDefault(); if(string.IsNullOrEmpty(firstUri)) return string.Empty; //set starting position to be passed '://' int previousSlashPos = firstUri.IndexOf("://", StringComparison.OrdinalIgnoreCase) + 2; int minPos = previousSlashPos + 1; //results return must have a slash after this initial position int nextSlashPos = firstUri.IndexOf("/", previousSlashPos + 1, StringComparison.OrdinalIgnoreCase); //check if any slashes if (nextSlashPos == -1) return string.Empty; do { string common = firstUri.Substring(0, nextSlashPos + 1); //check against whole collection foreach (var collectionOfUriString in collectionOfUriStrings) { if (!collectionOfUriString.StartsWith(common, StringComparison.OrdinalIgnoreCase)) { //return as soon as a mismatch is found return previousSlashPos > minPos ? firstUri.Substring(0, previousSlashPos + 1).ToLower() : string.Empty ; } } previousSlashPos = nextSlashPos; nextSlashPos = firstUri.IndexOf("/", previousSlashPos + 1, StringComparison.OrdinalIgnoreCase); } while (nextSlashPos != -1); return previousSlashPos > minPos ? firstUri.Substring(0, previousSlashPos + 1).ToLower() : string.Empty; }