# 在 Python 中进行大量字符串替换？

# Sample input from the question: text containing "&<letter>" markers.
# NOTE: the names `str` and `dict` shadow Python builtins; they are kept
# because other snippets in this file refer to them by these names.
str = "The &yquick &cbrown &bfox &Yjumps over the &ulazy dog"

# Maps each marker to the escape sequence that should replace it.
dict = {"&y":"\033[0;30m", "&c":"\033[0;31m", "&b":"\033[0;32m", "&Y":"\033[0;33m", "&u":"\033[0;34m"}

# Simplest approach: call str.replace() once per dictionary entry.
mydict = {
    "&y": "\033[0;30m",
    "&c": "\033[0;31m",
    "&b": "\033[0;32m",
    "&Y": "\033[0;33m",
    "&u": "\033[0;34m",
}
mystr = "The &yquick &cbrown &bfox &Yjumps over the &ulazy dog"

# items() works on both Python 2 and Python 3 (iteritems() is Python 2 only).
for k, v in mydict.items():
    # Strings are immutable: replace() returns a new string, so rebind it.
    mystr = mystr.replace(k, v)

print(mystr)
# Output as recorded in the original transcript (ESC is rendered as "←"):
# The ←[0;30mquick ←[0;31mbrown ←[0;32mfox ←[0;33mjumps over the ←[0;34mlazy dog

# Benchmark: time every proposed replacement strategy against a 52-entry
# dictionary (one "&<letter>" key for each of A-Z and a-z).
# The measured workload is intentionally left as it was when the timings were
# recorded; known quirks are flagged with NOTE(review) instead of being changed.
from random import randint
from re import sub, compile, escape
from time import time

mydict = dict([('&' + chr(i), str(i))
               for i in list(range(65, 91)) + list(range(97, 123))])

# random inserts between keys
# NOTE(review): rawstr only determines the number of loop iterations; the keys
# themselves are never appended to mystr, so the timed input holds no '&'.
rawstr = ''.join(mydict.keys())
mystr = ''
for i in range(0, len(rawstr), 2):
    mystr += chr(randint(65, 91)) * randint(0, 20)  # insert between 0 and 20 chars

# How many times to run each solution
rep = 10000

print('Running %d times with string length %d and '
      'random inserts of lengths 0-20' % (rep, len(mystr)))

# My solution
t = time()
for x in range(rep):
    for k, v in mydict.items():
        # NOTE(review): the return value is discarded, so this times the
        # replace() calls without ever building the final string.
        mystr.replace(k, v)
    #print(mystr)
print('%-30s %s' % ('Tor fixed & variable dict', time() - t))

# Peter Hansen
t = time()
for x in range(rep):
    # Turn every "&x" into "%(&x)s", then let %-formatting do the lookups.
    sub(r'(&[a-zA-Z])', r'%(\1)s', mystr) % mydict
print('%-30s %s' % ('Peter fixed & variable dict', time() - t))


# Claudiu
def multiple_replace(dict, text):
    # Create a regular expression from the dictionary keys
    regex = compile("(%s)" % "|".join(map(escape, dict.keys())))

    # For each match, look-up corresponding value in dictionary
    return regex.sub(lambda mo: dict[mo.string[mo.start():mo.end()]], text)


t = time()
for x in range(rep):
    multiple_replace(mydict, mystr)
print('%-30s %s' % ('Claudio variable dict', time() - t))

# Claudiu - Precompiled
regex = compile("(%s)" % "|".join(map(escape, mydict.keys())))

t = time()
for x in range(rep):
    regex.sub(lambda mo: mydict[mo.string[mo.start():mo.end()]], mystr)
print('%-30s %s' % ('Claudio fixed dict', time() - t))


# Andrew Y - variable dict
def mysubst(somestr, somedict):
    # Every piece after the first begins with the character that followed '&'.
    subs = somestr.split("&")
    return subs[0] + "".join(map(lambda arg: somedict["&" + arg[0:1]] + arg[1:], subs[1:]))


t = time()
for x in range(rep):
    mysubst(mystr, mydict)
print('%-30s %s' % ('Andrew Y variable dict', time() - t))


# Andrew Y - fixed
def repl(s):
    return mydict["&" + s[0:1]] + s[1:]


t = time()
for x in range(rep):
    subs = mystr.split("&")
    res = subs[0] + "".join(map(repl, subs[1:]))
print('%-30s %s' % ('Andrew Y fixed dict', time() - t))

` `Running 10000 times with string length 490 and random inserts of lengths 0-20 Tor fixed & variable dict 1.04699993134 Peter fixed & variable dict 0.218999862671 Claudio variable dict 2.48400020599 Claudio fixed dict 0.0940001010895 Andrew Y variable dict 0.0309998989105 Andrew Y fixed dict 0.0310001373291` `

Claudiu 和 Andrew 的解决方案测得的耗时一直显示为 0，所以我不得不把运行次数增加到 10 000 次。

# Second benchmark round: the same strategies against a much larger dictionary
# (one "&<char>" key for every code point in range(39, 1024)).
# The measured workload is intentionally left as it was when the timings were
# recorded; known quirks are flagged with NOTE(review) instead of being changed.
from random import randint
from re import sub, compile, escape
from time import time

mydict = dict([('&' + chr(i), str(i)) for i in range(39, 1024)])

# random inserts between keys
# NOTE(review): rawstr only determines the number of loop iterations; the keys
# themselves are never appended to mystr, so the timed input holds no '&'.
rawstr = ''.join(mydict.keys())
mystr = ''
for i in range(0, len(rawstr), 2):
    mystr += chr(randint(65, 91)) * randint(0, 20)  # insert between 0 and 20 chars

# How many times to run each solution
rep = 10000

print('Running %d times with string length %d and '
      'random inserts of lengths 0-20' % (rep, len(mystr)))

# Tor Valamo - too long
#t = time()
#for x in range(rep):
#    for k, v in mydict.items():
#        mystr.replace(k, v)
#print('%-30s' % 'Tor fixed & variable dict', time()-t)

# Peter Hansen
t = time()
for x in range(rep):
    # Turn every "&x" into "%(&x)s", then let %-formatting do the lookups.
    sub(r'(&[a-zA-Z])', r'%(\1)s', mystr) % mydict
print('%-30s' % 'Peter fixed & variable dict', time()-t)


# Peter 2
def dictsub(m):
    return mydict[m.group()]


t = time()
for x in range(rep):
    sub(r'(&[a-zA-Z])', dictsub, mystr)
print('%-30s' % 'Peter fixed dict', time()-t)

# Claudiu - too long
#def multiple_replace(dict, text):
#    # Create a regular expression from the dictionary keys
#    regex = compile("(%s)" % "|".join(map(escape, dict.keys())))
#
#    # For each match, look-up corresponding value in dictionary
#    return regex.sub(lambda mo: dict[mo.string[mo.start():mo.end()]], text)
#
#t = time()
#for x in range(rep):
#    multiple_replace(mydict, mystr)
#print('%-30s' % 'Claudio variable dict', time()-t)

# Claudiu - Precompiled
regex = compile("(%s)" % "|".join(map(escape, mydict.keys())))

t = time()
for x in range(rep):
    regex.sub(lambda mo: mydict[mo.string[mo.start():mo.end()]], mystr)
print('%-30s' % 'Claudio fixed dict', time()-t)

# Separate setup for Andrew and gnibbler optimized dict
# (keys are reduced to the single character that follows '&')
mydict = dict((k[1], v) for k, v in mydict.items())


# Andrew Y - variable dict
def mysubst(somestr, somedict):
    subs = somestr.split("&")
    return subs[0] + "".join(map(lambda arg: somedict[arg[0:1]] + arg[1:], subs[1:]))


def mysubst2(somestr, somedict):
    subs = somestr.split("&")
    # NOTE(review): subs[0].join(...) uses the leading text as the separator
    # instead of prefixing it, so the result differs from mysubst() whenever
    # subs[0] is non-empty. Kept unchanged because it is the measured variant.
    return subs[0].join(map(lambda arg: somedict[arg[0:1]] + arg[1:], subs[1:]))


t = time()
for x in range(rep):
    mysubst(mystr, mydict)
print('%-30s' % 'Andrew Y variable dict', time()-t)

t = time()
for x in range(rep):
    mysubst2(mystr, mydict)
print('%-30s' % 'Andrew Y variable dict 2', time()-t)


# Andrew Y - fixed
def repl(s):
    return mydict[s[0:1]] + s[1:]


t = time()
for x in range(rep):
    subs = mystr.split("&")
    res = subs[0] + "".join(map(repl, subs[1:]))
print('%-30s' % 'Andrew Y fixed dict', time()-t)

# gnibbler
t = time()
for x in range(rep):
    myparts = mystr.split("&")
    myparts[1:] = [mydict[x[0]] + x[1:] for x in myparts[1:]]
    "".join(myparts)
print('%-30s' % 'gnibbler fixed & variable dict', time()-t)

` `Running 10000 times with string length 9491 and random inserts of lengths 0-20 Tor fixed & variable dict 0.0 # disqualified 329 secs Peter fixed & variable dict 2.07799983025 Peter fixed dict 1.53100013733 Claudio variable dict 0.0 # disqualified, 37 secs Claudio fixed dict 1.5 Andrew Y variable dict 0.578000068665 Andrew Y variable dict 2 0.56299996376 Andrew Y fixed dict 0.56200003624 gnibbler fixed & variable dict 0.530999898911` `

（**注意：gnibbler 的代码使用了另一个字典，其中的键不包含 '&'。Andrew 的代码也使用了这个替代字典，但差别不大，大概只有 0.01 倍的加速。）

` `# using your stated values for str and dict: >>> import re >>> str = re.sub(r'(&[a-zA-Z])', r'%(\1)s', str) >>> str % dict 'The \x1b[0;30mquick \x1b[0;31mbrown \x1b[0;32mfox \x1b[0;33mjumps over the \x1b[0;34mlazy dog'` `

re.sub() 调用会把所有“& 后跟单个字母”的序列替换成包含该序列的 %(..)s 模式。

% 格式化利用了字符串格式化的一个特性：可以用字典来指定替换内容，而不是使用更常见的位置参数。

import re


def dictsub(m):
    """Return the replacement text for one regex match.

    ``m`` is a match object for an "&<letter>" sequence. The matched text is
    looked up in the question's ``dict`` table; a sequence that has no entry
    is returned unchanged instead of raising ``KeyError`` (the pattern below
    matches every ASCII letter, not only the letters present in the table).
    """
    token = m.group()
    return dict.get(token, token)


# `str` and `dict` are the input string and replacement table from the question.
str = re.sub(r'(&[a-zA-Z])', dictsub, str)

（顺便说一句，“dict”和“str”都是内置名称，如果在自己的代码中经常把它们用作变量名，就会遇到麻烦——以防你不知道这一点。当然，在这样的提问里用一下没有问题。）

# Third benchmark: realistic input, and every strategy's output is checked
# against a known-good result before its timing is accepted.
from time import time
import string
import random
import re

random.seed(1919096)  # ensure consistent runs

# build dictionary with 40 mappings, representative of original question
# (string.ascii_letters replaces string.letters, which does not exist on
# Python 3; random.choice may repeat a letter, so there can be fewer than
# 40 distinct keys)
mydict = dict(('&' + random.choice(string.ascii_letters), '\x1b[0;%sm' % (30+i))
              for i in range(40))

# build simulated input, with mix of text, spaces, ampersands in reasonable proportions
letters = string.ascii_letters + ' ' * 12 + '&' * 6
mystr = ''.join(random.choice(letters) for i in range(1000))

# How many times to run each solution
rep = 10000

print('Running %d times with string length %d and %d ampersands'
      % (rep, len(mystr), mystr.count('&')))

# Tor Valamo
# fixed from Tor's test, so it actually builds up the final string properly
t = time()
for x in range(rep):
    output = mystr
    for k, v in mydict.items():
        output = output.replace(k, v)
print('%-30s' % 'Tor fixed & variable dict', time() - t)

# capture "known good" output as expected, to verify others
expected = output

# Peter Hansen
# build charset to use in regex for safe dict lookup
charset = ''.join(x[1] for x in mydict.keys())

# grab reference to method on regex, for speed
patsub = re.compile(r'(&[%s])' % charset).sub

t = time()
for x in range(rep):
    output = patsub(r'%(\1)s', mystr) % mydict
print('%-30s' % 'Peter fixed & variable dict', time()-t)
assert output == expected


# Peter 2
def dictsub(m):
    return mydict[m.group()]


t = time()
for x in range(rep):
    output = patsub(dictsub, mystr)
print('%-30s' % 'Peter fixed dict', time() - t)
assert output == expected


# Peter 3 - freaky generator version, to avoid function call overhead
def dictsub(d):
    # Coroutine: each send(match) resumes here and yields the replacement.
    m = yield None
    while 1:
        m = yield d[m.group()]


# Rebind the name to the bound send() method so it can be passed as the
# replacement callable.
dictsub = dictsub(mydict).send
dictsub(None)   # "prime" it

t = time()
for x in range(rep):
    output = patsub(dictsub, mystr)
print('%-30s' % 'Peter generator', time() - t)
assert output == expected

# Claudiu - Precompiled
regex_sub = re.compile("(%s)" % "|".join(mydict.keys())).sub

t = time()
for x in range(rep):
    output = regex_sub(lambda mo: mydict[mo.string[mo.start():mo.end()]], mystr)
print('%-30s' % 'Claudio fixed dict', time() - t)
assert output == expected

```
Running 10000 times with string length 1000 and 96 ampersands
('Tor fixed & variable dict', 2.9890000820159912)
('Peter fixed & variable dict', 2.6659998893737793)
('Peter fixed dict', 1.0920000076293945)
('Peter generator', 1.0460000038146973)
('Claudio fixed dict', 1.562000036239624)
```

` `mystr = 'lTEQDMAPvksk k&z Txp vrnhQ GHaO&GNFY&&a...' mydict = {'&p': '\x1b[0;37m', '&q': '\x1b[0;66m', '&v': ...} output = 'lTEQDMAPvksk k←[0;57m Txp vrnhQ GHaO←[0;67mNFY&&a P...'` `

` `mystr = 'VVVVVVVPPPPPPPPPPPPPPPXXXXXXXXYYYFFFFFFFFFFFFEEEEEEEEEEE...' mydict = {'&p': '112', '&q': '113', '&r': '114', '&s': '115', ...} output = # same as mystr since there were no ampersands inside` `

# Split-based approach: cut the input at every '&' and translate the first
# character of each following piece.
# NOTE: `str` and `dict` shadow builtins; the names follow the question.
str = "The &yquick &cbrown &bfox &Yjumps over the &ulazy dog"
dict = {
    "&y": "\033[0;30m",
    "&c": "\033[0;31m",
    "&b": "\033[0;32m",
    "&Y": "\033[0;33m",
    "&u": "\033[0;34m",
}


def rep(s):
    """Translate one piece that followed an '&' in the input.

    ``s`` is the text between two ampersands, so its first character is the
    colour letter. A sequence with no table entry (including an empty piece,
    produced by "&&" or a trailing "&") is restored verbatim rather than
    raising ``KeyError``.
    """
    key = "&" + s[0:1]
    return dict.get(key, key) + s[1:]


subs = str.split("&")
# subs[0] is the text before the first '&' and is copied through untouched.
res = subs[0] + "".join(map(rep, subs[1:]))
print(res)

def mysubst(somestr, somedict):
    """Replace every "&<char>" sequence in ``somestr`` using ``somedict``.

    ``somedict`` maps two-character keys such as "&y" to their replacement
    text. Sequences without an entry -- including a lone trailing "&" or the
    first "&" of "&&" -- are copied through unchanged instead of raising
    ``KeyError``.

    Returns the substituted string.
    """
    subs = somestr.split("&")

    def expand(piece):
        # piece is the text that followed one '&'; its first character (if
        # any) completes the lookup key.
        key = "&" + piece[:1]
        return somedict.get(key, key) + piece[1:]

    # subs[0] precedes the first '&' and therefore needs no translation.
    return subs[0] + "".join(expand(piece) for piece in subs[1:])

def mysubst(somestr, somedict):
    """Replace every "&<char>" sequence in ``somestr`` using ``somedict``.

    ``somedict`` maps two-character keys such as "&y" to their replacement
    text. Sequences without an entry (including a lone trailing "&") are
    copied through unchanged.

    Returns the substituted string.
    """
    subs = somestr.split("&")

    def expand(piece):
        # piece is the text that followed one '&'.
        key = "&" + piece[:1]
        return somedict.get(key, key) + piece[1:]

    # The leading text must be prepended exactly once. Using it as the join
    # separator (subs[0].join(...)) drops it from the front and re-inserts it
    # between the translated pieces.
    return subs[0] + "".join(expand(piece) for piece in subs[1:])

/*
 * Lookup table indexed by character code (0 .. 'z').
 * An empty string means "no replacement"; a two-character string is the
 * colour number written for the sequence "&<char>".
 */
const char *dvals[] = {
    /* 0-64 */
    "", "", "", "", "", "", "", "", "", "",
    "", "", "", "", "", "", "", "", "", "",
    "", "", "", "", "", "", "", "", "", "",
    "", "", "", "", "", "", "", "", "", "",
    "", "", "", "", "", "", "", "", "", "",
    "", "", "", "", "", "", "", "", "", "",
    "", "", "", "", "",
    /* A-Z */
    "", "", "", "", "",
    "", "", "", "", "",
    "", "", "", "", "",
    "", "", "", "", "",
    "", "", "", "", "33",      /* Y */
    "",
    /* 91-96 (characters between 'Z' and 'a') */
    "", "", "", "", "", "",
    /* a-z */
    "", "32", "31", "", "",    /* b, c */
    "", "", "", "", "",
    "", "", "", "", "",
    "", "", "", "", "",
    "34", "", "", "", "30",    /* u, y */
    ""
};

/*
 * Copy s to d, expanding every known "&<char>" sequence to the ten literal
 * characters  \033[0;NNm  (a backslash followed by the digits, not an ESC
 * byte), where NN comes from dvals.  Unknown sequences and lone ampersands
 * are copied unchanged.
 *
 * d must be large enough for the expanded text plus the terminating NUL
 * (each replaced 2-character sequence grows to 10 characters).
 *
 * Returns the number of characters written, not counting the NUL.
 */
int dsub(char *d, char *s)
{
    static const char prefix[] = "\\033[0;";
    char *ofs = d;

    do {
        const char *code = "";

        if (*s == '&') {
            /*
             * Index through unsigned char: where plain char is signed, a
             * byte >= 0x80 would be negative, pass the "<= 'z'" test and
             * read before the start of the table.
             */
            unsigned char next = (unsigned char)s[1];
            if (next <= 'z')
                code = dvals[next];
        }

        if (*code) {
            const char *p;
            for (p = prefix; *p; ++p)
                *d++ = *p;
            /* consider as fixed 2 digits */
            *d++ = code[0];
            *d++ = code[1];
            *d++ = 'm';
            s++;    /* skip the colour letter; the loop test skips the rest */
        } else {
            /* non-&, invalid and unused ampersand sequences go here */
            *d++ = *s;
        }
    } while (*s++);

    return (int)(d - ofs - 1);
}

# Driver that times the C implementation of dsub() exposed by the local
# `mylib` extension module.
from mylib import dsub
import time

start = time.time()
instr = "The &yquick &cbrown &bfox &Yjumps over the &ulazy dog, skip &Unknown.\n" * 100000
x = dsub(instr)
end = time.time()

# NOTE(review): the label says "input str length" but the value printed is
# len(x), i.e. the length of the substituted result. The text is left as is
# so that it keeps matching the recorded output.
print("time taken %s ,input str length %s" % (end - start, len(x)))
print("first few lines")
print(x[:1100])

` `time taken 0.140000104904 ,input str length 11000000 first few lines The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown. The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown. The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown. The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown. The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown. The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown. The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown. The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown. The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown. The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown.` `

import re


def multiple_replace(dict, text):
    """Replace every occurrence of each key of ``dict`` in ``text``.

    All replacements happen in a single pass over ``text``, so replacement
    values are never themselves re-scanned.

    Args:
        dict: mapping of literal search strings to replacement strings.
        text: the string to process.

    Returns:
        The substituted string; ``text`` unchanged when ``dict`` is empty.
    """
    if not dict:
        # An empty alternation would match the empty string everywhere and
        # then fail the lookup with KeyError('').
        return text

    # Create a regular expression from the dictionary keys. Longer keys come
    # first so that a key which is a prefix of another cannot shadow it.
    keys = sorted(dict, key=len, reverse=True)
    regex = re.compile("|".join(map(re.escape, keys)))

    # For each match, look up the corresponding value in the dictionary.
    return regex.sub(lambda mo: dict[mo.group(0)], text)


if __name__ == "__main__":
    # `dict` and `str` are the replacement table and input from the question.
    print(multiple_replace(dict, str))

import re

# NOTE: `str` and `dict` shadow builtins; the names follow the question.
str = "The &yquick &cbrown &bfox &Yjumps over the &ulazy dog"
dict = {
    "&y": "\033[0;30m",
    "&c": "\033[0;31m",
    "&b": "\033[0;32m",
    "&Y": "\033[0;33m",
    "&u": "\033[0;34m",
}


def programmaticReplacement(match):
    """Return the replacement for one "&<char>" regex match.

    The pattern used below accepts any character after the ampersand, so a
    sequence without a table entry is returned unchanged instead of raising
    ``KeyError``.
    """
    token = match.group(1)
    return dict.get(token, token)


# Raw string: '&' needs no escaping, and "\&" is not a valid string escape.
colorstring = re.sub(r'(&.)', programmaticReplacement, str)

# Split-based approach with a dictionary keyed by the bare colour letter
# (no leading '&').
mydict = {
    "y": "\033[0;30m",
    "c": "\033[0;31m",
    "b": "\033[0;32m",
    "Y": "\033[0;33m",
    "u": "\033[0;34m",
}
mystr = "The &yquick &cbrown &bfox &Yjumps over the &ulazy dog"

myparts = mystr.split("&")
# Every piece after the first starts with a colour letter: swap that letter
# for its escape sequence and keep the rest of the piece.
# Limitations of this basic form: an unknown letter raises KeyError, and an
# empty piece (from "&&" or a trailing "&") raises IndexError on x[0].
myparts[1:] = [mydict[x[0]] + x[1:] for x in myparts[1:]]
print("".join(myparts))

# Variant that tolerates unknown letters: when x[0] has no entry in mydict,
# the original "&" + letter text is put back instead of raising KeyError.
myparts[1:]=[mydict.get(x[0],"&"+x[0])+x[1:] for x in myparts[1:]]

# Variant that also copes with doubled ampersands ("&&"): slicing with x[:1]
# yields "" for an empty piece instead of raising IndexError, and the
# fallback then restores a bare "&".
# `mydict` is the letter-keyed table defined in the preceding snippet.
mystr = "The &yquick &cbrown &bfox &Yjumps over the &&ulazy dog"
myparts = mystr.split("&")
myparts[1:] = [mydict.get(x[:1], "&" + x[:1]) + x[1:] for x in myparts[1:]]
print("".join(myparts))

`str.replace(old, new)`

建议的基于正则表达式的解决方案以类似的方式工作，因为正则表达式在幕后使用的就是有限状态机（FSM）。

FollowedBy('&') 并不是绝对必要的，但它能让解析过程走捷径：先验证解析器当前确实位于 & 符号处，然后才对所有标记选项进行代价更高的逐一检查。

from pyparsing import FollowedBy, oneOf

escLookup = {
    "&y": "\033[0;30m",
    "&c": "\033[0;31m",
    "&b": "\033[0;32m",
    "&Y": "\033[0;33m",
    "&u": "\033[0;34m",
}

# make a single expression that will look for a leading '&', then try to
# match each of the escape expressions
# (the keys are passed as a concrete list so the call does not depend on
# whether keys() returns a list or a view)
ANSIreplacer = FollowedBy('&') + oneOf(list(escLookup))

# add a parse action that will replace the matched text with the
# corresponding ANSI sequence
ANSIreplacer.setParseAction(lambda toks: escLookup[toks[0]])

# now use the replacer to transform the test string; throw in some extra
# ampersands to show what happens with non-matching sequences
src = "The &yquick &cbrown &bfox &Yjumps over the &ulazy dog & &Zjumps back"
out = ANSIreplacer.transformString(src)
print(repr(out))

` `'The \x1b[0;30mquick \x1b[0;31mbrown \x1b[0;32mfox \x1b[0;33mjumps over the \x1b[0;34mlazy dog & &Zjumps back'` `

# Word-by-word approach: split on whitespace and translate a colour code only
# when it forms the first two characters of a word.
# NOTE: split() without arguments splits on any run of whitespace and the
# result is re-joined with single spaces, so the original spacing is not
# preserved. `str` shadows the builtin; the name follows the question.
a = []
str = "The &yquick &cbrown &bfox &Yjumps over the &ulazy dog"
d = {
    "&y": "\033[0;30m",
    "&c": "\033[0;31m",
    "&b": "\033[0;32m",
    "&Y": "\033[0;33m",
    "&u": "\033[0;34m",
}

# Words whose two-character prefix is not a known code fall back to that
# prefix itself, i.e. they are appended unchanged.
a.extend(d.get(item[:2], item[:2]) + item[2:] for item in str.split())

print(' '.join(a))

tr.replace("&y",dict["&y"])

tr.replace("&c",dict["&c"])

tr.replace("&b",dict["&b"])

tr.replace("&Y",dict["&Y"])

tr.replace("&u",dict["&u"])

