在JavaScript中将大字符串分割成大小为n的块

我想把一个非常大的字符串(比方说,10,000个字符)分割成大小为N的块。

就性能而言,最好的方法是做什么?

例如:2分割的"1234567890"会变成["12", "34", "56", "78", "90"]

如果可以用String.prototype.match来实现,那么从性能角度来看,这是否是最好的方法呢?

你可以做这样的事情:

 "1234567890".match(/.{1,2}/g); // Results in: ["12", "34", "56", "78", "90"] 

该方法仍然适用于大小不是块大小的确切倍数的string:

 "123456789".match(/.{1,2}/g); // Results in: ["12", "34", "56", "78", "9"] 

一般来说,对于任何字符串,如果你想从中提取长度最多为n的子字符串,都可以这样做:

 str.match(/.{1,n}/g); // Replace n with the size of the substring 

如果你的string可以包含换行符或者回车符,你可以这样做:

 str.match(/(.|[\r\n]){1,n}/g); // Replace n with the size of the substring 

就性能而言,我用大约10K个字符做了尝试,在Chrome上只花了很短的时间。结果可能因环境而异。

这也可以用在一个可重用的函数中:

 /**
  * Splits a string into consecutive chunks of at most `length` characters.
  *
  * @param {string} str - Text to split.
  * @param {number} length - Maximum chunk size; it is interpolated into the
  *   regular expression quantifier `{1,length}`.
  * @returns {string[]} The chunks in order; the last one may be shorter.
  *   An empty input yields an empty array.
  */
 function chunkString(str, length) {
   // [\s\S] also matches line terminators, which "." skips; with "." any
   // newline in the input would silently vanish from the result.
   const pattern = new RegExp('[\\s\\S]{1,' + length + '}', 'g');
   // match() returns null when nothing matches (e.g. empty input), so
   // normalise that to an empty array to keep the return type stable.
   return str.match(pattern) || [];
 }
  • match、slice、substr与substring的比较
  • 不同块大小下match与slice的比较
  • 小块大小下match与slice的比较

底线:

  • match非常低效,slice更好;在Firefox上,substr / substring还要更好
  • match对于短字符串来说效率更低(即使使用缓存的正则表达式——可能是由于正则表达式解析的准备时间)
  • 对于较大的块大小,match效率更低(可能是由于无法“跳跃”)
  • 对于块较小而字符串较长的情况,match在较老的IE上性能胜过slice,但在所有其他系统上仍然落后
  • jsperf太棒了

这是最快、最高效的解决方案:

 /**
  * Cuts `str` into pieces of `len` characters using String.prototype.substring.
  *
  * The result array is allocated up front with one slot per piece and then
  * filled in order; the final piece holds whatever characters remain.
  *
  * @param {string} str - Text to cut up.
  * @param {number} len - Number of characters per piece.
  * @returns {string[]} The pieces, in their original order.
  */
 function chunkString(str, len) {
   var total = Math.ceil(str.length / len);
   var pieces = new Array(total);
   var index = 0;
   while (index < total) {
     var begin = index * len;
     pieces[index] = str.substring(begin, begin + len);
     index += 1;
   }
   return pieces;
 }

与其他人比较 ; 我赢了 :)

我创建了几个更快的变体,你可以在jsPerf上看到。我最喜欢的是这个:

 /**
  * Splits `str` into chunks of `size` characters.
  *
  * The result array is pre-sized and filled by walking an offset through the
  * string, so the input is never copied or shortened along the way.
  *
  * @param {string} str - Text to split.
  * @param {number} size - Characters per chunk.
  * @returns {string[]} The chunks in order; the last may be shorter.
  */
 function chunkSubstr(str, size) {
   const numChunks = Math.ceil(str.length / size);
   const chunks = new Array(numChunks);
   for (let i = 0, o = 0; i < numChunks; ++i, o += size) {
     // slice(start, end) replaces the deprecated substr(start, length).
     // Besides being non-deprecated, it keeps chunk boundaries contiguous
     // when `size` is fractional, where substr truncated start and length
     // independently and skipped characters.
     chunks[i] = str.slice(o, o + size);
   }
   return chunks;
 }
 // Demo: repeatedly peel `chunkSize` characters off the front of `str`,
 // collecting them in `chunks`, then show the result with alert().
 var str = "123456789";
 var chunks = [];
 var chunkSize = 2;
 for (;;) {
   if (!str) {
     break; // everything has been consumed
   }
   if (str.length >= chunkSize) {
     chunks.push(str.substr(0, chunkSize));
     str = str.substr(chunkSize);
     continue;
   }
   // Fewer than chunkSize characters remain: keep them as the last chunk.
   chunks.push(str);
   break;
 }
 alert(chunks); // chunks == 12,34,56,78,9

我写了一个扩展函数,所以块的长度也可以是一个数字数组,如[1,3]

 /**
  * Splits the string into chunks.
  *
  * @param {number|number[]} len - Either a positive chunk length, or a
  *   non-empty list of lengths that is applied cyclically (e.g. [1, 3]
  *   yields chunks of 1, 3, 1, 3, ... characters).
  * @returns {string[]|undefined} The chunks in order. An empty string always
  *   gives []. `undefined` is returned when `len` is neither a positive
  *   number nor a non-empty list, or when a full pass over the list consumes
  *   no characters (e.g. len = [0]).
  */
 String.prototype.chunkString = function(len) {
   var _ret;
   if (this.length < 1) {
     return [];
   }
   if (typeof len === 'number' && len > 0) {
     var _size = Math.ceil(this.length / len),
       _offset = 0;
     _ret = new Array(_size);
     for (var _i = 0; _i < _size; _i++) {
       // The assignment inside the call advances _offset to the chunk's end.
       _ret[_i] = this.substring(_offset, _offset = _offset + len);
     }
   } else if (typeof len === 'object' && len.length) {
     var n = 0,
       l = this.length,
       chunk,
       that = this;
     _ret = [];
     do {
       // One pass over the list of lengths; empty chunks (past the end of
       // the string, or a zero length) are skipped.
       len.forEach(function(o) {
         chunk = that.substring(n, n + o);
         if (chunk !== '') {
           _ret.push(chunk);
           n += chunk.length;
         }
       });
       if (n === 0) {
         // Prevent an endless loop when len = [0]. This comment sits on its
         // own line so it can no longer swallow the code that follows it.
         return undefined;
       }
     } while (n < l);
   }
   return _ret;
 };

代码

 "1234567890123".chunkString([1,3]) 

将返回:

 [ '1', '234', '5', '678', '9', '012', '3' ] 

它按给定的单词数将大字符串拆分为较小的字符串。

 /**
  * Groups the space-separated words of `str` into larger chunks.
  *
  * Words are appended to the current chunk while the chunk's accumulated
  * length is below `words`; once that limit is reached the chunk is emitted
  * and the next word starts a new one. Runs of whitespace inside a chunk are
  * collapsed to single spaces and chunks are trimmed.
  *
  * NOTE(review): despite its name, `words` is compared against the chunk's
  * character count (as in the original code), not a number of words;
  * confirm the intended unit with callers.
  *
  * @param {string} str - Text to split on single spaces.
  * @param {number} words - Threshold at which the current chunk is closed.
  * @returns {string[]} The non-empty chunks, including the final partial one.
  */
 function chunkSubstr(str, words) {
   const values = [];
   let pending = "";

   // Normalises and stores the chunk built so far, skipping empty ones.
   const flush = () => {
     const chunk = pending.replace(/\s+/g, " ").trim();
     if (chunk !== "") {
       values.push(chunk);
     }
   };

   // Plain iteration: no jQuery ($.each) is needed for this.
   for (const word of str.split(" ")) {
     if (pending.length < words) {
       pending += " " + word;
     } else {
       flush();
       pending = word;
     }
   }
   // Always emit what is left; previously the tail was dropped whenever at
   // least one chunk had already been produced.
   flush();
   return values;
 }
 // Chunk the externally supplied `str` into `chunkSize`-character pieces.
 // `lc` is the read position in `str`, `c` is the next index in `chunks`.
 var l = str.length;
 var lc = 0;
 var chunks = [];
 var c = 0;
 var chunkSize = 2;
 while (lc < l) {
   chunks[c] = str.slice(lc, lc + chunkSize);
   lc += chunkSize;
   c++;
 }

我会用一个正则表达式…

 /**
  * Breaks `str` into runs of up to `chunkLength` characters with a global
  * regular expression; [\s\S] makes the pattern match line breaks as well.
  *
  * @param {string} str - Text to break up.
  * @param {number} chunkLength - Upper bound for each run; coerced with
  *   unary plus before being placed in the pattern.
  * @returns {string[]|null} The runs in order, or null when nothing matches.
  */
 var chunkStr = function (str, chunkLength) {
   var maxRun = +chunkLength;
   var source = '[\\s\\S]{1,' + maxRun + '}';
   var matcher = new RegExp(source, 'g');
   return str.match(matcher);
 };

以原型函数的形式:

 String.prototype.lsplit = function(){ return this.match(new RegExp('.{1,'+ ((arguments.length==1)?(isFinite(String(arguments[0]).trim())?arguments[0]:false):1) +'}', 'g')); } 

这里是我使用的代码,它使用String.prototype.slice 。

是的,这个答案相当长,因为它尽可能贴近当前的标准,当然也包含了合理的JSDoc注释。但是,一旦缩小(minify),代码只有828个字节;压缩后传输时,就只有497个字节。

它向String.prototype添加了1个方法(如果可用,则使用Object.defineProperty):

  1. toChunks

还包含一些测试,用于检查功能。

担心代码长度会影响性能? 不用担心, http://jsperf.com/chunk-string/3

大部分额外的代码是为了确保它在多个JavaScript环境中表现一致。

 /*
  * Overview of this snippet (two immediately-invoked functions):
  *
  * 1. The first IIFE installs String.prototype.toChunks, but only when no
  *    such property exists yet. It uses Object.defineProperty when that is
  *    available and falls back to plain assignment otherwise.
  *    toChunks(chunkSize):
  *      - converts `this` to a string and throws a TypeError when `this` is
  *        null or undefined;
  *      - converts chunkSize with $toInteger (NaN becomes 0, fractions are
  *        truncated toward zero);
  *      - returns an empty array when the resulting chunk length is < 1;
  *      - otherwise pre-sizes the result and fills it with str.slice calls.
  *    NOTE(review): a chunkSize of Infinity makes numChunks
  *    Math.ceil(length / Infinity) === 0, so the result is an empty array
  *    rather than one chunk holding the whole string.
  *
  * 2. The second IIFE is a self-test that needs a DOM: it looks up the
  *    element with id "out" and appends text nodes to it. It builds a random
  *    100000-character string from char codes 32..126, runs toChunks for the
  *    chunk sizes 2, 4, 8, ... 512, and logs the calculated versus actual
  *    number of chunks and the size of the first chunk for each.
  */
 /*jslint maxlen:80, browser:true, devel:true */ /* * Properties used by toChunks. */ /*property MAX_SAFE_INTEGER, abs, ceil, configurable, defineProperty, enumerable, floor, length, max, min, pow, prototype, slice, toChunks, value, writable */ /* * Properties used in the testing of toChunks implimentation. */ /*property appendChild, createTextNode, floor, fromCharCode, getElementById, length, log, pow, push, random, toChunks */ (function () { 'use strict'; var MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER || Math.pow(2, 53) - 1; /** * Defines a new property directly on an object, or modifies an existing * property on an object, and returns the object. * * @private * @function * @param {Object} object * @param {string} property * @param {Object} descriptor * @return {Object} * @see https://goo.gl/CZnEqg */ function $defineProperty(object, property, descriptor) { if (Object.defineProperty) { Object.defineProperty(object, property, descriptor); } else { object[property] = descriptor.value; } return object; } /** * Returns true if the operands are strictly equal with no type conversion. * * @private * @function * @param {*} a * @param {*} b * @return {boolean} * @see http://www.ecma-international.org/ecma-262/5.1/#sec-11.9.4 */ function $strictEqual(a, b) { return a === b; } /** * Returns true if the operand inputArg is undefined. * * @private * @function * @param {*} inputArg * @return {boolean} */ function $isUndefined(inputArg) { return $strictEqual(typeof inputArg, 'undefined'); } /** * The abstract operation throws an error if its argument is a value that * cannot be converted to an Object, otherwise returns the argument. * * @private * @function * @param {*} inputArg The object to be tested. * @throws {TypeError} If inputArg is null or undefined. * @return {*} The inputArg if coercible. 
* @see https://goo.gl/5GcmVq */ function $requireObjectCoercible(inputArg) { var errStr; if (inputArg === null || $isUndefined(inputArg)) { errStr = 'Cannot convert argument to object: ' + inputArg; throw new TypeError(errStr); } return inputArg; } /** * The abstract operation converts its argument to a value of type string * * @private * @function * @param {*} inputArg * @return {string} * @see https://people.mozilla.org/~jorendorff/es6-draft.html#sec-tostring */ function $toString(inputArg) { var type, val; if (inputArg === null) { val = 'null'; } else { type = typeof inputArg; if (type === 'string') { val = inputArg; } else if (type === 'undefined') { val = type; } else { if (type === 'symbol') { throw new TypeError('Cannot convert symbol to string'); } val = String(inputArg); } } return val; } /** * Returns a string only if the arguments is coercible otherwise throws an * error. * * @private * @function * @param {*} inputArg * @throws {TypeError} If inputArg is null or undefined. * @return {string} */ function $onlyCoercibleToString(inputArg) { return $toString($requireObjectCoercible(inputArg)); } /** * The function evaluates the passed value and converts it to an integer. * * @private * @function * @param {*} inputArg The object to be converted to an integer. * @return {number} If the target value is NaN, null or undefined, 0 is * returned. If the target value is false, 0 is returned * and if true, 1 is returned. * @see http://www.ecma-international.org/ecma-262/5.1/#sec-9.4 */ function $toInteger(inputArg) { var number = +inputArg, val = 0; if ($strictEqual(number, number)) { if (!number || number === Infinity || number === -Infinity) { val = number; } else { val = (number > 0 || -1) * Math.floor(Math.abs(number)); } } return val; } /** * The abstract operation ToLength converts its argument to an integer * suitable for use as the length of an array-like object. * * @private * @function * @param {*} inputArg The object to be converted to a length. 
* @return {number} If len <= +0 then +0 else if len is +INFINITY then * 2^53-1 else min(len, 2^53-1). * @see https://people.mozilla.org/~jorendorff/es6-draft.html#sec-tolength */ function $toLength(inputArg) { return Math.min(Math.max($toInteger(inputArg), 0), MAX_SAFE_INTEGER); } if (!String.prototype.toChunks) { /** * This method chunks a string into an array of strings of a specified * chunk size. * * @function * @this {string} The string to be chunked. * @param {Number} chunkSize The size of the chunks that the string will * be chunked into. * @returns {Array} Returns an array of the chunked string. */ $defineProperty(String.prototype, 'toChunks', { enumerable: false, configurable: true, writable: true, value: function (chunkSize) { var str = $onlyCoercibleToString(this), chunkLength = $toInteger(chunkSize), chunked = [], numChunks, length, index, start, end; if (chunkLength < 1) { return chunked; } length = $toLength(str.length); numChunks = Math.ceil(length / chunkLength); index = 0; start = 0; end = chunkLength; chunked.length = numChunks; while (index < numChunks) { chunked[index] = str.slice(start, end); start = end; end += chunkLength; index += 1; } return chunked; } }); } }()); /* * Some tests */ (function () { 'use strict'; var pre = document.getElementById('out'), chunkSizes = [], maxChunkSize = 512, testString = '', maxTestString = 100000, chunkSize = 0, index = 1; while (chunkSize < maxChunkSize) { chunkSize = Math.pow(2, index); chunkSizes.push(chunkSize); index += 1; } index = 0; while (index < maxTestString) { testString += String.fromCharCode(Math.floor(Math.random() * 95) + 32); index += 1; } function log(result) { pre.appendChild(document.createTextNode(result + '\n')); } function test() { var strLength = testString.length, czLength = chunkSizes.length, czIndex = 0, czValue, result, numChunks, pass; while (czIndex < czLength) { czValue = chunkSizes[czIndex]; numChunks = Math.ceil(strLength / czValue); result = testString.toChunks(czValue); 
czIndex += 1; log('chunksize: ' + czValue); log(' Number of chunks:'); log(' Calculated: ' + numChunks); log(' Actual:' + result.length); pass = result.length === numChunks; log(' First chunk size: ' + result[0].length); pass = pass && result[0].length === czValue; log(' Passed: ' + pass); log(''); } } test(); log(''); log('Simple test result'); log('abcdefghijklmnopqrstuvwxyz'.toChunks(3)); }()); 
 <pre id="out"></pre> 
  /*
   * window.format(b, a): minified formatter taking a mask string `b` and a
   * value `a`.
   *   - Returns `a` unchanged when the mask is falsy or `+a` is NaN.
   *   - The non-digit, non-sign, non-"#" characters of the mask are collected;
   *     the last one is used as the decimal separator (default "."), and when
   *     there are at least two, the first is used as the group separator
   *     (default ",").
   *   - NOTE(review): the rest appears to pad, round and group the digits
   *     according to the mask; it is minified and was not traced in detail,
   *     so verify it before relying on exact output.
   *
   * The statements after the function are a demo: they format the string
   * "1234567890" with the mask "##,###." and show the result via alert().
   *
   * NOTE(review): the English sentence after `alert(formatstr);` on the line
   * below is explanatory prose from the original answer, not code.
   */
  window.format = function(b, a) { if (!b || isNaN(+a)) return a; var a = b.charAt(0) == "-" ? -a : +a, j = a < 0 ? a = -a : 0, e = b.match(/[^\d\-\+#]/g), h = e && e[e.length - 1] || ".", e = e && e[1] && e[0] || ",", b = b.split(h), a = a.toFixed(b[1] && b[1].length), a = +a + "", d = b[1] && b[1].lastIndexOf("0"), c = a.split("."); if (!c[1] || c[1] && c[1].length <= d) a = (+a).toFixed(d + 1); d = b[0].split(e); b[0] = d.join(""); var f = b[0] && b[0].indexOf("0"); if (f > -1) for (; c[0].length < b[0].length - f;) c[0] = "0" + c[0]; else +c[0] == 0 && (c[0] = ""); a = a.split("."); a[0] = c[0]; if (c = d[1] && d[d.length - 1].length) { for (var d = a[0], f = "", k = d.length % c, g = 0, i = d.length; g < i; g++) f += d.charAt(g), !((g - k + 1) % c) && g < i - c && (f += e); a[0] = f } a[1] = b[1] && a[1] ? h + a[1] : ""; return (j ? "-" : "") + a[0] + a[1] }; var str="1234567890"; var formatstr=format( "##,###.", str); alert(formatstr); This will split the string in reverse order with comma separated after 3 char's. If you want you can change the position. 

使用slice()方法:

 /**
  * Splits `str` into chunks of `chunkSize` characters using slice().
  *
  * @param {string} str - Text to split.
  * @param {number} chunkSize - Characters per chunk; fractions are truncated.
  * @returns {string[]} The chunks in order; the last may be shorter.
  *   An empty string yields an empty array.
  * @throws {RangeError} If a non-empty string is given with a chunk size
  *   that does not truncate to at least 1 (0, negative, NaN, ...). Such
  *   values used to make the loop spin forever.
  */
 function returnChunksArray(str, chunkSize) {
   const arr = [];
   if (str === '') {
     return arr;
   }
   const size = Math.trunc(chunkSize);
   if (!(size >= 1)) {
     throw new RangeError('chunkSize must be at least 1');
   }
   // Walk an offset through the string instead of re-slicing the remainder
   // on every iteration.
   for (let start = 0; start < str.length; start += size) {
     arr.push(str.slice(start, start + size));
   }
   return arr;
 }

使用substring()方法也可以做到这一点。

 /**
  * Splits `str` into chunks of `chunkSize` characters using substring().
  *
  * @param {string} str - Text to split.
  * @param {number} chunkSize - Characters per chunk; fractions are truncated.
  * @returns {string[]} The chunks in order; the last may be shorter.
  *   An empty string yields an empty array.
  * @throws {RangeError} If a non-empty string is given with a chunk size
  *   that does not truncate to at least 1 (0, negative, NaN, ...). Such
  *   values used to make the loop spin forever.
  */
 function returnChunksArray(str, chunkSize) {
   const arr = [];
   if (str === '') {
     return arr;
   }
   const size = Math.trunc(chunkSize);
   if (!(size >= 1)) {
     throw new RangeError('chunkSize must be at least 1');
   }
   // Walk an offset through the string instead of rebuilding the remainder
   // on every iteration.
   for (let start = 0; start < str.length; start += size) {
     arr.push(str.substring(start, start + size));
   }
   return arr;
 }

那么这小小的代码呢?

  /**
   * Returns the pieces of `str` matched by a global `.{1,size}` pattern,
   * i.e. runs of up to `size` characters that are not line terminators.
   *
   * @param {string} str - Text to split.
   * @param {number} size - Maximum piece length, placed in the quantifier.
   * @returns {string[]|null} The matched pieces, or null when nothing matches.
   */
  function splitME(str, size) {
    const source = '.{1,' + size + '}';
    const matcher = new RegExp(source, 'g');
    const pieces = str.match(matcher);
    return pieces;
  }
 /**
  * Splits `str` into chunks of `length` characters (10 by default).
  *
  * @param {string} str - Text to split.
  * @param {number} [length=10] - Characters per chunk; fractions are
  *   truncated when the string actually has to be split.
  * @returns {string[]} The chunks in order. A string no longer than `length`
  *   (including the empty string) comes back as a single-element array.
  * @throws {RangeError} If the string must be split but `length` does not
  *   truncate to at least 1 (0, negative, NaN, ...). Zero and negative
  *   values used to make the loop spin forever.
  */
 function chunkString(str, length = 10) {
   // Short input: keep the original contract of returning [str] as is.
   if (str.length <= length) {
     return [str];
   }
   const size = Math.trunc(length);
   if (!(size >= 1)) {
     throw new RangeError('length must be at least 1');
   }
   const result = [];
   // slice() replaces the deprecated substr(); with a truncated size the
   // chunk boundaries stay contiguous even for a fractional `length`.
   for (let offset = 0; offset < str.length; offset += size) {
     result.push(str.slice(offset, offset + size));
   }
   return result;
 }