出现最多的字符串

下面都是用 String 自身的方法实现的(ES6 新增的方法我不太熟悉,所以基本上用的是 ES5 的方法)。

  1. 使用String的match方法
  • 用带 g 标志的正则匹配,返回的是所有匹配到的字符串组成的数组
  • 用 string 匹配,则返回一个类似 ["a", index: 2, input: "dfas"] 的数组
  • 没有匹配,则返回null
/**
 * Finds the most frequent character of a string using String.prototype.match.
 *
 * Every distinct character is counted once, in order of first appearance.
 * When several characters share the highest count, the one that appears
 * first in the string is reported. The result is written with console.log;
 * nothing is returned.
 *
 * @param {string} str - Text to analyse. Empty and one-character strings are
 *   handled by the same loop as longer input.
 */
function getMostChar(str) {
  // Distinct characters, in order of first appearance.
  var char_arr = [];
  // count_arr[k] is the number of occurrences of char_arr[k].
  var count_arr = [];
  var most_char = '';
  var most_count = 0;

  // Escapes regular-expression metacharacters so that a character such as
  // "." or "(" is matched literally instead of being treated as syntax.
  function escapeForRegExp(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  for (var i = 0; i < str.length; i++) {
    var char = str[i];
    if (char_arr.indexOf(char) !== -1) {
      // Already counted on its first appearance.
      continue;
    }
    char_arr.push(char);

    // With the g flag, match returns an array holding every occurrence.
    var result = str.match(new RegExp(escapeForRegExp(char), 'g'));
    var count = result ? result.length : 0;
    count_arr.push(count);

    if (most_count < count) {
      most_count = count;
      most_char = char;
    }
  }

  console.log('拥有的字符的种类:' + char_arr, ';出现最多次数的字符:' + most_char, ';出现的次数的数组:' + count_arr, ';出现最多次数:' + most_count);
}

  1. indexOf 和 do{}while()
  • indexOf() 返回字符出现的第一次的位置,没有匹配则返回-1
  • indexOf(str, startpos) 有两个参数
    • str 匹配的字符
    • startpos {type number} 设置从字符串的开始匹配位置(包含这个startpos)
  • do{}while() 即使条件不成立,循环体也会先执行一次
/**
 * Finds the most frequent character of a string using String.prototype.indexOf
 * inside a do...while loop.
 *
 * Every distinct character is counted once, in order of first appearance.
 * When several characters share the highest count, the one that appears
 * first in the string is reported. The result is written with console.log;
 * nothing is returned.
 *
 * @param {string} str - Text to analyse. Empty and one-character strings are
 *   handled by the same loop as longer input.
 */
function getMostChar(str) {
  // Distinct characters, in order of first appearance.
  var char_arr = [];
  // count_arr[k] is the number of occurrences of char_arr[k].
  var count_arr = [];
  var most_char = '';
  var most_count = 0;
  // Index of the occurrence found last. It starts at 0 and is moved to the
  // next index after every iteration, so at the top of the loop body it
  // always equals i, the position of the character under inspection.
  var pos = 0;

  for (var i = 0; i < str.length; i++) {
    var char = str[i];

    if (char_arr.indexOf(char) === -1) {
      char_arr.push(char);

      var count = 0;
      // The body runs before the test, which fits here: the occurrence at
      // index i is already known, so it is counted first and only then is
      // the next occurrence looked up.
      do {
        count++;
        pos = str.indexOf(char, pos + 1);
      } while (pos > -1);
      count_arr.push(count);

      if (most_count < count) {
        most_count = count;
        most_char = char;
      }
    }

    // Earlier positions never need to be scanned again.
    pos = i + 1;
  }

  console.log('pos: ' + pos, ';拥有的字符的种类:' + char_arr, ';出现最多次数的字符:' + most_char, ';出现的次数的数组:' + count_arr, ';出现最多次数:' + most_count);
}

while 类似
/**
 * Finds the most frequent character of a string using String.prototype.indexOf
 * inside a while loop.
 *
 * Every distinct character is counted once, in order of first appearance.
 * When several characters share the highest count, the one that appears
 * first in the string is reported. The result is written with console.log;
 * nothing is returned.
 *
 * @param {string} str - Text to analyse. Empty and one-character strings are
 *   handled by the same loop as longer input.
 */
function getMostChar(str) {
  // Distinct characters, in order of first appearance.
  var char_arr = [];
  // count_arr[k] is the number of occurrences of char_arr[k].
  var count_arr = [];
  var most_char = '';
  var most_count = 0;
  // Index of the occurrence found last; -1 once no further one exists.
  var pos = 0;

  for (var i = 0; i < str.length; i++) {
    var char = str[i];
    var is_new = char_arr.indexOf(char) === -1;

    if (is_new) {
      char_arr.push(char);

      var count = 0;
      // pos equals i here (see the reset below), so the first pass counts
      // the occurrence at i and every further pass counts the next one.
      while (pos > -1) {
        count++;
        pos = str.indexOf(char, pos + 1);
      }
      count_arr.push(count);

      if (most_count < count) {
        most_count = count;
        most_char = char;
      }
    }

    // Prepare the next iteration: the scan resumes at the next character.
    pos = i + 1;
  }

  console.log('pos: ' + pos, ';拥有的字符的种类:' + char_arr, ';出现最多次数的字符:' + most_char, ';出现的次数的数组:' + count_arr, ';出现最多次数:' + most_count);
}

  1. 用search 方法实现,其实逻辑同indexOf是一样的。只不过要利用字符串的截取方法。
/**
 * Finds the most frequent character of a string using String.prototype.search
 * on successively shorter slices of the input.
 *
 * Every distinct character is counted once, in order of first appearance.
 * When several characters share the highest count, the one that appears
 * first in the string is reported. The result is written with console.log;
 * nothing is returned.
 *
 * @param {string} str - Text to analyse. Empty and one-character strings are
 *   handled by the same loop as longer input.
 */
function getMostChar(str) {
  // Distinct characters, in order of first appearance.
  var char_arr = [];
  // count_arr[k] is the number of occurrences of char_arr[k].
  var count_arr = [];
  var most_char = '';
  var most_count = 0;
  // Index (in str) of the occurrence found last.
  var pos = 0;

  // Escapes regular-expression metacharacters so that a character such as
  // "." or "(" is matched literally instead of being treated as syntax.
  function escapeForRegExp(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  for (var i = 0; i < str.length; i++) {
    var char = str[i];

    if (char_arr.indexOf(char) === -1) {
      char_arr.push(char);

      // search always reports the first match and ignores the g flag, so
      // the pattern is built without it.
      var pattern = new RegExp(escapeForRegExp(char));
      var count = 0;
      // Offset of the next occurrence inside the slice; -1 ends the loop.
      var sub_str_pos = 0;

      // pos equals i here, so the first pass counts the occurrence at i.
      while (sub_str_pos > -1) {
        count++;
        sub_str_pos = str.slice(pos + 1).search(pattern);
        // Translate the slice-relative offset back to an index in str.
        pos = pos + 1 + sub_str_pos;
      }
      count_arr.push(count);

      if (most_count < count) {
        most_count = count;
        most_char = char;
      }
    }

    // Prepare the next iteration: the scan resumes at the next character.
    pos = i + 1;
  }

  console.log('pos: ' + pos, ';拥有的字符的种类:' + char_arr, ';出现最多次数的字符:' + most_char, ';出现的次数的数组:' + count_arr, ';出现最多次数:' + most_count);
}

  1. replace ,这是最最简单的,最好理解。
/**
 * Finds the most frequent character of a string using String.prototype.replace.
 *
 * The first character of the remaining text is removed everywhere; the drop
 * in length is that character's count. This repeats until nothing is left.
 * When several characters share the highest count, the one that appears
 * first in the string is reported. The result is written with console.log;
 * nothing is returned.
 *
 * @param {string} str - Text to analyse; it is not modified.
 */
function getMostChar(str) {
  // Distinct characters, in order of first appearance.
  var char_arr = [];
  // count_arr[k] is the number of occurrences of char_arr[k].
  var count_arr = [];
  var most_char = '';
  var most_count = 0;
  // Text that still contains uncounted characters.
  var rest = str;

  // Escapes regular-expression metacharacters. Without this, "^" or "$"
  // would match the empty string and never shorten the text (endless loop),
  // "." would delete everything, and "(" would throw.
  function escapeForRegExp(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  while (rest.length > 0) {
    var new_char = rest[0];
    var shorter = rest.replace(new RegExp(escapeForRegExp(new_char), 'g'), '');
    var count = rest.length - shorter.length;

    char_arr.push(new_char);
    count_arr.push(count);

    if (most_count < count) {
      most_count = count;
      most_char = new_char;
    }

    rest = shorter;
  }

  console.log('拥有的字符的种类:' + char_arr, ';出现最多次数的字符:' + most_char, ';出现的次数的数组:' + count_arr, ';出现最多次数:' + most_count);
}

  1. split 这个方法使用起来实际上是把字符串变成数组,那就不是String方法。所以没做下去。
这个有什么好玩的呢?它可以用来检测一位作者的用词习惯。
例如,我找汪曾祺的《异秉》,测试:
// Placeholder input: paste the text to analyse here.
var str = '小说的内容';
getMostCharSort(str);

/**
 * Counts every distinct character of a text, logs a summary, sorts the
 * counts in descending order and renders them as a table.
 *
 * Counting works by removing the first remaining character everywhere with
 * String.prototype.replace and measuring how much shorter the text became.
 * Punctuation and whitespace are removed as well but not reported.
 *
 * @param {string} str - Text to analyse; it is not modified.
 */
function getMostCharSort(str) {
  // One { count, char } entry per reported character.
  var kv_arr = [];
  var char_arr = [];
  var count_arr = [];
  var most_char = '';
  var most_count = 0;
  // Characters left out of the statistics.
  // NOTE(review): only these exact forms are excluded; full-width variants
  // of , : ? ! ( ) are still counted. Confirm whether they should be added.
  var ignored = /[,。:“”?!\s、—()]/;
  // Text that still contains uncounted characters.
  var rest = str;

  while (rest.length > 0) {
    var char = rest[0];
    // Escape regular-expression metacharacters so that "(", "?" or "." are
    // removed literally instead of throwing or matching everything.
    var escaped = char.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    var shorter = rest.replace(new RegExp(escaped, 'g'), '');

    if (!ignored.test(char)) {
      var count = rest.length - shorter.length;
      char_arr.push(char);
      count_arr.push(count);
      kv_arr.push({ count: count, char: char });
      if (most_count < count) {
        most_count = count;
        most_char = char;
      }
    }

    rest = shorter;
  }

  console.log('拥有的字符的种类:' + char_arr);
  console.log('出现最多次数的字符:' + most_char);
  console.log('出现的次数的数组:' + count_arr);
  console.log('出现最多次数:' + most_count);

  des(kv_arr);
  show(kv_arr);
}

/**
 * Renders the character statistics as a table appended to document.body:
 * a header row, a row with the number of distinct characters, then one row
 * per entry. Does nothing when no DOM is available.
 *
 * @param {Array<{count: number, char: string}>} arr - Entries to display,
 *   in display order.
 */
function show(arr) {
  if (typeof document === 'undefined') {
    // No DOM (for example when run outside a browser): nothing to render.
    return;
  }

  var table = document.createElement('table');

  // Appends one two-cell row. Cell text goes through textContent so that
  // characters such as "<" or "&" from the analysed text are shown as-is
  // instead of being parsed as markup.
  function appendRow(cell_tag, first, second) {
    var row = document.createElement('tr');
    [first, second].forEach(function (text) {
      var cell = document.createElement(cell_tag);
      cell.textContent = String(text);
      row.appendChild(cell);
    });
    table.appendChild(row);
  }

  appendRow('th', '字符', '次数');
  appendRow('td', '字的种类:', arr.length);
  arr.forEach(function (item) {
    appendRow('td', item.char, item.count);
  });

  document.body.appendChild(table);
}

/**
 * Sorts the entries in place by count, highest first, using a simple
 * exchange sort.
 *
 * @param {Array<{count: number, char: string}>} arr - Entries to sort.
 */
function des(arr) {
  for (var i = 0; i < arr.length; i++) {
    // Walk back from the end and pull any entry that is not smaller than
    // the one at i into position i.
    for (var j = arr.length - 1; j > i; j--) {
      if (arr[i].count <= arr[j].count) {
        var swapped = arr[i];
        arr[i] = arr[j];
        arr[j] = swapped;
      }
    }
  }
}

【出现最多的字符串】我们将算法改进,不再用冒泡排序,而是在统计的同时把每个字符直接插入到已排好序的位置。
/**
 * Counts every distinct character of a text and renders the counts as a
 * table, highest count first.
 *
 * Counting works by removing the first remaining character everywhere with
 * String.prototype.replace and measuring how much shorter the text became.
 * Instead of sorting afterwards, each entry is inserted at its sorted
 * position as soon as its count is known. Punctuation and whitespace are
 * removed as well but not reported.
 *
 * @param {string} str - Text to analyse; it is not modified.
 */
function getMostCharSort(str) {
  // { count, char } entries, kept sorted by count in descending order.
  var kv_arr = [];
  // Characters left out of the statistics.
  // NOTE(review): only these exact forms are excluded; full-width variants
  // of , : ? ! ( ) are still counted. Confirm whether they should be added.
  var ignored = /[,。:“”?!\s、—()]/;
  // Text that still contains uncounted characters.
  var rest = str;

  while (rest.length > 0) {
    var char = rest[0];
    // Escape regular-expression metacharacters so that "(", "?" or "." are
    // removed literally instead of throwing or matching everything.
    var escaped = char.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    var shorter = rest.replace(new RegExp(escaped, 'g'), '');

    if (!ignored.test(char)) {
      var entry = { count: rest.length - shorter.length, char: char };

      if (kv_arr.length === 0 || entry.count >= kv_arr[0].count) {
        // New maximum (or a tie with it): goes to the front.
        kv_arr.unshift(entry);
      } else if (entry.count <= kv_arr[kv_arr.length - 1].count) {
        // New minimum (or a tie with it): goes to the end.
        kv_arr.push(entry);
      } else {
        // Strictly between the two ends: insert before the first entry
        // with a smaller count. The last entry is smaller, so the scan
        // always stops inside the array.
        var insert_at = 1;
        while (kv_arr[insert_at].count >= entry.count) {
          insert_at++;
        }
        kv_arr.splice(insert_at, 0, entry);
      }
    }

    rest = shorter;
  }

  show(kv_arr);
}

/**
 * Renders the character statistics as a table appended to document.body:
 * a header row, a row with the number of distinct characters, then one row
 * per entry. Does nothing when no DOM is available.
 *
 * @param {Array<{count: number, char: string}>} arr - Entries to display,
 *   in display order.
 */
function show(arr) {
  if (typeof document === 'undefined') {
    // No DOM (for example when run outside a browser): nothing to render.
    return;
  }

  var table = document.createElement('table');

  // Appends one two-cell row. Cell text goes through textContent so that
  // characters such as "<" or "&" from the analysed text are shown as-is
  // instead of being parsed as markup.
  function appendRow(cell_tag, first, second) {
    var row = document.createElement('tr');
    [first, second].forEach(function (text) {
      var cell = document.createElement(cell_tag);
      cell.textContent = String(text);
      row.appendChild(cell);
    });
    table.appendChild(row);
  }

  appendRow('th', '字符', '次数');
  appendRow('td', '字的种类:', arr.length);
  arr.forEach(function (item) {
    appendRow('td', item.char, item.count);
  });

  document.body.appendChild(table);
}

    推荐阅读