如何将单词转换为数字?

我想在Java中把由字母组成的英文数字单词转换成它们所表示的数值。

例如,four hundred four 应该被解析为 404。

如果这些字母是asdf这样的乱码,那么这是一个错误。

我知道可以把单个字符转换成对应的ASCII整数值,再把它们拼接在一起,但我想要的只是这些英文单词所表示的数字。

下面是我在尝试解决同样的问题时写出的一些代码。 请记住,我不是专业人士,也没有很丰富的经验。 它并不慢,但我相信它还可以更快、更简洁。 我用它把语音识别得到的单词转换成数字,以便在我自己仿照《钢铁侠》做的“Jarvis”中进行计算。 它可以处理10亿以下的数字,不过只需付出很小的时间代价,就可以很容易地扩展到更大的数量级。

 /** Words for 1..9; {@code DIGITS[k]} stands for {@code k + 1}. */
 public static final String[] DIGITS =
     {"one", "two", "three", "four", "five", "six", "seven", "eight", "nine"};

 /** Words for 20..90; {@code TENS[k]} stands for {@code (k + 1) * 10}. Slot 0 is an unused placeholder. */
 public static final String[] TENS =
     {null, "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"};

 /** Words for 10..19; {@code TEENS[k]} stands for {@code k + 10}. */
 public static final String[] TEENS =
     {"ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen",
      "eighteen", "nineteen"};

 /** Scale words, plus "point" (index 3), which introduces the fractional digits. */
 public static final String[] MAGNITUDES = {"hundred", "thousand", "million", "point"};

 /** Words accepted for the digit 0 after "point". */
 public static final String[] ZERO = {"zero", "oh"};

 /**
  * Converts a number spelled out in lower-case English words into its decimal notation.
  *
  * <p>The words are read left to right. Digit, teen and tens words are added to the value of
  * the current group; "hundred" multiplies that group by 100; "thousand" and "million" scale
  * the group and fold it into the running total. Everything after "point" is read as a
  * sequence of single digits ("zero"/"oh" and "one".."nine").
  *
  * <p>Matching is case-sensitive. Words that are not recognised contribute nothing, so
  * filler such as "and" is skipped rather than rejected.
  *
  * @param input whitespace-separated number words, e.g. {@code "four hundred four"}
  * @return the number in decimal notation without leading zeros, e.g. {@code "404"}; a
  *         {@code '.'} and the fractional digits are appended only when at least one digit
  *         follows "point"
  * @throws NullPointerException if {@code input} is {@code null}
  */
 public static String replaceNumbers(String input) {
     long total = 0;                 // sum of the groups already closed by "thousand"/"million"
     long group = 0;                 // value (normally 0..999) of the group being read
     StringBuilder fraction = null;  // becomes non-null once "point" has been seen

     for (String word : input.trim().split("\\s+")) {
         if (fraction != null) {
             // After "point" every word is a single digit; anything else is skipped.
             if (word.equals(ZERO[0]) || word.equals(ZERO[1])) {
                 fraction.append('0');
             } else {
                 for (int k = 0; k < DIGITS.length; k++) {
                     if (word.equals(DIGITS[k])) {
                         fraction.append(k + 1);
                         break;
                     }
                 }
             }
         } else if (word.equals(MAGNITUDES[3])) {
             fraction = new StringBuilder();
         } else if (word.equals(MAGNITUDES[2])) {
             total += group * 1000000L;
             group = 0;
         } else if (word.equals(MAGNITUDES[1])) {
             total += group * 1000L;
             group = 0;
         } else if (word.equals(MAGNITUDES[0])) {
             group *= 100;
         } else {
             group += smallNumberValue(word);
         }
     }
     total += group;

     StringBuilder result = new StringBuilder(Long.toString(total));
     if (fraction != null && fraction.length() > 0) {
         result.append('.').append(fraction);
     }
     return result.toString();
 }

 /** Returns the value (1..19 or a multiple of ten up to 90) of a number word, or 0 if unknown. */
 private static int smallNumberValue(String word) {
     for (int k = 0; k < DIGITS.length; k++) {
         if (word.equals(DIGITS[k])) {
             return k + 1;
         }
     }
     for (int k = 0; k < TEENS.length; k++) {
         if (word.equals(TEENS[k])) {
             return k + 10;
         }
     }
     // Index 0 of TENS is the null placeholder, so start at 1.
     for (int k = 1; k < TENS.length; k++) {
         if (word.equals(TENS[k])) {
             return (k + 1) * 10;
         }
     }
     return 0;
 }

输入必须在语法上正确,否则会出问题(可以另写一个函数来删除“and”)。 输入字符串“two hundred two million fifty three thousand point zero eight five eight oh two”返回:

 two hundred two million fifty three thousand point zero eight five eight oh two 202053000.085802 It took 2 milliseconds. 

基本策略是维护一个用于累加的 value 变量。 每当遇到“one”、“two”、“eleven”、“seventy”这样的字符串时,就把相应的数值加到 value 上。 当遇到“hundred”、“thousand”、“million”这样的字符串时,就把 value 乘以相应的数值。

对于较大的数字,您可能需要创建几个小计,再把它们合并在一起。 以 111,374(写作“one hundred eleven thousand three hundred seventy four”)为例,处理步骤如下:

  • “one” -> value[0] += 1 (现在是 1)
  • “hundred” -> value[0] *= 100 (现在是 100)
  • “eleven” -> value[0] += 11 (现在是 111)
  • “thousand” -> value[0] *= 1000 (现在是 111000)
  • “three” -> value[1] += 3 (现在是 3)
  • “hundred” -> value[1] *= 100 (现在是 300)
  • “seventy” -> value[1] += 70 (现在是 370)
  • “four” -> value[1] += 4 (现在是 374)

您仍然需要弄清楚何时应该把结果拆分成多个小计。 当遇到一个比最近一次看到的乘数更小的乘数(例如“hundred”)时,就应该开始一个新的小计。

 /**
  * Converts a number written in lower-case English words (e.g.
  * "ninety nine thousand nine hundred ninety nine") into an {@code int}.
  */
 public class InNumerals5Digits {

     static String testcase1 = "ninety nine thousand nine hundred ninety nine";

     public static void main(String args[]) {
         InNumerals5Digits testInstance = new InNumerals5Digits();
         int result = testInstance.inNumerals(testcase1);
         System.out.println("Result : " + result);
     }

     /**
      * Parses a space-separated sequence of English number words.
      *
      * <p>Words are read left to right: plain number words are added to the current group,
      * "hundred" multiplies the group by 100, and "thousand" multiplies it by 1000 and folds
      * it into the total. Reading word by word (instead of slicing the string around fixed
      * offsets) means inputs that end in "hundred" no longer throw, and words that follow
      * "thousand" are no longer dropped when there is no "hundred".
      *
      * <p>Words that {@link #wordtonum(String)} does not know (including "zero") count as 0.
      *
      * @param inwords the number in words, e.g. {@code "one thousand five"}
      * @return the numeric value, e.g. {@code 1005}
      * @throws NullPointerException if {@code inwords} is {@code null}
      */
     public int inNumerals(String inwords) {
         int total = 0;  // value of the part already closed by "thousand"
         int group = 0;  // value of the part currently being read

         for (String word : inwords.trim().split("\\s+")) {
             int value = wordtonum(word);
             if (value == 1000) {
                 total += group * 1000;
                 group = 0;
             } else if (value == 100) {
                 group *= 100;
             } else {
                 group += value;
             }
         }
         return total + group;
     }

     /**
      * Maps a single English number word to its value.
      *
      * @param word one of "one".."nineteen", "twenty".."ninety" (tens), "hundred" or "thousand"
      * @return the value of {@code word}, or 0 if the word is not recognised
      */
     public int wordtonum(String word) {
         switch (word) {
             case "one": return 1;
             case "two": return 2;
             case "three": return 3;
             case "four": return 4;
             case "five": return 5;
             case "six": return 6;
             case "seven": return 7;
             case "eight": return 8;
             case "nine": return 9;
             case "ten": return 10;
             case "eleven": return 11;
             case "twelve": return 12;
             case "thirteen": return 13;
             case "fourteen": return 14;
             case "fifteen": return 15;
             case "sixteen": return 16;
             case "seventeen": return 17;
             case "eighteen": return 18;
             case "nineteen": return 19;
             case "twenty": return 20;
             case "thirty": return 30;
             case "forty": return 40;
             case "fifty": return 50;
             case "sixty": return 60;
             case "seventy": return 70;
             case "eighty": return 80;
             case "ninety": return 90;
             case "hundred": return 100;
             case "thousand": return 1000;
             default: return 0;
         }
     }
 }