package net.osmand.util;

import com.atilika.kuromoji.ipadic.Token;
import com.atilika.kuromoji.ipadic.Tokenizer;

import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import net.osmand.PlatformUtil;
import net.sf.junidecode.Junidecode;

import org.apache.commons.logging.Log;

/**
 * Transliterates text into Latin characters.
 *
 * <p>The strategy is selected through {@link #setCountry(String)}: {@link #DEFAULT} delegates to
 * {@link Junidecode#unidecode(String)}, while {@link #JAPAN} tokenizes the text with a kuromoji
 * {@link Tokenizer} and converts the katakana feature of each token to romaji using the table
 * defined in this class.
 *
 * <p>All state (selected country, tokenizer) is held in static fields and accessed without
 * synchronization.
 */
public class TransliterationHelper {

	private static TransliterationHelper instance;

	public final static Log LOG = PlatformUtil.getLog(TransliterationHelper.class);
	public final static String DEFAULT = "default";
	public final static String JAPAN = "Japan";

	/** Index in {@link Token#getAllFeaturesArray()} of the katakana string that is converted to romaji. */
	private static final int KANA_FEATURE_INDEX = 8;

	/** Feature value meaning "no value available". */
	private static final String NO_VALUE = "*";

	/** When {@code true}, only the first letter of every converted token is upper-cased. */
	private static final boolean CAPITALIZE_WORDS = true;

	/** Katakana-to-romaji pairs; two-character entries are combinations with a small kana. */
	private static final String[][] KANA_TABLE = {
			{"ア", "a"}, {"イ", "i"}, {"ウ", "u"}, {"エ", "e"}, {"オ", "o"},
			{"カ", "ka"}, {"キ", "ki"}, {"ク", "ku"}, {"ケ", "ke"}, {"コ", "ko"},
			{"サ", "sa"}, {"シ", "shi"}, {"ス", "su"}, {"セ", "se"}, {"ソ", "so"},
			{"タ", "ta"}, {"チ", "chi"}, {"ツ", "tsu"}, {"テ", "te"}, {"ト", "to"},
			{"ナ", "na"}, {"ニ", "ni"}, {"ヌ", "nu"}, {"ネ", "ne"}, {"ノ", "no"},
			{"ハ", "ha"}, {"ヒ", "hi"}, {"フ", "fu"}, {"ヘ", "he"}, {"ホ", "ho"},
			{"マ", "ma"}, {"ミ", "mi"}, {"ム", "mu"}, {"メ", "me"}, {"モ", "mo"},
			{"ヤ", "ya"}, {"ユ", "yu"}, {"ヨ", "yo"},
			{"ラ", "ra"}, {"リ", "ri"}, {"ル", "ru"}, {"レ", "re"}, {"ロ", "ro"},
			{"ワ", "wa"}, {"ヲ", "wo"}, {"ン", "n"},
			{"ガ", "ga"}, {"ギ", "gi"}, {"グ", "gu"}, {"ゲ", "ge"}, {"ゴ", "go"},
			{"ザ", "za"}, {"ジ", "ji"}, {"ズ", "zu"}, {"ゼ", "ze"}, {"ゾ", "zo"},
			{"ダ", "da"}, {"ヂ", "ji"}, {"ヅ", "zu"}, {"デ", "de"}, {"ド", "do"},
			{"バ", "ba"}, {"ビ", "bi"}, {"ブ", "bu"}, {"ベ", "be"}, {"ボ", "bo"},
			{"パ", "pa"}, {"ピ", "pi"}, {"プ", "pu"}, {"ペ", "pe"}, {"ポ", "po"},
			{"キャ", "kya"}, {"キュ", "kyu"}, {"キョ", "kyo"},
			{"シャ", "sha"}, {"シュ", "shu"}, {"ショ", "sho"},
			{"チャ", "cha"}, {"チュ", "chu"}, {"チョ", "cho"},
			{"ニャ", "nya"}, {"ニュ", "nyu"}, {"ニョ", "nyo"},
			{"ヒャ", "hya"}, {"ヒュ", "hyu"}, {"ヒョ", "hyo"},
			{"リャ", "rya"}, {"リュ", "ryu"}, {"リョ", "ryo"},
			{"ギャ", "gya"}, {"ギュ", "gyu"}, {"ギョ", "gyo"},
			{"ジャ", "ja"}, {"ジュ", "ju"}, {"ジョ", "jo"},
			{"ティ", "ti"}, {"ディ", "di"}, {"ツィ", "tsi"},
			{"ヂャ", "dya"}, {"ヂュ", "dyu"}, {"ヂョ", "dyo"},
			{"ビャ", "bya"}, {"ビュ", "byu"}, {"ビョ", "byo"},
			{"ピャ", "pya"}, {"ピュ", "pyu"}, {"ピョ", "pyo"},
			{"ー", "-"}
	};

	private static String country = DEFAULT;
	private static Tokenizer tokenizer;

	/** Lookup built from {@link #KANA_TABLE}; filled once in the static initializer, then only read. */
	private static final Map<String, String> katakanaMap = new HashMap<>();

	private TransliterationHelper() {
	}

	static {
		// Filling the table here (instead of lazily on first use) guarantees that no caller
		// can ever observe a partially populated map.
		initKanaMap();
		try {
			instance = new TransliterationHelper();
		} catch (Exception e) {
			LOG.debug(e.getMessage(), e);
		}
	}

	/**
	 * Returns the shared instance created during class initialization.
	 *
	 * @return the shared {@code TransliterationHelper}
	 */
	public static TransliterationHelper getInstance() {
		return instance;
	}

	/**
	 * Selects the transliteration strategy.
	 *
	 * @param countryName {@code "Japan"} selects {@link #JAPAN}; any other value, including
	 *                    {@code null}, selects {@link #DEFAULT}
	 */
	public static void setCountry(String countryName) {
		// Constant-first equals keeps a null argument from throwing.
		country = JAPAN.equals(countryName) ? JAPAN : DEFAULT;
	}

	/**
	 * Returns the currently selected strategy.
	 *
	 * @return {@link #DEFAULT} or {@link #JAPAN}
	 */
	public static String getCountry() {
		return country;
	}

	/**
	 * Transliterates {@code text} using the strategy selected by {@link #setCountry(String)}.
	 *
	 * @param text the text to transliterate
	 * @return the result of {@link Junidecode#unidecode(String)} for {@link #DEFAULT}, or the romaji
	 *         conversion for {@link #JAPAN}
	 */
	public static String transliterate(String text) {
		switch (country) {
			case DEFAULT:
				return Junidecode.unidecode(text);
			case JAPAN:
				return japanese2Romaji(text);
			default:
				return text;
		}
	}

	/**
	 * Tokenizes {@code text} and converts it token by token.
	 *
	 * <p>Tokens recognised as symbols, numbers, alphabetic text or "サ変接続" are copied unchanged
	 * with no separator. Every other token is followed by a single space, so the result may end
	 * with a space.
	 */
	private static String japanese2Romaji(String text) {
		// The tokenizer is only needed for Japanese, so it is created on first use here
		// rather than on every call to transliterate().
		if (tokenizer == null) {
			tokenizer = new Tokenizer();
		}
		List<Token> tokens = tokenizer.tokenize(text);
		StringBuilder builder = new StringBuilder();
		for (Token token : tokens) {
			String[] features = token.getAllFeaturesArray();
			if (isCopiedVerbatim(features)) {
				builder.append(token.getSurface());
				continue;
			}
			String kana = features.length > KANA_FEATURE_INDEX ? features[KANA_FEATURE_INDEX] : NO_VALUE;
			if (NO_VALUE.equals(kana)) {
				// No kana available for this token: keep the original text.
				builder.append(token.getSurface());
			} else {
				builder.append(applyCase(convertKanaToRomaji(kana), token));
			}
			builder.append(" ");
		}
		return builder.toString();
	}

	/** Tells whether a token with these features is emitted as-is, without conversion or separator. */
	private static boolean isCopiedVerbatim(String[] features) {
		// First feature "記号" = symbol.
		if (features.length > 0 && "記号".equals(features[0])) {
			return true;
		}
		if (features.length < 2 || features[1] == null) {
			return false;
		}
		switch (features[1]) {
			case "数": // number
			case "アルファベット": // alphabet
			case "サ変接続":
				return true;
			default:
				return false;
		}
	}

	/** Applies the capitalisation policy to the romaji of one token. */
	private static String applyCase(String romaji, Token token) {
		if (romaji.isEmpty()) {
			return romaji;
		}
		// Locale.ROOT keeps the result independent of the device locale
		// (e.g. Turkish would turn "i" into a dotted capital "İ").
		if (CAPITALIZE_WORDS) {
			return romaji.substring(0, 1).toUpperCase(Locale.ROOT) + romaji.substring(1);
		}
		if (token.getSurface().equals(token.getPronunciation())) {
			return romaji.toUpperCase(Locale.ROOT);
		}
		return romaji;
	}

	/**
	 * Converts a katakana string to romaji using {@link #katakanaMap}.
	 *
	 * <p>Two-character combinations are matched before single characters. A 'ッ' is rendered as the
	 * first romaji letter of the following character. Characters without a table entry are copied
	 * unchanged.
	 */
	private static String convertKanaToRomaji(String s) {
		StringBuilder t = new StringBuilder();
		int length = s.length();
		int i = 0;
		while (i < length) {
			if (i + 2 <= length) {
				String pair = katakanaMap.get(s.substring(i, i + 2));
				if (pair != null) {
					t.append(pair);
					i += 2;
					continue;
				}
			}
			char current = s.charAt(i);
			String single = katakanaMap.get(s.substring(i, i + 1));
			if (single != null) {
				t.append(single);
			} else if (current == 'ッ' && i + 1 < length) {
				String following = katakanaMap.get(s.substring(i + 1, i + 2));
				if (following != null) {
					t.append(following.charAt(0));
				} else {
					// The following character is not in the table; keep 'ッ' instead of failing.
					t.append(current);
				}
			} else {
				t.append(current);
			}
			i++;
		}
		return t.toString();
	}

	/** Copies {@link #KANA_TABLE} into {@link #katakanaMap}. */
	private static void initKanaMap() {
		for (String[] entry : KANA_TABLE) {
			katakanaMap.put(entry[0], entry[1]);
		}
	}
}