/*
 * OsmAnd/OsmAnd-java/src/main/java/net/osmand/util/TransliterationHelper.java
 * Repository-viewer listing: 245 lines, 6.9 KiB, Java.
 */
package net.osmand.util;
import com.atilika.kuromoji.ipadic.Token;
import com.atilika.kuromoji.ipadic.Tokenizer;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import net.osmand.PlatformUtil;
import net.sf.junidecode.Junidecode;
import org.apache.commons.logging.Log;
public class TransliterationHelper {
private static TransliterationHelper instance;
public final static Log LOG = PlatformUtil.getLog(TransliterationHelper.class);
2019-02-18 14:15:08 +01:00
public final static String DEFAULT = "default";
public final static String JAPAN = "Japan";
2019-02-18 13:02:38 +01:00
2019-02-18 14:15:08 +01:00
private static String country = DEFAULT;
2019-02-18 13:02:38 +01:00
2019-02-18 17:03:51 +01:00
private static Tokenizer tokenizer;
2019-02-18 14:15:08 +01:00
2019-02-18 17:03:51 +01:00
private static Map<String, String> katakanaMap = new HashMap<>();
2019-02-18 13:02:38 +01:00
2019-02-18 17:03:51 +01:00
private TransliterationHelper() {
}
2019-02-18 13:02:38 +01:00
static {
try {
instance = new TransliterationHelper();
} catch (Exception e) {
LOG.debug(e.getMessage(), e);
}
}
public static TransliterationHelper getInstance() {
return instance;
}
2019-02-18 14:15:08 +01:00
public static void setCountry(String countryName) {
TransliterationHelper.country = countryName;
2019-02-18 13:02:38 +01:00
}
2019-02-18 14:15:08 +01:00
public static String getCountry() {
return country;
2019-02-18 13:02:38 +01:00
}
2019-02-18 17:03:51 +01:00
public static String transliterate(String text) {
if (tokenizer == null) {
2019-02-18 14:15:08 +01:00
tokenizer = new Tokenizer();
}
switch (country) {
2019-02-18 13:02:38 +01:00
case DEFAULT:
return Junidecode.unidecode(text);
2019-02-18 14:15:08 +01:00
case JAPAN:
2019-02-18 13:02:38 +01:00
return japanese2Romaji(text);
}
return text;
}
2019-02-18 17:03:51 +01:00
private static String japanese2Romaji(String text) {
2019-02-18 13:02:38 +01:00
boolean capitalizeWords = true;
2019-02-18 17:03:51 +01:00
List<Token> tokens = tokenizer.tokenize(text);
2019-02-18 13:02:38 +01:00
StringBuilder builder = new StringBuilder();
2019-02-18 14:15:08 +01:00
if (katakanaMap.isEmpty()) {
2019-02-18 13:02:38 +01:00
initKanaMap();
}
for (Token token : tokens) {
String type = token.getAllFeaturesArray()[1];
if (token.getAllFeaturesArray()[0].equals("記号")) {
builder.append(token.getSurface());
continue;
}
switch (token.getAllFeaturesArray()[1]) {
case "":
case "アルファベット":
case "サ変接続":
builder.append(token.getSurface());
continue;
default:
String lastFeature = token.getAllFeaturesArray()[8];
if (lastFeature.equals("*")) {
builder.append(token.getSurface());
} else {
String romaji = convertKanaToRomaji(token.getAllFeaturesArray()[8]);
if (capitalizeWords) {
builder.append(romaji.substring(0, 1).toUpperCase());
builder.append(romaji.substring(1));
} else {
if (token.getSurface()
2019-02-18 17:03:51 +01:00
.equals(token.getPronunciation())) {
2019-02-18 13:02:38 +01:00
romaji = romaji.toUpperCase();
}
builder.append(romaji);
}
}
}
builder.append(" ");
}
return builder.toString();
}
2019-02-18 17:03:51 +01:00
private static String convertKanaToRomaji(String s) {
2019-02-18 13:02:38 +01:00
StringBuilder t = new StringBuilder();
for (int i = 0; i < s.length(); i++) {
if (i <= s.length() - 2) {
2019-02-18 14:15:08 +01:00
if (katakanaMap.containsKey(s.substring(i, i + 2))) {
t.append(katakanaMap.get(s.substring(i, i + 2)));
2019-02-18 13:02:38 +01:00
i++;
2019-02-18 14:15:08 +01:00
} else if (katakanaMap.containsKey(s.substring(i, i + 1))) {
t.append(katakanaMap.get(s.substring(i, i + 1)));
2019-02-18 13:02:38 +01:00
} else if (s.charAt(i) == 'ッ') {
2019-02-18 14:15:08 +01:00
t.append(katakanaMap.get(s.substring(i + 1, i + 2)).charAt(0));
2019-02-18 13:02:38 +01:00
} else {
t.append(s.charAt(i));
}
} else {
2019-02-18 14:15:08 +01:00
if (katakanaMap.containsKey(s.substring(i, i + 1))) {
t.append(katakanaMap.get(s.substring(i, i + 1)));
2019-02-18 13:02:38 +01:00
} else {
t.append(s.charAt(i));
}
}
}
return t.toString();
}
2019-02-18 17:03:51 +01:00
private static void initKanaMap() {
2019-02-18 14:15:08 +01:00
katakanaMap.put("", "a");
katakanaMap.put("", "i");
katakanaMap.put("", "u");
katakanaMap.put("", "e");
katakanaMap.put("", "o");
katakanaMap.put("", "ka");
katakanaMap.put("", "ki");
katakanaMap.put("", "ku");
katakanaMap.put("", "ke");
katakanaMap.put("", "ko");
katakanaMap.put("", "sa");
katakanaMap.put("", "shi");
katakanaMap.put("", "su");
katakanaMap.put("", "se");
katakanaMap.put("", "so");
katakanaMap.put("", "ta");
katakanaMap.put("", "chi");
katakanaMap.put("", "tsu");
katakanaMap.put("", "te");
katakanaMap.put("", "to");
katakanaMap.put("", "na");
katakanaMap.put("", "ni");
katakanaMap.put("", "nu");
katakanaMap.put("", "ne");
katakanaMap.put("", "no");
katakanaMap.put("", "ha");
katakanaMap.put("", "hi");
katakanaMap.put("", "fu");
katakanaMap.put("", "he");
katakanaMap.put("", "ho");
katakanaMap.put("", "ma");
katakanaMap.put("", "mi");
katakanaMap.put("", "mu");
katakanaMap.put("", "me");
katakanaMap.put("", "mo");
katakanaMap.put("", "ya");
katakanaMap.put("", "yu");
katakanaMap.put("", "yo");
katakanaMap.put("", "ra");
katakanaMap.put("", "ri");
katakanaMap.put("", "ru");
katakanaMap.put("", "re");
katakanaMap.put("", "ro");
katakanaMap.put("", "wa");
katakanaMap.put("", "wo");
katakanaMap.put("", "n");
katakanaMap.put("", "ga");
katakanaMap.put("", "gi");
katakanaMap.put("", "gu");
katakanaMap.put("", "ge");
katakanaMap.put("", "go");
katakanaMap.put("", "za");
katakanaMap.put("", "ji");
katakanaMap.put("", "zu");
katakanaMap.put("", "ze");
katakanaMap.put("", "zo");
katakanaMap.put("", "da");
katakanaMap.put("", "ji");
katakanaMap.put("", "zu");
katakanaMap.put("", "de");
katakanaMap.put("", "do");
katakanaMap.put("", "ba");
katakanaMap.put("", "bi");
katakanaMap.put("", "bu");
katakanaMap.put("", "be");
katakanaMap.put("", "bo");
katakanaMap.put("", "pa");
katakanaMap.put("", "pi");
katakanaMap.put("", "pu");
katakanaMap.put("", "pe");
katakanaMap.put("", "po");
katakanaMap.put("キャ", "kya");
katakanaMap.put("キュ", "kyu");
katakanaMap.put("キョ", "kyo");
katakanaMap.put("シャ", "sha");
katakanaMap.put("シュ", "shu");
katakanaMap.put("ショ", "sho");
katakanaMap.put("チャ", "cha");
katakanaMap.put("チュ", "chu");
katakanaMap.put("チョ", "cho");
katakanaMap.put("ニャ", "nya");
katakanaMap.put("ニュ", "nyu");
katakanaMap.put("ニョ", "nyo");
katakanaMap.put("ヒャ", "hya");
katakanaMap.put("ヒュ", "hyu");
katakanaMap.put("ヒョ", "hyo");
katakanaMap.put("リャ", "rya");
katakanaMap.put("リュ", "ryu");
katakanaMap.put("リョ", "ryo");
katakanaMap.put("ギャ", "gya");
katakanaMap.put("ギュ", "gyu");
katakanaMap.put("ギョ", "gyo");
katakanaMap.put("ジャ", "ja");
katakanaMap.put("ジュ", "ju");
katakanaMap.put("ジョ", "jo");
katakanaMap.put("ティ", "ti");
katakanaMap.put("ディ", "di");
katakanaMap.put("ツィ", "tsi");
katakanaMap.put("ヂャ", "dya");
katakanaMap.put("ヂュ", "dyu");
katakanaMap.put("ヂョ", "dyo");
katakanaMap.put("ビャ", "bya");
katakanaMap.put("ビュ", "byu");
katakanaMap.put("ビョ", "byo");
katakanaMap.put("ピャ", "pya");
katakanaMap.put("ピュ", "pyu");
katakanaMap.put("ピョ", "pyo");
katakanaMap.put("", "-");
2019-02-18 13:02:38 +01:00
}
}