Feature - japanese2latin transliteration - in progress

This commit is contained in:
madwasp79 2019-02-07 16:29:44 +02:00
parent 0377b79177
commit 360d55e33e
6 changed files with 100 additions and 2 deletions

Binary file not shown.

Binary file not shown.

View file

@ -523,4 +523,6 @@ public class AndroidUtils {
}
return isKana;
}
}

View file

@ -0,0 +1,89 @@
package net.osmand.plus.helpers;
import com.atilika.kuromoji.ipadic.Token;
import com.atilika.kuromoji.ipadic.Tokenizer;
import java.util.List;
/**
 * Static helpers for turning Japanese text into Latin script (romaji).
 *
 * <p>Text is split into tokens with the kuromoji IPADIC {@link Tokenizer}; the
 * reading of each token (feature index 8 of {@link Token#getAllFeaturesArray()})
 * is then converted with {@code KanaToRomaji}, which is resolved from this
 * package.
 */
public class CustomTransliterationHelper {

    /** Value of feature 0 for tokens that are copied through untouched (Japanese for "symbol"). */
    private static final String POS_SYMBOL = "記号";

    /** Placeholder the dictionary uses when a token has no reading. */
    private static final String NO_READING = "*";

    /** Index of the reading inside {@link Token#getAllFeaturesArray()}. */
    private static final int READING_FEATURE_INDEX = 8;

    /**
     * Katakana small "tsu" (sokuon). A reading ending with it is held back and
     * merged into the next token's reading.
     * NOTE(review): the previous code compared against an empty literal, which
     * matched every reading; this character is the presumed intent - confirm.
     */
    private static final String SOKUON = "ッ";

    /** When true, the first letter of every converted word is upper-cased. */
    private static final boolean CAPITALIZE_WORDS = true;

    /** Debug tracing of token types to stdout; disabled by default. */
    private static final boolean DEBUG = false;

    /**
     * Lazily creates the single shared tokenizer on first use instead of
     * building a new one for every call.
     * NOTE(review): assumes one Tokenizer may be shared between threads -
     * confirm against the kuromoji version in use.
     */
    private static final class TokenizerHolder {
        static final Tokenizer INSTANCE = new Tokenizer();
    }

    private CustomTransliterationHelper() {
        // static utility class, not meant to be instantiated
    }

    /**
     * Transliterates Japanese text to romaji.
     *
     * <p>Symbol tokens and tokens of a verbatim type are copied as-is with no
     * separator. Tokens without a reading are copied as-is followed by a
     * space. All other tokens are converted from their reading and followed by
     * a space. The separator left at the very end of the result is removed.
     *
     * @param input text to transliterate; may be {@code null} or empty
     * @return the transliterated text, or an empty string for {@code null}/empty input
     */
    public static String japanese2Romaji(String input) {
        if (input == null || input.isEmpty()) {
            return "";
        }
        List<Token> tokens = TokenizerHolder.INSTANCE.tokenize(input);
        StringBuilder buffer = new StringBuilder();
        KanaToRomaji kanaToRomaji = new KanaToRomaji();
        // Reading that ended in a sokuon and still waits for the following token.
        String pendingReading = "";

        for (Token token : tokens) {
            String[] features = token.getAllFeaturesArray();
            String type = features[1];
            if (DEBUG) {
                System.out.println("Type: " + type);
            }

            boolean verbatim = POS_SYMBOL.equals(features[0]) || isVerbatimType(type);
            // Defensive: treat a short feature array like a missing reading.
            String reading = features.length > READING_FEATURE_INDEX
                    ? features[READING_FEATURE_INDEX]
                    : NO_READING;

            if (verbatim || NO_READING.equals(reading)) {
                if (!pendingReading.isEmpty()) {
                    // Nothing to merge with: emit the held reading on its own.
                    appendWord(buffer, kanaToRomaji.convert(pendingReading), null);
                    buffer.append(' ');
                    pendingReading = "";
                }
                buffer.append(token.getSurface());
                if (!verbatim) {
                    buffer.append(' ');
                }
                continue;
            }

            String merged = pendingReading + reading;
            if (reading.endsWith(SOKUON)) {
                // Hold it so it is converted together with the next reading.
                pendingReading = merged;
                continue;
            }
            pendingReading = "";
            appendWord(buffer, kanaToRomaji.convert(merged), token);
            buffer.append(' ');
        }

        if (!pendingReading.isEmpty()) {
            // Input ended on a held reading; do not lose it.
            appendWord(buffer, kanaToRomaji.convert(pendingReading), null);
        }

        int last = buffer.length() - 1;
        if (last >= 0 && buffer.charAt(last) == ' ') {
            buffer.setLength(last);
        }
        return buffer.toString();
    }

    /** Tells whether tokens with this feature-1 value are copied through without conversion. */
    private static boolean isVerbatimType(String type) {
        switch (type) {
            // NOTE(review): an empty label most likely lost its original
            // character the same way the sokuon literal did - confirm and restore.
            case "":
            case "アルファベット":
            case "サ変接続":
                return true;
            default:
                return false;
        }
    }

    /**
     * Appends one converted word using the configured capitalisation.
     *
     * @param out    destination buffer
     * @param romaji converted text; {@code null} or empty appends nothing
     * @param source token the text came from, or {@code null} when unknown
     */
    private static void appendWord(StringBuilder out, String romaji, Token source) {
        if (romaji == null || romaji.isEmpty()) {
            return;
        }
        if (CAPITALIZE_WORDS) {
            // Locale.ROOT keeps the result independent of the device locale.
            out.append(romaji.substring(0, 1).toUpperCase(java.util.Locale.ROOT));
            out.append(romaji.substring(1));
        } else {
            // Convert foreign katakana words to uppercase: surface equal to
            // its pronunciation is used as the katakana test.
            if (source != null && source.getSurface().equals(source.getPronunciation())) {
                romaji = romaji.toUpperCase(java.util.Locale.ROOT);
            }
            out.append(romaji);
        }
    }

    /**
     * Checks whether a character belongs to one of the Unicode blocks this
     * helper treats as CJK (kana, CJK ideographs and related blocks).
     *
     * @param c character to test
     * @return {@code true} when the character's block is in the accepted set
     */
    public static boolean isCharCJK(final char c) {
        // Look the block up once; it is null for characters outside any block.
        Character.UnicodeBlock block = Character.UnicodeBlock.of(c);
        return block == Character.UnicodeBlock.HIRAGANA
                || block == Character.UnicodeBlock.KATAKANA
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || block == Character.UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS;
    }

    /** Manual smoke test: prints the transliteration of a few sample names. */
    public static void main(String[] args) {
        System.out.println(japanese2Romaji("川名本町"));
        System.out.println(japanese2Romaji("明岸寺"));
        System.out.println(japanese2Romaji("つきしま"));
    }
}

View file

@ -1,4 +1,4 @@
package net.osmand.plus;
package net.osmand.plus.helpers;
import java.util.HashMap;
import java.util.Map;

View file

@ -12,6 +12,7 @@ import net.osmand.binary.BinaryMapDataObject;
import net.osmand.binary.BinaryMapIndexReader.TagValuePair;
import net.osmand.data.QuadRect;
import net.osmand.data.QuadTree;
import net.osmand.plus.helpers.CustomTransliterationHelper;
import net.osmand.plus.render.OsmandRenderer.RenderingContext;
import net.osmand.render.RenderingRuleSearchRequest;
import net.osmand.render.RenderingRulesStorage;
@ -236,9 +237,15 @@ public class TextRenderer {
TextDrawInfo text = rc.textToDraw.get(i);
if (text.text != null && text.text.length() > 0) {
if (preferredLocale.length() > 0) {
text.text = Junidecode.unidecode(text.text);
if (CustomTransliterationHelper.isCharCJK(text.text.charAt(0))) {
text.text = CustomTransliterationHelper.japanese2Romaji(text.text);
} else {
text.text = Junidecode.unidecode(text.text);
}
}
// set text size before finding intersection (it is used there)
float textSize = text.textSize * rc.textScale ;
paintText.setTextSize(textSize);