Merge pull request #6569 from osmandapp/jpn_translit

Jpn translit
This commit is contained in:
Alexey 2019-02-20 19:55:02 +03:00 committed by GitHub
commit aeab7bc30b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 300 additions and 30 deletions

View file

@ -82,7 +82,6 @@ dependencies {
testImplementation 'junit:junit:4.12'
testImplementation 'com.google.code.gson:gson:2.8.2'
testImplementation 'org.hamcrest:hamcrest-core:1.3'
implementation group: 'commons-logging', name: 'commons-logging', version: '1.2'
implementation group: 'org.json', name: 'json', version: '20171018'
implementation 'it.unibo.alice.tuprolog:tuprolog:3.2.1'
@ -90,10 +89,8 @@ dependencies {
implementation 'org.apache.commons:commons-compress:1.17'
implementation 'com.moparisthebest:junidecode:0.1.1'
implementation 'com.vividsolutions:jts-core:1.14.0'
implementation 'com.atilika.kuromoji:kuromoji-ipadic:0.9.0'
implementation 'net.sf.kxml:kxml2:2.1.8'
implementation fileTree(dir: "libs", include: ["*.jar"])
implementation fileTree(include: ['*.jar'], dir: 'libs')
}

View file

@ -28,7 +28,7 @@ import net.osmand.data.MapObject;
import net.osmand.data.Postcode;
import net.osmand.data.Street;
import net.osmand.util.MapUtils;
import net.sf.junidecode.Junidecode;
import net.osmand.util.TransliterationHelper;
import org.apache.commons.logging.Log;
@ -152,7 +152,7 @@ public class BinaryMapAddressReaderAdapter {
switch (tag) {
case 0:
if (region.enName == null || region.enName.length() == 0) {
region.enName = region.name == null ? "" : Junidecode.unidecode(region.name);
region.enName = region.name == null ? "" : TransliterationHelper.transliterate(region.name);
}
return;
case OsmandOdb.OsmAndAddressIndex.NAME_FIELD_NUMBER:

View file

@ -17,7 +17,7 @@ import net.osmand.data.TransportStopExit;
import net.osmand.osm.edit.Node;
import net.osmand.osm.edit.Way;
import net.osmand.util.MapUtils;
import net.sf.junidecode.Junidecode;
import net.osmand.util.TransliterationHelper;
import com.google.protobuf.CodedInputStream;
import com.google.protobuf.WireFormat;
@ -435,7 +435,7 @@ public class BinaryMapTransportReaderAdapter {
dataObject.setEnName(stringTable.get(dataObject.getEnName(false).charAt(0)));
}
if(dataObject.getName().length() > 0 && dataObject.getName("en").length() == 0){
dataObject.setEnName(Junidecode.unidecode(dataObject.getName()));
dataObject.setEnName(TransliterationHelper.transliterate(dataObject.getName()));
}
if(dataObject.getOperator() != null && dataObject.getOperator().length() > 0){
dataObject.setOperator(stringTable.get(dataObject.getOperator().charAt(0)));
@ -547,7 +547,7 @@ public class BinaryMapTransportReaderAdapter {
case 0:
dataObject.setReferencesToRoutes(req.cacheTypes.toArray());
if(dataObject.getName("en").length() == 0){
dataObject.setEnName(Junidecode.unidecode(dataObject.getName()));
dataObject.setEnName(TransliterationHelper.transliterate(dataObject.getName()));
}
return dataObject;
case OsmandOdb.TransportStop.ROUTES_FIELD_NUMBER :
@ -611,7 +611,7 @@ public class BinaryMapTransportReaderAdapter {
switch (tag) {
case 0:
if (dataObject.getName("en").length() == 0) {
dataObject.setEnName(Junidecode.unidecode(dataObject.getName()));
dataObject.setEnName(TransliterationHelper.transliterate(dataObject.getName()));
}
if (x != 0 || y != 0) {
dataObject.setLocation(BinaryMapIndexReader.TRANSPORT_STOP_ZOOM, x, y);

View file

@ -11,7 +11,7 @@ import net.osmand.binary.BinaryMapRouteReaderAdapter.RouteRegion;
import net.osmand.binary.BinaryMapRouteReaderAdapter.RouteTypeRule;
import net.osmand.util.Algorithms;
import net.osmand.util.MapUtils;
import net.sf.junidecode.Junidecode;
import net.osmand.util.TransliterationHelper;
public class RouteDataObject {
@ -261,7 +261,7 @@ public class RouteDataObject {
}
String nmDef = names.get(region.nameTypeRule);
if(transliterate && nmDef != null && nmDef.length() > 0) {
return Junidecode.unidecode(nmDef);
return TransliterationHelper.transliterate(nmDef);
}
return nmDef;
}
@ -295,7 +295,7 @@ public class RouteDataObject {
}
String refDefault = names.get(region.refTypeRule);
if(transliterate && refDefault != null && refDefault.length() > 0) {
return Junidecode.unidecode(refDefault);
return TransliterationHelper.transliterate(refDefault);
}
return refDefault;
}
@ -358,13 +358,13 @@ public class RouteDataObject {
int k = kt[i];
if(region.routeEncodingRules.size() > k) {
if(!Algorithms.isEmpty(lang) && destinationTagLangFB.equals(region.routeEncodingRules.get(k).getTag())) {
return destRef1 + ((transliterate) ? Junidecode.unidecode(names.get(k)) : names.get(k));
return destRef1 + ((transliterate) ? TransliterationHelper.transliterate(names.get(k)) : names.get(k));
}
if(destinationTagFB.equals(region.routeEncodingRules.get(k).getTag())) {
return destRef1 + ((transliterate) ? Junidecode.unidecode(names.get(k)) : names.get(k));
return destRef1 + ((transliterate) ? TransliterationHelper.transliterate(names.get(k)) : names.get(k));
}
if(!Algorithms.isEmpty(lang) && destinationTagLang.equals(region.routeEncodingRules.get(k).getTag())) {
return destRef1 + ((transliterate) ? Junidecode.unidecode(names.get(k)) : names.get(k));
return destRef1 + ((transliterate) ? TransliterationHelper.transliterate(names.get(k)) : names.get(k));
}
if(destinationTagDefault.equals(region.routeEncodingRules.get(k).getTag())) {
destinationDefault = names.get(k);
@ -372,7 +372,7 @@ public class RouteDataObject {
}
}
if(destinationDefault != null) {
return destRef1 + ((transliterate) ? Junidecode.unidecode(destinationDefault) : destinationDefault);
return destRef1 + ((transliterate) ? TransliterationHelper.transliterate(destinationDefault) : destinationDefault);
}
}
return "".equals(destRef) ? null : destRef;

View file

@ -4,7 +4,7 @@ package net.osmand.data;
import net.osmand.Collator;
import net.osmand.OsmAndCollator;
import net.osmand.util.Algorithms;
import net.sf.junidecode.Junidecode;
import net.osmand.util.TransliterationHelper;
import org.json.JSONObject;
@ -182,7 +182,7 @@ public abstract class MapObject implements Comparable<MapObject> {
return nm;
}
if (transliterate) {
return Junidecode.unidecode(getName());
return TransliterationHelper.transliterate(getName());
}
}
}
@ -194,7 +194,7 @@ public abstract class MapObject implements Comparable<MapObject> {
if (!Algorithms.isEmpty(enName)) {
return this.enName;
} else if (!Algorithms.isEmpty(getName()) && transliterate) {
return Junidecode.unidecode(getName());
return TransliterationHelper.transliterate(getName());
}
return ""; //$NON-NLS-1$
}

View file

@ -0,0 +1,243 @@
package net.osmand.util;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.atilika.kuromoji.ipadic.Token;
import com.atilika.kuromoji.ipadic.Tokenizer;
import net.osmand.PlatformUtil;
import net.sf.junidecode.Junidecode;
import org.apache.commons.logging.Log;
/**
 * Converts names into a Latin-script form.
 * <p>
 * The strategy depends on the country name last passed to {@link #setCountryName(String)}:
 * for {@link #JAPAN} the text is tokenized with Kuromoji and the katakana reading of each
 * token is rewritten as romaji; for everything else the text is handed to
 * {@link Junidecode#unidecode(String)}.
 * <p>
 * The selected country is static state shared by every caller of this class.
 */
public class TransliterationHelper {

	public final static Log LOG = PlatformUtil.getLog(TransliterationHelper.class);
	public final static String DEFAULT = "default";
	public final static String JAPAN = "Japan";

	/** Value found in a token's feature array when a feature has no value. */
	private static final String NO_FEATURE = "*";
	/** Feature index of the part of speech. */
	private static final int FEATURE_PART_OF_SPEECH = 0;
	/** Feature index of the part-of-speech subtype. */
	private static final int FEATURE_SUBTYPE = 1;
	/**
	 * Feature index of the katakana string that is converted to romaji.
	 * NOTE(review): presumably the IPADIC "pronunciation" column - confirm against Kuromoji.
	 */
	private static final int FEATURE_KANA = 8;

	// NOTE(review): volatile because the setter and transliterate() may run on different
	// threads - confirm against the callers.
	private static volatile String countryName = DEFAULT;
	// Created on first use, see getTokenizer().
	private static Tokenizer tokenizer;
	private static final Map<String, String> katakanaMap = new HashMap<>();

	static {
		// Fill the table once at class load so no caller can ever observe it half-populated.
		initKanaMap();
	}

	private TransliterationHelper() {
		// static utility, not instantiable
	}

	/**
	 * Selects the transliteration strategy.
	 *
	 * @param countryName {@code "Japan"} selects the romaji conversion; any other value,
	 *                    including {@code null}, selects the default conversion
	 */
	public static void setCountryName(String countryName) {
		TransliterationHelper.countryName = JAPAN.equals(countryName) ? JAPAN : DEFAULT;
	}

	/**
	 * @return the active strategy, either {@link #JAPAN} or {@link #DEFAULT}
	 */
	public static String getCountryName() {
		return countryName;
	}

	/**
	 * Transliterates {@code text} using the currently selected strategy.
	 *
	 * @param text the text to convert. NOTE(review): {@code null} is passed straight to the
	 *             underlying library, whose null handling is not verified here.
	 * @return the transliterated text
	 */
	public static String transliterate(String text) {
		if (JAPAN.equals(countryName)) {
			return japanese2Romaji(text);
		}
		return Junidecode.unidecode(text);
	}

	/**
	 * Returns the shared tokenizer, creating it on the first call. Synchronized so that
	 * concurrent first calls cannot build two instances.
	 */
	private static synchronized Tokenizer getTokenizer() {
		if (tokenizer == null) {
			tokenizer = new Tokenizer();
		}
		return tokenizer;
	}

	/**
	 * Tokenizes {@code text} and rewrites every token that has a kana reading as capitalized
	 * romaji followed by a single space. Symbols, alphabetic tokens, "サ変接続" tokens and tokens
	 * without a reading are copied unchanged. A separator left at the very end of the result
	 * is removed.
	 */
	private static String japanese2Romaji(String text) {
		List<Token> tokens = getTokenizer().tokenize(text);
		StringBuilder builder = new StringBuilder();
		boolean endsWithSeparator = false;
		for (Token token : tokens) {
			String[] features = token.getAllFeaturesArray();
			if (isCopiedVerbatim(features)) {
				builder.append(token.getSurface());
				endsWithSeparator = false;
				continue;
			}
			String kana = featureAt(features, FEATURE_KANA);
			if (NO_FEATURE.equals(kana) || kana.isEmpty()) {
				// No reading available: keep the token exactly as written.
				builder.append(token.getSurface());
			} else {
				String romaji = convertKanaToRomaji(kana);
				// Character.toUpperCase ignores the default locale, so 'i' never turns
				// into a dotted capital under a Turkish locale.
				builder.append(Character.toUpperCase(romaji.charAt(0)));
				builder.append(romaji, 1, romaji.length());
			}
			builder.append(" ");
			endsWithSeparator = true;
		}
		if (endsWithSeparator) {
			// Drop the separator appended after the final token.
			builder.setLength(builder.length() - 1);
		}
		return builder.toString();
	}

	/**
	 * Tells whether a token is emitted as-is: its part of speech is "記号" (symbol) or its
	 * subtype is empty, "アルファベット" (alphabet) or "サ変接続".
	 */
	private static boolean isCopiedVerbatim(String[] features) {
		if ("記号".equals(featureAt(features, FEATURE_PART_OF_SPEECH))) {
			return true;
		}
		String subtype = featureAt(features, FEATURE_SUBTYPE);
		return subtype.isEmpty() || "アルファベット".equals(subtype) || "サ変接続".equals(subtype);
	}

	/**
	 * Reads one feature, returning {@link #NO_FEATURE} instead of failing when the array is
	 * shorter than expected or the slot is {@code null}.
	 */
	private static String featureAt(String[] features, int index) {
		if (features == null || index >= features.length || features[index] == null) {
			return NO_FEATURE;
		}
		return features[index];
	}

	/**
	 * Rewrites a katakana string as romaji. At every position a two-character combination is
	 * tried first, then a single character. A small "ッ" doubles the first letter of the
	 * romaji of the following character. Anything without a table entry is copied unchanged.
	 */
	private static String convertKanaToRomaji(String s) {
		StringBuilder t = new StringBuilder();
		int length = s.length();
		for (int i = 0; i < length; i++) {
			if (i + 2 <= length) {
				String pair = katakanaMap.get(s.substring(i, i + 2));
				if (pair != null) {
					t.append(pair);
					i++; // the combination consumed two characters
					continue;
				}
			}
			String single = katakanaMap.get(s.substring(i, i + 1));
			if (single != null) {
				t.append(single);
				continue;
			}
			char c = s.charAt(i);
			if (c == 'ッ' && i + 1 < length) {
				String next = katakanaMap.get(s.substring(i + 1, i + 2));
				if (next != null) {
					t.append(next.charAt(0));
					continue;
				}
				// Next character is unknown: fall through and keep the "ッ" itself.
			}
			t.append(c);
		}
		return t.toString();
	}

	/** Fills the katakana to romaji table: single characters, combinations, long-vowel mark. */
	private static void initKanaMap() {
		katakanaMap.put("ア", "a");
		katakanaMap.put("イ", "i");
		katakanaMap.put("ウ", "u");
		katakanaMap.put("エ", "e");
		katakanaMap.put("オ", "o");
		katakanaMap.put("カ", "ka");
		katakanaMap.put("キ", "ki");
		katakanaMap.put("ク", "ku");
		katakanaMap.put("ケ", "ke");
		katakanaMap.put("コ", "ko");
		katakanaMap.put("サ", "sa");
		katakanaMap.put("シ", "shi");
		katakanaMap.put("ス", "su");
		katakanaMap.put("セ", "se");
		katakanaMap.put("ソ", "so");
		katakanaMap.put("タ", "ta");
		katakanaMap.put("チ", "chi");
		katakanaMap.put("ツ", "tsu");
		katakanaMap.put("テ", "te");
		katakanaMap.put("ト", "to");
		katakanaMap.put("ナ", "na");
		katakanaMap.put("ニ", "ni");
		katakanaMap.put("ヌ", "nu");
		katakanaMap.put("ネ", "ne");
		katakanaMap.put("ノ", "no");
		katakanaMap.put("ハ", "ha");
		katakanaMap.put("ヒ", "hi");
		katakanaMap.put("フ", "fu");
		katakanaMap.put("ヘ", "he");
		katakanaMap.put("ホ", "ho");
		katakanaMap.put("マ", "ma");
		katakanaMap.put("ミ", "mi");
		katakanaMap.put("ム", "mu");
		katakanaMap.put("メ", "me");
		katakanaMap.put("モ", "mo");
		katakanaMap.put("ヤ", "ya");
		katakanaMap.put("ユ", "yu");
		katakanaMap.put("ヨ", "yo");
		katakanaMap.put("ラ", "ra");
		katakanaMap.put("リ", "ri");
		katakanaMap.put("ル", "ru");
		katakanaMap.put("レ", "re");
		katakanaMap.put("ロ", "ro");
		katakanaMap.put("ワ", "wa");
		katakanaMap.put("ヲ", "wo");
		katakanaMap.put("ン", "n");
		katakanaMap.put("ガ", "ga");
		katakanaMap.put("ギ", "gi");
		katakanaMap.put("グ", "gu");
		katakanaMap.put("ゲ", "ge");
		katakanaMap.put("ゴ", "go");
		katakanaMap.put("ザ", "za");
		katakanaMap.put("ジ", "ji");
		katakanaMap.put("ズ", "zu");
		katakanaMap.put("ゼ", "ze");
		katakanaMap.put("ゾ", "zo");
		katakanaMap.put("ダ", "da");
		katakanaMap.put("ヂ", "ji");
		katakanaMap.put("ヅ", "zu");
		katakanaMap.put("デ", "de");
		katakanaMap.put("ド", "do");
		katakanaMap.put("バ", "ba");
		katakanaMap.put("ビ", "bi");
		katakanaMap.put("ブ", "bu");
		katakanaMap.put("ベ", "be");
		katakanaMap.put("ボ", "bo");
		katakanaMap.put("パ", "pa");
		katakanaMap.put("ピ", "pi");
		katakanaMap.put("プ", "pu");
		katakanaMap.put("ペ", "pe");
		katakanaMap.put("ポ", "po");
		katakanaMap.put("キャ", "kya");
		katakanaMap.put("キュ", "kyu");
		katakanaMap.put("キョ", "kyo");
		katakanaMap.put("シャ", "sha");
		katakanaMap.put("シュ", "shu");
		katakanaMap.put("ショ", "sho");
		katakanaMap.put("チャ", "cha");
		katakanaMap.put("チュ", "chu");
		katakanaMap.put("チョ", "cho");
		katakanaMap.put("ニャ", "nya");
		katakanaMap.put("ニュ", "nyu");
		katakanaMap.put("ニョ", "nyo");
		katakanaMap.put("ヒャ", "hya");
		katakanaMap.put("ヒュ", "hyu");
		katakanaMap.put("ヒョ", "hyo");
		katakanaMap.put("リャ", "rya");
		katakanaMap.put("リュ", "ryu");
		katakanaMap.put("リョ", "ryo");
		katakanaMap.put("ギャ", "gya");
		katakanaMap.put("ギュ", "gyu");
		katakanaMap.put("ギョ", "gyo");
		katakanaMap.put("ジャ", "ja");
		katakanaMap.put("ジュ", "ju");
		katakanaMap.put("ジョ", "jo");
		katakanaMap.put("ティ", "ti");
		katakanaMap.put("ディ", "di");
		katakanaMap.put("ツィ", "tsi");
		katakanaMap.put("ヂャ", "dya");
		katakanaMap.put("ヂュ", "dyu");
		katakanaMap.put("ヂョ", "dyo");
		katakanaMap.put("ビャ", "bya");
		katakanaMap.put("ビュ", "byu");
		katakanaMap.put("ビョ", "byo");
		katakanaMap.put("ピャ", "pya");
		katakanaMap.put("ピュ", "pyu");
		katakanaMap.put("ピョ", "pyo");
		katakanaMap.put("ー", "-");
	}
}

View file

@ -66,6 +66,12 @@ android {
warningsAsErrors false
}
packagingOptions {
exclude '/META-INF/CONTRIBUTORS.md'
exclude '/META-INF/LICENSE.md'
exclude '/META-INF/NOTICE.md'
}
// This is from OsmAndCore_android.aar - for some reason it's not inherited
aaptOptions {
// Don't compress any embedded resources
@ -391,7 +397,7 @@ dependencies {
implementation 'com.moparisthebest:junidecode:0.1.1'
implementation 'org.immutables:gson:2.5.0'
implementation 'com.vividsolutions:jts-core:1.14.0'
implementation 'com.atilika.kuromoji:kuromoji-ipadic:0.9.0'
implementation 'com.squareup.picasso:picasso:2.71828'
// JS core
implementation group: 'org.mozilla', name: 'rhino', version: '1.7.9'

View file

@ -18,7 +18,7 @@ import net.osmand.plus.OsmandApplication;
import net.osmand.plus.R;
import net.osmand.util.Algorithms;
import net.osmand.util.MapUtils;
import net.sf.junidecode.Junidecode;
import net.osmand.util.TransliterationHelper;
import org.apache.commons.logging.Log;
import org.xmlpull.v1.XmlPullParser;
@ -137,7 +137,7 @@ public class NominatimPoiFilter extends PoiUIFilter {
a.setId(Long.parseLong(parser.getAttributeValue("", "place_id"))); //$NON-NLS-1$ //$NON-NLS-2$
String name = parser.getAttributeValue("", "display_name"); //$NON-NLS-1$//$NON-NLS-2$
a.setName(name);
a.setEnName(Junidecode.unidecode(name));
// Transliterate the amenity's own display name; getName() would return this filter's name.
a.setEnName(TransliterationHelper.transliterate(name));
a.setSubType(parser.getAttributeValue("", "type")); //$NON-NLS-1$//$NON-NLS-2$
PoiType pt = poiTypes.getPoiTypeByKey(a.getSubType());
a.setType(pt != null ? pt.getCategory() : poiTypes.getOtherPoiCategory());
@ -153,7 +153,7 @@ public class NominatimPoiFilter extends PoiUIFilter {
String name = parser.getText();
if (name != null) {
a.setName(name);
a.setEnName(Junidecode.unidecode(name));
// Transliterate the text just parsed for this amenity; getName() would return this filter's name.
a.setEnName(TransliterationHelper.transliterate(name));
}
}
}

View file

@ -1,6 +1,5 @@
package net.osmand.plus.render;
import gnu.trove.iterator.TIntObjectIterator;
import gnu.trove.list.TLongList;
import gnu.trove.list.array.TIntArrayList;
@ -56,6 +55,7 @@ import net.osmand.util.Algorithms;
import net.osmand.util.MapAlgorithms;
import net.osmand.util.MapUtils;
import net.osmand.util.TransliterationHelper;
import org.apache.commons.logging.Log;
import android.content.Context;
@ -300,6 +300,7 @@ public class MapRenderRepositories {
if(library == null) {
return;
}
boolean containsJapanMapData = false;
boolean useLive = context.getSettings().USE_OSM_LIVE_FOR_ROUTING.get();
for (String mapName : files.keySet()) {
BinaryMapIndexReader fr = files.get(mapName);
@ -313,8 +314,18 @@ public class MapRenderRepositories {
}
log.debug("Native resource " + mapName + " initialized " + (System.currentTimeMillis() - time) + " ms"); //$NON-NLS-1$ //$NON-NLS-2$
}
if (fr.getCountryName().equals("Japan")) {
containsJapanMapData = true;
}
}
}
if (containsJapanMapData) {
TransliterationHelper.setCountryName("Japan");
} else {
TransliterationHelper.setCountryName("");
}
}
private void readRouteDataAsMapObjects(SearchRequest<BinaryMapDataObject> sr, BinaryMapIndexReader c,
@ -530,6 +541,7 @@ public class MapRenderRepositories {
}
MapIndex mi = null;
searchRequest = BinaryMapIndexReader.buildSearchRequest(leftX, rightX, topY, bottomY, zoom, searchFilter);
boolean containsJapanMapData = false;
for (BinaryMapIndexReader c : files.values()) {
boolean basemap = c.isBasemap();
searchRequest.clearSearchResults();
@ -546,6 +558,9 @@ public class MapRenderRepositories {
} else {
renderedState |= 2;
}
if (c.getCountryName().equals("Japan")) {
containsJapanMapData = true;
}
}
for (BinaryMapDataObject r : res) {
if (checkForDuplicateObjectIds && !basemap) {
@ -585,6 +600,12 @@ public class MapRenderRepositories {
land[0] = true;
}
}
if (containsJapanMapData) {
TransliterationHelper.setCountryName("Japan");
} else {
TransliterationHelper.setCountryName("");
}
return mi;
}

View file

@ -1,5 +1,6 @@
package net.osmand.plus.render;
import gnu.trove.map.hash.TIntObjectHashMap;
import gnu.trove.procedure.TIntObjectProcedure;
@ -8,6 +9,7 @@ import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import net.osmand.PlatformUtil;
import net.osmand.binary.BinaryMapDataObject;
import net.osmand.binary.BinaryMapIndexReader.TagValuePair;
import net.osmand.data.QuadRect;
@ -16,7 +18,7 @@ import net.osmand.plus.render.OsmandRenderer.RenderingContext;
import net.osmand.render.RenderingRuleSearchRequest;
import net.osmand.render.RenderingRulesStorage;
import net.osmand.util.Algorithms;
import net.sf.junidecode.Junidecode;
import net.osmand.util.TransliterationHelper;
import android.content.Context;
import android.graphics.Bitmap;
import android.graphics.Canvas;
@ -29,6 +31,7 @@ import android.graphics.PointF;
import android.graphics.Rect;
import android.graphics.RectF;
import android.graphics.Typeface;
import org.apache.commons.logging.Log;
public class TextRenderer {
@ -236,7 +239,7 @@ public class TextRenderer {
TextDrawInfo text = rc.textToDraw.get(i);
if (text.text != null && text.text.length() > 0) {
if (preferredLocale.length() > 0) {
text.text = Junidecode.unidecode(text.text);
text.text = TransliterationHelper.transliterate(text.text);
}
// sest text size before finding intersection (it is used there)