From 016320795c0a0ba066df4948c62154d8f487c794 Mon Sep 17 00:00:00 2001 From: Kseniia Date: Mon, 1 Feb 2021 19:31:47 +0200 Subject: [PATCH] fix search --- .../osmand/binary/BinaryMapIndexReader.java | 79 ++++++++++++++- .../binary/BinaryMapPoiReaderAdapter.java | 95 ++++++++++++++----- 2 files changed, 146 insertions(+), 28 deletions(-) diff --git a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java index 8743940a74..9fbf00ed4e 100644 --- a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java +++ b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java @@ -2161,7 +2161,7 @@ public class BinaryMapIndexReader { private static boolean testAddressSearch = false; private static boolean testAddressSearchName = false; private static boolean testAddressJustifySearch = false; - private static boolean testPoiSearch = false; + private static boolean testPoiSearch = true; private static boolean testPoiSearchOnPath = false; private static boolean testTransportSearch = true; @@ -2177,7 +2177,7 @@ public class BinaryMapIndexReader { public static void main(String[] args) throws IOException { File fl = new File(System.getProperty("maps") + "/Synthetic_test_rendering.obf"); - fl = new File("/home/madwasp79/OsmAnd-maps/Poly_center2.obf"); + fl = new File("/Users/plotva/work/osmand/maps/Wikivoyage.obf"); RandomAccessFile raf = new RandomAccessFile(fl, "r"); @@ -2325,7 +2325,7 @@ public class BinaryMapIndexReader { private static void testPoiSearchByName(BinaryMapIndexReader reader) throws IOException { println("Searching by name..."); - SearchRequest req = buildSearchPoiRequest(0, 0, "Art", + SearchRequest req = buildSearchPoiRequest(0, 0, "central ukraine", 0, Integer.MAX_VALUE, 0, Integer.MAX_VALUE, null); reader.searchPoiByName(req); @@ -2385,6 +2385,79 @@ public class BinaryMapIndexReader { } + int readIndexedStringTable(Collator instance, String query, String prefix, HashMap map, int charMatches) throws IOException { + String key = null; + while (true) { + int t = codedIS.readTag(); + int tag = WireFormat.getTagFieldNumber(t); + switch (tag) { + case 0: + return charMatches; + case OsmandOdb.IndexedStringTable.KEY_FIELD_NUMBER : + key = codedIS.readString(); + if(prefix.length() > 0){ + key = prefix + key; + } + // check query is part of key (the best matching) + if(CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_STARTS_FROM_SPACE)){ + if(query.length() >= charMatches){ + if(query.length() > charMatches){ + charMatches = query.length(); + map.clear(); + } + } else { + key = null; + } + // check key is part of query + } else if (CollatorStringMatcher.cmatches(instance, query, key, StringMatcherMode.CHECK_STARTS_FROM_SPACE)) { + if (key.length() >= charMatches) { + if (key.length() > charMatches) { + charMatches = key.length(); + map.clear(); + } + } else { + key = null; + } + } else { + key = null; + } + break; + case OsmandOdb.IndexedStringTable.VAL_FIELD_NUMBER: + int val = readInt(); + if (key != null) { + String[] words = query.split(" "); + String keyByWord = null; + for (String w : words) { + if (instance.equals(w.substring(0, charMatches), key)) { + keyByWord = w; + } + } + if (map.containsKey(keyByWord)) { + map.get(keyByWord).add(val); + } else { + TIntArrayList list = new TIntArrayList(); + list.add(val); + map.put(keyByWord, list); + } + } + break; + case OsmandOdb.IndexedStringTable.SUBTABLES_FIELD_NUMBER : + int len = codedIS.readRawVarint32(); + int oldLim = codedIS.pushLimit(len); + if (key != null) { + charMatches = readIndexedStringTable(instance, query, key, map, charMatches); + } else { + codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); + } + codedIS.popLimit(oldLim); + break; + default: + skipUnknownField(t); + break; + } + } + } + int readIndexedStringTable(Collator instance, String query, String prefix, TIntArrayList list, int charMatches) throws IOException { String key = null; while (true) { diff --git a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java index cb348cf9b4..190576454c 100644 --- a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java +++ b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java @@ -9,8 +9,11 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; +import java.util.Map.Entry; import net.osmand.Collator; import net.osmand.CollatorStringMatcher; @@ -293,6 +296,7 @@ public class BinaryMapPoiReaderAdapter { } protected void searchPoiByName(PoiRegion region, SearchRequest req) throws IOException { + HashMap offsetsMap = new HashMap<>(); TIntLongHashMap offsets = new TIntLongHashMap(); String query = normalizeSearchPoiByNameQuery(req.nameQuery); CollatorStringMatcher matcher = new CollatorStringMatcher(query, @@ -312,18 +316,48 @@ public class BinaryMapPoiReaderAdapter { int length = readInt(); int oldLimit = codedIS.pushLimit(length); // here offsets are sorted by distance - offsets = readPoiNameIndex(matcher.getCollator(), query, req); + offsetsMap = readPoiNameIndex(matcher.getCollator(), query, req); codedIS.popLimit(oldLimit); break; case OsmandOdb.OsmAndPoiIndex.POIDATA_FIELD_NUMBER: // also offsets can be randomly skipped by limit Integer[] offKeys = new Integer[offsets.size()]; - if (offsets.size() > 0) { - int[] keys = offsets.keys(); - for (int i = 0; i < keys.length; i++) { - offKeys[i] = keys[i]; + if (offsetsMap.size() > 0) { + List> setKeys = new ArrayList<>(); + for (Entry item : offsetsMap.entrySet()) { + TIntLongHashMap sets = item.getValue(); + + Integer[] offKeysFinal = new Integer[sets.size()]; + int[] keys = sets.keys(); + for (int i = 0; i < keys.length; i++) { + offKeysFinal[i] = keys[i]; + } + offsets.putAll(sets); + HashSet generalSet = new HashSet(Arrays.asList(offKeysFinal)); + setKeys.add(generalSet); + } + HashSet firstSet = new HashSet(); + HashSet secondSet = new HashSet(); + HashSet finalSet = new HashSet(); + for (HashSet keySet : setKeys) { + if (setKeys.size() == 1) { + finalSet.addAll(keySet); + } else { + if (firstSet.size() == 0) { + firstSet.addAll(keySet); + } else { + secondSet.addAll(firstSet); + secondSet.retainAll(keySet); + finalSet.addAll(secondSet); + } + } } final TIntLongHashMap foffsets = offsets; + offKeys = finalSet.toArray(new Integer[finalSet.size()]); + for (Integer key : offKeys) { + foffsets.put(key, offsets.get(key)); + } + Arrays.sort(offKeys, new Comparator() { @Override public int compare(Integer object1, Integer object2) { @@ -332,7 +366,7 @@ public class BinaryMapPoiReaderAdapter { }); int p = BUCKET_SEARCH_BY_NAME * 3; if (p < offKeys.length) { - for (int i = p + BUCKET_SEARCH_BY_NAME; ; i += BUCKET_SEARCH_BY_NAME) { + for (int i = p + BUCKET_SEARCH_BY_NAME;; i += BUCKET_SEARCH_BY_NAME) { if (i > offKeys.length) { Arrays.sort(offKeys, p, offKeys.length); break; @@ -344,7 +378,6 @@ public class BinaryMapPoiReaderAdapter { } } - LOG.info("Searched poi structure in " + (System.currentTimeMillis() - time) + "ms. Found " + offKeys.length + " subtrees"); for (int j = 0; j < offKeys.length; j++) { @@ -368,42 +401,54 @@ public class BinaryMapPoiReaderAdapter { } } - private TIntLongHashMap readPoiNameIndex(Collator instance, String query, SearchRequest req) throws IOException { - TIntLongHashMap offsets = new TIntLongHashMap(); - TIntArrayList dataOffsets = null; + private HashMap readPoiNameIndex(Collator instance, String query, SearchRequest req) throws IOException { + HashMap dataOffsetsMap = null; + HashMap offsetsMap = new HashMap<>(); int offset = 0; while (true) { int t = codedIS.readTag(); int tag = WireFormat.getTagFieldNumber(t); switch (tag) { case 0: - return offsets; + return offsetsMap; case OsmandOdb.OsmAndPoiNameIndex.TABLE_FIELD_NUMBER: { int length = readInt(); int oldLimit = codedIS.pushLimit(length); - dataOffsets = new TIntArrayList(); + dataOffsetsMap = new HashMap<>(); offset = codedIS.getTotalBytesRead(); - map.readIndexedStringTable(instance, query, "", dataOffsets, 0); + map.readIndexedStringTable(instance, query, "", dataOffsetsMap, 0); codedIS.popLimit(oldLimit); break; } case OsmandOdb.OsmAndPoiNameIndex.DATA_FIELD_NUMBER: { - if (dataOffsets != null) { - dataOffsets.sort(); // 1104125 - for (int i = 0; i < dataOffsets.size(); i++) { - codedIS.seek(dataOffsets.get(i) + offset); - int len = codedIS.readRawVarint32(); - int oldLim = codedIS.pushLimit(len); - readPoiNameIndexData(offsets, req); - codedIS.popLimit(oldLim); - if (req.isCancelled()) { - codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); - return offsets; + offsetsMap = new HashMap<>(); + if (dataOffsetsMap != null) { + for (Entry item : dataOffsetsMap.entrySet()) { + TIntLongHashMap offsets = new TIntLongHashMap(); + TIntArrayList dataOffsets = item.getValue(); + String word = item.getKey(); + dataOffsets.sort(); // 1104125 + for (int i = 0; i < dataOffsets.size(); i++) { + codedIS.seek(dataOffsets.get(i) + offset); + int len = codedIS.readRawVarint32(); + int oldLim = codedIS.pushLimit(len); + readPoiNameIndexData(offsets, req); + codedIS.popLimit(oldLim); + + if (offsetsMap.containsKey(word)) { + offsetsMap.get(word).putAll(offsets); + } else { + TIntLongHashMap map = new TIntLongHashMap(); + map.putAll(offsets); + offsetsMap.put(word, map); + } } } + codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); + return offsetsMap; } codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); - return offsets; + return offsetsMap; } default: skipUnknownField(t);