From 016320795c0a0ba066df4948c62154d8f487c794 Mon Sep 17 00:00:00 2001 From: Kseniia Date: Mon, 1 Feb 2021 19:31:47 +0200 Subject: [PATCH 1/4] fix search --- .../osmand/binary/BinaryMapIndexReader.java | 79 ++++++++++++++- .../binary/BinaryMapPoiReaderAdapter.java | 95 ++++++++++++++----- 2 files changed, 146 insertions(+), 28 deletions(-) diff --git a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java index 8743940a74..9fbf00ed4e 100644 --- a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java +++ b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java @@ -2161,7 +2161,7 @@ public class BinaryMapIndexReader { private static boolean testAddressSearch = false; private static boolean testAddressSearchName = false; private static boolean testAddressJustifySearch = false; - private static boolean testPoiSearch = false; + private static boolean testPoiSearch = true; private static boolean testPoiSearchOnPath = false; private static boolean testTransportSearch = true; @@ -2177,7 +2177,7 @@ public class BinaryMapIndexReader { public static void main(String[] args) throws IOException { File fl = new File(System.getProperty("maps") + "/Synthetic_test_rendering.obf"); - fl = new File("/home/madwasp79/OsmAnd-maps/Poly_center2.obf"); + fl = new File("/Users/plotva/work/osmand/maps/Wikivoyage.obf"); RandomAccessFile raf = new RandomAccessFile(fl, "r"); @@ -2325,7 +2325,7 @@ public class BinaryMapIndexReader { private static void testPoiSearchByName(BinaryMapIndexReader reader) throws IOException { println("Searching by name..."); - SearchRequest req = buildSearchPoiRequest(0, 0, "Art", + SearchRequest req = buildSearchPoiRequest(0, 0, "central ukraine", 0, Integer.MAX_VALUE, 0, Integer.MAX_VALUE, null); reader.searchPoiByName(req); @@ -2385,6 +2385,79 @@ public class BinaryMapIndexReader { } + int readIndexedStringTable(Collator instance, String query, String prefix, HashMap map, int charMatches) throws IOException { + String key = null; + while (true) { + int t = codedIS.readTag(); + int tag = WireFormat.getTagFieldNumber(t); + switch (tag) { + case 0: + return charMatches; + case OsmandOdb.IndexedStringTable.KEY_FIELD_NUMBER : + key = codedIS.readString(); + if(prefix.length() > 0){ + key = prefix + key; + } + // check query is part of key (the best matching) + if(CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_STARTS_FROM_SPACE)){ + if(query.length() >= charMatches){ + if(query.length() > charMatches){ + charMatches = query.length(); + map.clear(); + } + } else { + key = null; + } + // check key is part of query + } else if (CollatorStringMatcher.cmatches(instance, query, key, StringMatcherMode.CHECK_STARTS_FROM_SPACE)) { + if (key.length() >= charMatches) { + if (key.length() > charMatches) { + charMatches = key.length(); + map.clear(); + } + } else { + key = null; + } + } else { + key = null; + } + break; + case OsmandOdb.IndexedStringTable.VAL_FIELD_NUMBER: + int val = readInt(); + if (key != null) { + String[] words = query.split(" "); + String keyByWord = null; + for (String w : words) { + if (instance.equals(w.substring(0, charMatches), key)) { + keyByWord = w; + } + } + if (map.containsKey(keyByWord)) { + map.get(keyByWord).add(val); + } else { + TIntArrayList list = new TIntArrayList(); + list.add(val); + map.put(keyByWord, list); + } + } + break; + case OsmandOdb.IndexedStringTable.SUBTABLES_FIELD_NUMBER : + int len = codedIS.readRawVarint32(); + int oldLim = codedIS.pushLimit(len); + if (key != null) { + charMatches = readIndexedStringTable(instance, query, key, map, charMatches); + } else { + codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); + } + codedIS.popLimit(oldLim); + break; + default: + skipUnknownField(t); + break; + } + } + } + int readIndexedStringTable(Collator instance, String query, String prefix, TIntArrayList list, int charMatches) throws IOException { String key = null; while (true) { diff --git a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java index cb348cf9b4..190576454c 100644 --- a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java +++ b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java @@ -9,8 +9,11 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; +import java.util.Map.Entry; import net.osmand.Collator; import net.osmand.CollatorStringMatcher; @@ -293,6 +296,7 @@ public class BinaryMapPoiReaderAdapter { } protected void searchPoiByName(PoiRegion region, SearchRequest req) throws IOException { + HashMap offsetsMap = new HashMap<>(); TIntLongHashMap offsets = new TIntLongHashMap(); String query = normalizeSearchPoiByNameQuery(req.nameQuery); CollatorStringMatcher matcher = new CollatorStringMatcher(query, @@ -312,18 +316,48 @@ public class BinaryMapPoiReaderAdapter { int length = readInt(); int oldLimit = codedIS.pushLimit(length); // here offsets are sorted by distance - offsets = readPoiNameIndex(matcher.getCollator(), query, req); + offsetsMap = readPoiNameIndex(matcher.getCollator(), query, req); codedIS.popLimit(oldLimit); break; case OsmandOdb.OsmAndPoiIndex.POIDATA_FIELD_NUMBER: // also offsets can be randomly skipped by limit Integer[] offKeys = new Integer[offsets.size()]; - if (offsets.size() > 0) { - int[] keys = offsets.keys(); - for (int i = 0; i < keys.length; i++) { - offKeys[i] = keys[i]; + if (offsetsMap.size() > 0) { + List> setKeys = new ArrayList<>(); + for (Entry item : offsetsMap.entrySet()) { + TIntLongHashMap sets = item.getValue(); + + Integer[] offKeysFinal = new Integer[sets.size()]; + int[] keys = sets.keys(); + for (int i = 0; i < keys.length; i++) { + offKeysFinal[i] = keys[i]; + } + offsets.putAll(sets); + HashSet generalSet = new HashSet(Arrays.asList(offKeysFinal)); + setKeys.add(generalSet); + } + HashSet firstSet = new HashSet(); + HashSet secondSet = new HashSet(); + HashSet finalSet = new HashSet(); + for (HashSet keySet : setKeys) { + if (setKeys.size() == 1) { + finalSet.addAll(keySet); + } else { + if (firstSet.size() == 0) { + firstSet.addAll(keySet); + } else { + secondSet.addAll(firstSet); + secondSet.retainAll(keySet); + finalSet.addAll(secondSet); + } + } } final TIntLongHashMap foffsets = offsets; + offKeys = finalSet.toArray(new Integer[finalSet.size()]); + for (Integer key : offKeys) { + foffsets.put(key, offsets.get(key)); + } + Arrays.sort(offKeys, new Comparator() { @Override public int compare(Integer object1, Integer object2) { @@ -332,7 +366,7 @@ public class BinaryMapPoiReaderAdapter { }); int p = BUCKET_SEARCH_BY_NAME * 3; if (p < offKeys.length) { - for (int i = p + BUCKET_SEARCH_BY_NAME; ; i += BUCKET_SEARCH_BY_NAME) { + for (int i = p + BUCKET_SEARCH_BY_NAME;; i += BUCKET_SEARCH_BY_NAME) { if (i > offKeys.length) { Arrays.sort(offKeys, p, offKeys.length); break; @@ -344,7 +378,6 @@ public class BinaryMapPoiReaderAdapter { } } - LOG.info("Searched poi structure in " + (System.currentTimeMillis() - time) + "ms. Found " + offKeys.length + " subtrees"); for (int j = 0; j < offKeys.length; j++) { @@ -368,42 +401,54 @@ public class BinaryMapPoiReaderAdapter { } } - private TIntLongHashMap readPoiNameIndex(Collator instance, String query, SearchRequest req) throws IOException { - TIntLongHashMap offsets = new TIntLongHashMap(); - TIntArrayList dataOffsets = null; + private HashMap readPoiNameIndex(Collator instance, String query, SearchRequest req) throws IOException { + HashMap dataOffsetsMap = null; + HashMap offsetsMap = new HashMap<>(); int offset = 0; while (true) { int t = codedIS.readTag(); int tag = WireFormat.getTagFieldNumber(t); switch (tag) { case 0: - return offsets; + return offsetsMap; case OsmandOdb.OsmAndPoiNameIndex.TABLE_FIELD_NUMBER: { int length = readInt(); int oldLimit = codedIS.pushLimit(length); - dataOffsets = new TIntArrayList(); + dataOffsetsMap = new HashMap<>(); offset = codedIS.getTotalBytesRead(); - map.readIndexedStringTable(instance, query, "", dataOffsets, 0); + map.readIndexedStringTable(instance, query, "", dataOffsetsMap, 0); codedIS.popLimit(oldLimit); break; } case OsmandOdb.OsmAndPoiNameIndex.DATA_FIELD_NUMBER: { - if (dataOffsets != null) { - dataOffsets.sort(); // 1104125 - for (int i = 0; i < dataOffsets.size(); i++) { - codedIS.seek(dataOffsets.get(i) + offset); - int len = codedIS.readRawVarint32(); - int oldLim = codedIS.pushLimit(len); - readPoiNameIndexData(offsets, req); - codedIS.popLimit(oldLim); - if (req.isCancelled()) { - codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); - return offsets; + offsetsMap = new HashMap<>(); + if (dataOffsetsMap != null) { + for (Entry item : dataOffsetsMap.entrySet()) { + TIntLongHashMap offsets = new TIntLongHashMap(); + TIntArrayList dataOffsets = item.getValue(); + String word = item.getKey(); + dataOffsets.sort(); // 1104125 + for (int i = 0; i < dataOffsets.size(); i++) { + codedIS.seek(dataOffsets.get(i) + offset); + int len = codedIS.readRawVarint32(); + int oldLim = codedIS.pushLimit(len); + readPoiNameIndexData(offsets, req); + codedIS.popLimit(oldLim); + + if (offsetsMap.containsKey(word)) { + offsetsMap.get(word).putAll(offsets); + } else { + TIntLongHashMap map = new TIntLongHashMap(); + map.putAll(offsets); + offsetsMap.put(word, map); + } } } + codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); + return offsetsMap; } codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); - return offsets; + return offsetsMap; } default: skipUnknownField(t); From 3cb9ab9eff8c35c951a0ca3db3c7e995f09849ab Mon Sep 17 00:00:00 2001 From: Victor Shcherb Date: Mon, 1 Feb 2021 19:47:20 +0100 Subject: [PATCH 2/4] Try to separate how we read & search poi by exact multiword --- .../binary/BinaryMapAddressReaderAdapter.java | 5 +- .../osmand/binary/BinaryMapIndexReader.java | 147 ++++++------------ .../binary/BinaryMapPoiReaderAdapter.java | 120 ++++++-------- 3 files changed, 100 insertions(+), 172 deletions(-) diff --git a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapAddressReaderAdapter.java b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapAddressReaderAdapter.java index 18f7b2549e..8722c96acb 100644 --- a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapAddressReaderAdapter.java +++ b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapAddressReaderAdapter.java @@ -8,6 +8,7 @@ import gnu.trove.set.hash.TIntHashSet; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.LinkedList; import java.util.List; @@ -627,7 +628,9 @@ public class BinaryMapAddressReaderAdapter { indexOffset = codedIS.getTotalBytesRead(); int oldLimit = codedIS.pushLimit(length); // here offsets are sorted by distance - map.readIndexedStringTable(stringMatcher.getCollator(), req.nameQuery, "", loffsets, 0); + TIntArrayList charsList = new TIntArrayList(); + charsList.add(0); + map.readIndexedStringTable(stringMatcher.getCollator(), Collections.singletonList(req.nameQuery), "", Collections.singletonList(loffsets), charsList); codedIS.popLimit(oldLimit); break; case OsmAndAddressNameIndexData.ATOM_FIELD_NUMBER: diff --git a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java index 9fbf00ed4e..d00bbcd09f 100644 --- a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java +++ b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java @@ -2385,127 +2385,76 @@ public class BinaryMapIndexReader { } - int readIndexedStringTable(Collator instance, String query, String prefix, HashMap map, int charMatches) throws IOException { + void readIndexedStringTable(Collator instance, List queries, String prefix, List listOffsets, TIntArrayList charMatchesList) throws IOException { String key = null; while (true) { int t = codedIS.readTag(); int tag = WireFormat.getTagFieldNumber(t); switch (tag) { case 0: - return charMatches; + return; case OsmandOdb.IndexedStringTable.KEY_FIELD_NUMBER : key = codedIS.readString(); - if(prefix.length() > 0){ + if (prefix.length() > 0) { key = prefix + key; } - // check query is part of key (the best matching) - if(CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_STARTS_FROM_SPACE)){ - if(query.length() >= charMatches){ - if(query.length() > charMatches){ - charMatches = query.length(); - map.clear(); + for (int i = 0; i < queries.size(); i++) { + int charMatches = charMatchesList.get(i); + if (charMatches < 0) { + continue; + } + String query = queries.get(i); + // check query is part of key (the best matching) + if (CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) { + if (query.length() >= charMatches) { + if (query.length() > charMatches) { + charMatchesList.set(i, query.length()); + listOffsets.get(i).clear(); + } + } else { + charMatchesList.set(i, -1); + } + // check key is part of query + } else if (CollatorStringMatcher.cmatches(instance, query, key, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) { + if (key.length() >= charMatches) { + if (key.length() > charMatches) { + charMatchesList.set(i, key.length()); + listOffsets.get(i).clear(); + } + } else { + charMatchesList.set(i, -1); } } else { - key = null; + charMatchesList.set(i, -1); } - // check key is part of query - } else if (CollatorStringMatcher.cmatches(instance, query, key, StringMatcherMode.CHECK_STARTS_FROM_SPACE)) { - if (key.length() >= charMatches) { - if (key.length() > charMatches) { - charMatches = key.length(); - map.clear(); - } - } else { - key = null; - } - } else { - key = null; - } - break; - case OsmandOdb.IndexedStringTable.VAL_FIELD_NUMBER: - int val = readInt(); - if (key != null) { - String[] words = query.split(" "); - String keyByWord = null; - for (String w : words) { - if (instance.equals(w.substring(0, charMatches), key)) { - keyByWord = w; - } - } - if (map.containsKey(keyByWord)) { - map.get(keyByWord).add(val); - } else { - TIntArrayList list = new TIntArrayList(); - list.add(val); - map.put(keyByWord, list); - } - } - break; - case OsmandOdb.IndexedStringTable.SUBTABLES_FIELD_NUMBER : - int len = codedIS.readRawVarint32(); - int oldLim = codedIS.pushLimit(len); - if (key != null) { - charMatches = readIndexedStringTable(instance, query, key, map, charMatches); - } else { - codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); - } - codedIS.popLimit(oldLim); - break; - default: - skipUnknownField(t); - break; - } - } - } - - int readIndexedStringTable(Collator instance, String query, String prefix, TIntArrayList list, int charMatches) throws IOException { - String key = null; - while (true) { - int t = codedIS.readTag(); - int tag = WireFormat.getTagFieldNumber(t); - switch (tag) { - case 0: - return charMatches; - case OsmandOdb.IndexedStringTable.KEY_FIELD_NUMBER : - key = codedIS.readString(); - if(prefix.length() > 0){ - key = prefix + key; - } - // check query is part of key (the best matching) - if(CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_ONLY_STARTS_WITH)){ - if(query.length() >= charMatches){ - if(query.length() > charMatches){ - charMatches = query.length(); - list.clear(); - } - } else { - key = null; - } - // check key is part of query - } else if (CollatorStringMatcher.cmatches(instance, query, key, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) { - if (key.length() >= charMatches) { - if (key.length() > charMatches) { - charMatches = key.length(); - list.clear(); - } - } else { - key = null; - } - } else { - key = null; } break; case OsmandOdb.IndexedStringTable.VAL_FIELD_NUMBER : int val = readInt(); - if (key != null) { - list.add(val); + for (int i = 0; i < queries.size(); i++) { + if (charMatchesList.get(i) >= 0) { + listOffsets.get(i).add(val); + } } break; case OsmandOdb.IndexedStringTable.SUBTABLES_FIELD_NUMBER : int len = codedIS.readRawVarint32(); int oldLim = codedIS.pushLimit(len); - if (key != null) { - charMatches = readIndexedStringTable(instance, query, key, list, charMatches); + boolean shouldWeReadSubtable = false; + for (int i = 0; i < queries.size(); i++) { + if (charMatchesList.get(i) >= 0) { + shouldWeReadSubtable = true; + } + } + if (shouldWeReadSubtable && key != null) { + TIntArrayList subcharMatchesList = new TIntArrayList(charMatchesList); + readIndexedStringTable(instance, queries, key, listOffsets, subcharMatchesList); + // looks like true + for (int i = 0; i < queries.size(); i++) { + if (subcharMatchesList.get(i) >= charMatchesList.get(i)) { + charMatchesList.set(i, subcharMatchesList.get(i)); + } + } } else { codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); } diff --git a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java index 190576454c..1b959a8cf9 100644 --- a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java +++ b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java @@ -1,20 +1,21 @@ package net.osmand.binary; -import gnu.trove.list.array.TIntArrayList; -import gnu.trove.map.hash.TIntLongHashMap; -import gnu.trove.set.hash.TLongHashSet; - import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; import java.util.LinkedList; import java.util.List; -import java.util.Map.Entry; +import org.apache.commons.logging.Log; + +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.WireFormat; + +import gnu.trove.list.array.TIntArrayList; +import gnu.trove.map.hash.TIntLongHashMap; +import gnu.trove.set.hash.TLongHashSet; import net.osmand.Collator; import net.osmand.CollatorStringMatcher; import net.osmand.CollatorStringMatcher.StringMatcherMode; @@ -29,11 +30,6 @@ import net.osmand.osm.MapPoiTypes; import net.osmand.osm.PoiCategory; import net.osmand.util.MapUtils; -import org.apache.commons.logging.Log; - -import com.google.protobuf.CodedInputStream; -import com.google.protobuf.WireFormat; - public class BinaryMapPoiReaderAdapter { private static final Log LOG = PlatformUtil.getLog(BinaryMapPoiReaderAdapter.class); @@ -41,7 +37,7 @@ public class BinaryMapPoiReaderAdapter { private static final int CATEGORY_MASK = (1 << SHIFT_BITS_CATEGORY) - 1; private static final int ZOOM_TO_SKIP_FILTER_READ = 6; private static final int ZOOM_TO_SKIP_FILTER = 3; - private static final int BUCKET_SEARCH_BY_NAME = 5; + private static final int BUCKET_SEARCH_BY_NAME = 15; // should be bigger 100? public static class PoiSubType { public boolean text; @@ -296,7 +292,6 @@ public class BinaryMapPoiReaderAdapter { } protected void searchPoiByName(PoiRegion region, SearchRequest req) throws IOException { - HashMap offsetsMap = new HashMap<>(); TIntLongHashMap offsets = new TIntLongHashMap(); String query = normalizeSearchPoiByNameQuery(req.nameQuery); CollatorStringMatcher matcher = new CollatorStringMatcher(query, @@ -316,48 +311,18 @@ public class BinaryMapPoiReaderAdapter { int length = readInt(); int oldLimit = codedIS.pushLimit(length); // here offsets are sorted by distance - offsetsMap = readPoiNameIndex(matcher.getCollator(), query, req); + offsets = readPoiNameIndex(matcher.getCollator(), query, req); codedIS.popLimit(oldLimit); break; case OsmandOdb.OsmAndPoiIndex.POIDATA_FIELD_NUMBER: // also offsets can be randomly skipped by limit Integer[] offKeys = new Integer[offsets.size()]; - if (offsetsMap.size() > 0) { - List> setKeys = new ArrayList<>(); - for (Entry item : offsetsMap.entrySet()) { - TIntLongHashMap sets = item.getValue(); - - Integer[] offKeysFinal = new Integer[sets.size()]; - int[] keys = sets.keys(); - for (int i = 0; i < keys.length; i++) { - offKeysFinal[i] = keys[i]; - } - offsets.putAll(sets); - HashSet generalSet = new HashSet(Arrays.asList(offKeysFinal)); - setKeys.add(generalSet); - } - HashSet firstSet = new HashSet(); - HashSet secondSet = new HashSet(); - HashSet finalSet = new HashSet(); - for (HashSet keySet : setKeys) { - if (setKeys.size() == 1) { - finalSet.addAll(keySet); - } else { - if (firstSet.size() == 0) { - firstSet.addAll(keySet); - } else { - secondSet.addAll(firstSet); - secondSet.retainAll(keySet); - finalSet.addAll(secondSet); - } - } + if (offsets.size() > 0) { + int[] keys = offsets.keys(); + for (int i = 0; i < keys.length; i++) { + offKeys[i] = keys[i]; } final TIntLongHashMap foffsets = offsets; - offKeys = finalSet.toArray(new Integer[finalSet.size()]); - for (Integer key : offKeys) { - foffsets.put(key, offsets.get(key)); - } - Arrays.sort(offKeys, new Comparator() { @Override public int compare(Integer object1, Integer object2) { @@ -401,54 +366,65 @@ public class BinaryMapPoiReaderAdapter { } } - private HashMap readPoiNameIndex(Collator instance, String query, SearchRequest req) throws IOException { - HashMap dataOffsetsMap = null; - HashMap offsetsMap = new HashMap<>(); + private TIntLongHashMap readPoiNameIndex(Collator instance, String query, SearchRequest req) throws IOException { + TIntLongHashMap offsets = new TIntLongHashMap(); + List listOffsets = null; + List listOfSepOffsets = new ArrayList(); int offset = 0; while (true) { int t = codedIS.readTag(); int tag = WireFormat.getTagFieldNumber(t); switch (tag) { case 0: - return offsetsMap; + return offsets; case OsmandOdb.OsmAndPoiNameIndex.TABLE_FIELD_NUMBER: { int length = readInt(); int oldLimit = codedIS.pushLimit(length); - dataOffsetsMap = new HashMap<>(); offset = codedIS.getTotalBytesRead(); - map.readIndexedStringTable(instance, query, "", dataOffsetsMap, 0); + List queries = Arrays.asList(query.split(" ")); + TIntArrayList charsList = new TIntArrayList(queries.size()); + listOffsets = new ArrayList(queries.size()); + while(listOffsets.size() < queries.size()) { + charsList.add(0); + listOffsets.add(new TIntArrayList()); + } + map.readIndexedStringTable(instance, queries, "", listOffsets, charsList); codedIS.popLimit(oldLimit); break; } case OsmandOdb.OsmAndPoiNameIndex.DATA_FIELD_NUMBER: { - offsetsMap = new HashMap<>(); - if (dataOffsetsMap != null) { - for (Entry item : dataOffsetsMap.entrySet()) { - TIntLongHashMap offsets = new TIntLongHashMap(); - TIntArrayList dataOffsets = item.getValue(); - String word = item.getKey(); + if (listOffsets != null) { + for (TIntArrayList dataOffsets : listOffsets) { + TIntLongHashMap offsetMap = new TIntLongHashMap(); + listOfSepOffsets.add(offsetMap); dataOffsets.sort(); // 1104125 for (int i = 0; i < dataOffsets.size(); i++) { codedIS.seek(dataOffsets.get(i) + offset); int len = codedIS.readRawVarint32(); int oldLim = codedIS.pushLimit(len); - readPoiNameIndexData(offsets, req); + readPoiNameIndexData(offsetMap, req); codedIS.popLimit(oldLim); - - if (offsetsMap.containsKey(word)) { - offsetsMap.get(word).putAll(offsets); - } else { - TIntLongHashMap map = new TIntLongHashMap(); - map.putAll(offsets); - offsetsMap.put(word, map); + if (req.isCancelled()) { + codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); + return offsets; + } + } + } + } + if (listOfSepOffsets.size() > 0) { + offsets.putAll(listOfSepOffsets.get(0)); + for (int j = 1; j < listOfSepOffsets.size(); j++) { + TIntLongHashMap mp = listOfSepOffsets.get(j); + // offsets.retainAll(mp); -- calculate intresection of mp & offsets + for (int chKey : offsets.keys()) { + if (!mp.containsKey(chKey)) { + offsets.remove(chKey); } } } - codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); - return offsetsMap; } codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); - return offsetsMap; + return offsets; } default: skipUnknownField(t); From 816ed5722b2ec8c629dadb85eb150da80a7f4321 Mon Sep 17 00:00:00 2001 From: Victor Shcherb Date: Mon, 1 Feb 2021 19:49:06 +0100 Subject: [PATCH 3/4] Don't allow empty words --- .../java/net/osmand/binary/BinaryMapPoiReaderAdapter.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java index 1b959a8cf9..fd6fe7bce9 100644 --- a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java +++ b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java @@ -381,7 +381,12 @@ public class BinaryMapPoiReaderAdapter { int length = readInt(); int oldLimit = codedIS.pushLimit(length); offset = codedIS.getTotalBytesRead(); - List queries = Arrays.asList(query.split(" ")); + List queries = new ArrayList<>(); + for (String word : query.split(" ")) { + if (word.trim().length() > 0) { + queries.add(word.trim()); + } + } TIntArrayList charsList = new TIntArrayList(queries.size()); listOffsets = new ArrayList(queries.size()); while(listOffsets.size() < queries.size()) { From 8a5ff112642e25c1dbcc11804602ad454ea46c69 Mon Sep 17 00:00:00 2001 From: Victor Shcherb Date: Thu, 4 Feb 2021 19:24:44 +0100 Subject: [PATCH 4/4] Fix search --- .../osmand/binary/BinaryMapIndexReader.java | 52 +++++++++---------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java index d00bbcd09f..dc16b89f30 100644 --- a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java +++ b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java @@ -2163,7 +2163,7 @@ public class BinaryMapIndexReader { private static boolean testAddressJustifySearch = false; private static boolean testPoiSearch = true; private static boolean testPoiSearchOnPath = false; - private static boolean testTransportSearch = true; + private static boolean testTransportSearch = false; private static int sleft = MapUtils.get31TileNumberX(27.55079); private static int sright = MapUtils.get31TileNumberX(27.55317); @@ -2177,7 +2177,7 @@ public class BinaryMapIndexReader { public static void main(String[] args) throws IOException { File fl = new File(System.getProperty("maps") + "/Synthetic_test_rendering.obf"); - fl = new File("/Users/plotva/work/osmand/maps/Wikivoyage.obf"); + fl = new File(System.getProperty("maps") +"/Wikivoyage.obf__"); RandomAccessFile raf = new RandomAccessFile(fl, "r"); @@ -2385,8 +2385,10 @@ public class BinaryMapIndexReader { } - void readIndexedStringTable(Collator instance, List queries, String prefix, List listOffsets, TIntArrayList charMatchesList) throws IOException { + void readIndexedStringTable(Collator instance, List queries, String prefix, List listOffsets, TIntArrayList matchedCharacters) throws IOException { String key = null; + boolean[] matched = new boolean[matchedCharacters.size()]; + boolean shouldWeReadSubtable = false; while (true) { int t = codedIS.readTag(); int tag = WireFormat.getTagFieldNumber(t); @@ -2398,41 +2400,41 @@ public class BinaryMapIndexReader { if (prefix.length() > 0) { key = prefix + key; } + shouldWeReadSubtable = false; for (int i = 0; i < queries.size(); i++) { - int charMatches = charMatchesList.get(i); - if (charMatches < 0) { + int charMatches = matchedCharacters.get(i); + String query = queries.get(i); + matched[i] = false; + if (query == null) { continue; } - String query = queries.get(i); + // check query is part of key (the best matching) if (CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) { if (query.length() >= charMatches) { if (query.length() > charMatches) { - charMatchesList.set(i, query.length()); + matchedCharacters.set(i, query.length()); listOffsets.get(i).clear(); } - } else { - charMatchesList.set(i, -1); + matched[i] = true; } // check key is part of query } else if (CollatorStringMatcher.cmatches(instance, query, key, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) { if (key.length() >= charMatches) { if (key.length() > charMatches) { - charMatchesList.set(i, key.length()); + matchedCharacters.set(i, key.length()); listOffsets.get(i).clear(); } - } else { - charMatchesList.set(i, -1); + matched[i] = true; } - } else { - charMatchesList.set(i, -1); } + shouldWeReadSubtable |= matched[i]; } break; case OsmandOdb.IndexedStringTable.VAL_FIELD_NUMBER : int val = readInt(); for (int i = 0; i < queries.size(); i++) { - if (charMatchesList.get(i) >= 0) { + if (matched[i]) { listOffsets.get(i).add(val); } } @@ -2440,21 +2442,15 @@ public class BinaryMapIndexReader { case OsmandOdb.IndexedStringTable.SUBTABLES_FIELD_NUMBER : int len = codedIS.readRawVarint32(); int oldLim = codedIS.pushLimit(len); - boolean shouldWeReadSubtable = false; - for (int i = 0; i < queries.size(); i++) { - if (charMatchesList.get(i) >= 0) { - shouldWeReadSubtable = true; - } - } if (shouldWeReadSubtable && key != null) { - TIntArrayList subcharMatchesList = new TIntArrayList(charMatchesList); - readIndexedStringTable(instance, queries, key, listOffsets, subcharMatchesList); - // looks like true - for (int i = 0; i < queries.size(); i++) { - if (subcharMatchesList.get(i) >= charMatchesList.get(i)) { - charMatchesList.set(i, subcharMatchesList.get(i)); + List subqueries = new ArrayList<>(queries); + // reset query so we don't search what was not matched + for(int i = 0; i < queries.size(); i++) { + if(!matched[i]) { + subqueries.set(i, null); } - } + } + readIndexedStringTable(instance, subqueries, key, listOffsets, matchedCharacters); } else { codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); }