From 3cb9ab9eff8c35c951a0ca3db3c7e995f09849ab Mon Sep 17 00:00:00 2001 From: Victor Shcherb Date: Mon, 1 Feb 2021 19:47:20 +0100 Subject: [PATCH] Try to separate how we read & search poi by exact multiword --- .../binary/BinaryMapAddressReaderAdapter.java | 5 +- .../osmand/binary/BinaryMapIndexReader.java | 147 ++++++------------ .../binary/BinaryMapPoiReaderAdapter.java | 120 ++++++-------- 3 files changed, 100 insertions(+), 172 deletions(-) diff --git a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapAddressReaderAdapter.java b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapAddressReaderAdapter.java index 18f7b2549e..8722c96acb 100644 --- a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapAddressReaderAdapter.java +++ b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapAddressReaderAdapter.java @@ -8,6 +8,7 @@ import gnu.trove.set.hash.TIntHashSet; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.LinkedList; import java.util.List; @@ -627,7 +628,9 @@ public class BinaryMapAddressReaderAdapter { indexOffset = codedIS.getTotalBytesRead(); int oldLimit = codedIS.pushLimit(length); // here offsets are sorted by distance - map.readIndexedStringTable(stringMatcher.getCollator(), req.nameQuery, "", loffsets, 0); + TIntArrayList charsList = new TIntArrayList(); + charsList.add(0); + map.readIndexedStringTable(stringMatcher.getCollator(), Collections.singletonList(req.nameQuery), "", Collections.singletonList(loffsets), charsList); codedIS.popLimit(oldLimit); break; case OsmAndAddressNameIndexData.ATOM_FIELD_NUMBER: diff --git a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java index 9fbf00ed4e..d00bbcd09f 100644 --- a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java +++ b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java @@ -2385,127 +2385,76 @@ public class BinaryMapIndexReader { } - int readIndexedStringTable(Collator instance, String query, String prefix, HashMap map, int charMatches) throws IOException { + void readIndexedStringTable(Collator instance, List queries, String prefix, List listOffsets, TIntArrayList charMatchesList) throws IOException { String key = null; while (true) { int t = codedIS.readTag(); int tag = WireFormat.getTagFieldNumber(t); switch (tag) { case 0: - return charMatches; + return; case OsmandOdb.IndexedStringTable.KEY_FIELD_NUMBER : key = codedIS.readString(); - if(prefix.length() > 0){ + if (prefix.length() > 0) { key = prefix + key; } - // check query is part of key (the best matching) - if(CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_STARTS_FROM_SPACE)){ - if(query.length() >= charMatches){ - if(query.length() > charMatches){ - charMatches = query.length(); - map.clear(); + for (int i = 0; i < queries.size(); i++) { + int charMatches = charMatchesList.get(i); + if (charMatches < 0) { + continue; + } + String query = queries.get(i); + // check query is part of key (the best matching) + if (CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) { + if (query.length() >= charMatches) { + if (query.length() > charMatches) { + charMatchesList.set(i, query.length()); + listOffsets.get(i).clear(); + } + } else { + charMatchesList.set(i, -1); + } + // check key is part of query + } else if (CollatorStringMatcher.cmatches(instance, query, key, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) { + if (key.length() >= charMatches) { + if (key.length() > charMatches) { + charMatchesList.set(i, key.length()); + listOffsets.get(i).clear(); + } + } else { + charMatchesList.set(i, -1); } } else { - key = null; + charMatchesList.set(i, -1); } - // check key is part of query - } else if (CollatorStringMatcher.cmatches(instance, query, key, StringMatcherMode.CHECK_STARTS_FROM_SPACE)) { - if (key.length() >= charMatches) { - if (key.length() > charMatches) { - charMatches = key.length(); - map.clear(); - } - } else { - key = null; - } - } else { - key = null; - } - break; - case OsmandOdb.IndexedStringTable.VAL_FIELD_NUMBER: - int val = readInt(); - if (key != null) { - String[] words = query.split(" "); - String keyByWord = null; - for (String w : words) { - if (instance.equals(w.substring(0, charMatches), key)) { - keyByWord = w; - } - } - if (map.containsKey(keyByWord)) { - map.get(keyByWord).add(val); - } else { - TIntArrayList list = new TIntArrayList(); - list.add(val); - map.put(keyByWord, list); - } - } - break; - case OsmandOdb.IndexedStringTable.SUBTABLES_FIELD_NUMBER : - int len = codedIS.readRawVarint32(); - int oldLim = codedIS.pushLimit(len); - if (key != null) { - charMatches = readIndexedStringTable(instance, query, key, map, charMatches); - } else { - codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); - } - codedIS.popLimit(oldLim); - break; - default: - skipUnknownField(t); - break; - } - } - } - - int readIndexedStringTable(Collator instance, String query, String prefix, TIntArrayList list, int charMatches) throws IOException { - String key = null; - while (true) { - int t = codedIS.readTag(); - int tag = WireFormat.getTagFieldNumber(t); - switch (tag) { - case 0: - return charMatches; - case OsmandOdb.IndexedStringTable.KEY_FIELD_NUMBER : - key = codedIS.readString(); - if(prefix.length() > 0){ - key = prefix + key; - } - // check query is part of key (the best matching) - if(CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_ONLY_STARTS_WITH)){ - if(query.length() >= charMatches){ - if(query.length() > charMatches){ - charMatches = query.length(); - list.clear(); - } - } else { - key = null; - } - // check key is part of query - } else if (CollatorStringMatcher.cmatches(instance, query, key, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) { - if (key.length() >= charMatches) { - if (key.length() > charMatches) { - charMatches = key.length(); - list.clear(); - } - } else { - key = null; - } - } else { - key = null; } break; case OsmandOdb.IndexedStringTable.VAL_FIELD_NUMBER : int val = readInt(); - if (key != null) { - list.add(val); + for (int i = 0; i < queries.size(); i++) { + if (charMatchesList.get(i) >= 0) { + listOffsets.get(i).add(val); + } } break; case OsmandOdb.IndexedStringTable.SUBTABLES_FIELD_NUMBER : int len = codedIS.readRawVarint32(); int oldLim = codedIS.pushLimit(len); - if (key != null) { - charMatches = readIndexedStringTable(instance, query, key, list, charMatches); + boolean shouldWeReadSubtable = false; + for (int i = 0; i < queries.size(); i++) { + if (charMatchesList.get(i) >= 0) { + shouldWeReadSubtable = true; + } + } + if (shouldWeReadSubtable && key != null) { + TIntArrayList subcharMatchesList = new TIntArrayList(charMatchesList); + readIndexedStringTable(instance, queries, key, listOffsets, subcharMatchesList); + // looks like true + for (int i = 0; i < queries.size(); i++) { + if (subcharMatchesList.get(i) >= charMatchesList.get(i)) { + charMatchesList.set(i, subcharMatchesList.get(i)); + } + } } else { codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); } diff --git a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java index 190576454c..1b959a8cf9 100644 --- a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java +++ b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java @@ -1,20 +1,21 @@ package net.osmand.binary; -import gnu.trove.list.array.TIntArrayList; -import gnu.trove.map.hash.TIntLongHashMap; -import gnu.trove.set.hash.TLongHashSet; - import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; import java.util.LinkedList; import java.util.List; -import java.util.Map.Entry; +import org.apache.commons.logging.Log; + +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.WireFormat; + +import gnu.trove.list.array.TIntArrayList; +import gnu.trove.map.hash.TIntLongHashMap; +import gnu.trove.set.hash.TLongHashSet; import net.osmand.Collator; import net.osmand.CollatorStringMatcher; import net.osmand.CollatorStringMatcher.StringMatcherMode; @@ -29,11 +30,6 @@ import net.osmand.osm.MapPoiTypes; import net.osmand.osm.PoiCategory; import net.osmand.util.MapUtils; -import org.apache.commons.logging.Log; - -import com.google.protobuf.CodedInputStream; -import com.google.protobuf.WireFormat; - public class BinaryMapPoiReaderAdapter { private static final Log LOG = PlatformUtil.getLog(BinaryMapPoiReaderAdapter.class); @@ -41,7 +37,7 @@ public class BinaryMapPoiReaderAdapter { private static final int CATEGORY_MASK = (1 << SHIFT_BITS_CATEGORY) - 1; private static final int ZOOM_TO_SKIP_FILTER_READ = 6; private static final int ZOOM_TO_SKIP_FILTER = 3; - private static final int BUCKET_SEARCH_BY_NAME = 5; + private static final int BUCKET_SEARCH_BY_NAME = 15; // should be bigger 100? public static class PoiSubType { public boolean text; @@ -296,7 +292,6 @@ public class BinaryMapPoiReaderAdapter { } protected void searchPoiByName(PoiRegion region, SearchRequest req) throws IOException { - HashMap offsetsMap = new HashMap<>(); TIntLongHashMap offsets = new TIntLongHashMap(); String query = normalizeSearchPoiByNameQuery(req.nameQuery); CollatorStringMatcher matcher = new CollatorStringMatcher(query, @@ -316,48 +311,18 @@ public class BinaryMapPoiReaderAdapter { int length = readInt(); int oldLimit = codedIS.pushLimit(length); // here offsets are sorted by distance - offsetsMap = readPoiNameIndex(matcher.getCollator(), query, req); + offsets = readPoiNameIndex(matcher.getCollator(), query, req); codedIS.popLimit(oldLimit); break; case OsmandOdb.OsmAndPoiIndex.POIDATA_FIELD_NUMBER: // also offsets can be randomly skipped by limit Integer[] offKeys = new Integer[offsets.size()]; - if (offsetsMap.size() > 0) { - List> setKeys = new ArrayList<>(); - for (Entry item : offsetsMap.entrySet()) { - TIntLongHashMap sets = item.getValue(); - - Integer[] offKeysFinal = new Integer[sets.size()]; - int[] keys = sets.keys(); - for (int i = 0; i < keys.length; i++) { - offKeysFinal[i] = keys[i]; - } - offsets.putAll(sets); - HashSet generalSet = new HashSet(Arrays.asList(offKeysFinal)); - setKeys.add(generalSet); - } - HashSet firstSet = new HashSet(); - HashSet secondSet = new HashSet(); - HashSet finalSet = new HashSet(); - for (HashSet keySet : setKeys) { - if (setKeys.size() == 1) { - finalSet.addAll(keySet); - } else { - if (firstSet.size() == 0) { - firstSet.addAll(keySet); - } else { - secondSet.addAll(firstSet); - secondSet.retainAll(keySet); - finalSet.addAll(secondSet); - } - } + if (offsets.size() > 0) { + int[] keys = offsets.keys(); + for (int i = 0; i < keys.length; i++) { + offKeys[i] = keys[i]; } final TIntLongHashMap foffsets = offsets; - offKeys = finalSet.toArray(new Integer[finalSet.size()]); - for (Integer key : offKeys) { - foffsets.put(key, offsets.get(key)); - } - Arrays.sort(offKeys, new Comparator() { @Override public int compare(Integer object1, Integer object2) { @@ -401,54 +366,65 @@ public class BinaryMapPoiReaderAdapter { } } - private HashMap readPoiNameIndex(Collator instance, String query, SearchRequest req) throws IOException { - HashMap dataOffsetsMap = null; - HashMap offsetsMap = new HashMap<>(); + private TIntLongHashMap readPoiNameIndex(Collator instance, String query, SearchRequest req) throws IOException { + TIntLongHashMap offsets = new TIntLongHashMap(); + List listOffsets = null; + List listOfSepOffsets = new ArrayList(); int offset = 0; while (true) { int t = codedIS.readTag(); int tag = WireFormat.getTagFieldNumber(t); switch (tag) { case 0: - return offsetsMap; + return offsets; case OsmandOdb.OsmAndPoiNameIndex.TABLE_FIELD_NUMBER: { int length = readInt(); int oldLimit = codedIS.pushLimit(length); - dataOffsetsMap = new HashMap<>(); offset = codedIS.getTotalBytesRead(); - map.readIndexedStringTable(instance, query, "", dataOffsetsMap, 0); + List queries = Arrays.asList(query.split(" ")); + TIntArrayList charsList = new TIntArrayList(queries.size()); + listOffsets = new ArrayList(queries.size()); + while(listOffsets.size() < queries.size()) { + charsList.add(0); + listOffsets.add(new TIntArrayList()); + } + map.readIndexedStringTable(instance, queries, "", listOffsets, charsList); codedIS.popLimit(oldLimit); break; } case OsmandOdb.OsmAndPoiNameIndex.DATA_FIELD_NUMBER: { - offsetsMap = new HashMap<>(); - if (dataOffsetsMap != null) { - for (Entry item : dataOffsetsMap.entrySet()) { - TIntLongHashMap offsets = new TIntLongHashMap(); - TIntArrayList dataOffsets = item.getValue(); - String word = item.getKey(); + if (listOffsets != null) { + for (TIntArrayList dataOffsets : listOffsets) { + TIntLongHashMap offsetMap = new TIntLongHashMap(); + listOfSepOffsets.add(offsetMap); dataOffsets.sort(); // 1104125 for (int i = 0; i < dataOffsets.size(); i++) { codedIS.seek(dataOffsets.get(i) + offset); int len = codedIS.readRawVarint32(); int oldLim = codedIS.pushLimit(len); - readPoiNameIndexData(offsets, req); + readPoiNameIndexData(offsetMap, req); codedIS.popLimit(oldLim); - - if (offsetsMap.containsKey(word)) { - offsetsMap.get(word).putAll(offsets); - } else { - TIntLongHashMap map = new TIntLongHashMap(); - map.putAll(offsets); - offsetsMap.put(word, map); + if (req.isCancelled()) { + codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); + return offsets; + } + } + } + } + if (listOfSepOffsets.size() > 0) { + offsets.putAll(listOfSepOffsets.get(0)); + for (int j = 1; j < listOfSepOffsets.size(); j++) { + TIntLongHashMap mp = listOfSepOffsets.get(j); + // offsets.retainAll(mp); -- calculate intresection of mp & offsets + for (int chKey : offsets.keys()) { + if (!mp.containsKey(chKey)) { + offsets.remove(chKey); } } } - codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); - return offsetsMap; } codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); - return offsetsMap; + return offsets; } default: skipUnknownField(t);