Try to separate how we read and search POIs by exact multi-word query

2021-02-01 19:47:20 +01:00 · 2021-02-01 19:47:20 +01:00 · 3cb9ab9eff
commit 3cb9ab9eff
parent 016320795c
3 changed files with 100 additions and 172 deletions
--- a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapAddressReaderAdapter.java
+++ b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapAddressReaderAdapter.java
@ -8,6 +8,7 @@ import gnu.trove.set.hash.TIntHashSet;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.LinkedList;
 import java.util.List;
@ -627,7 +628,9 @@ public class BinaryMapAddressReaderAdapter {
 				indexOffset = codedIS.getTotalBytesRead();
 				int oldLimit = codedIS.pushLimit(length);
 				// here offsets are sorted by distance
-				map.readIndexedStringTable(stringMatcher.getCollator(), req.nameQuery, "", loffsets, 0);
+				TIntArrayList charsList = new TIntArrayList();
 				charsList.add(0);
 				map.readIndexedStringTable(stringMatcher.getCollator(), Collections.singletonList(req.nameQuery), "", Collections.singletonList(loffsets), charsList);
 				codedIS.popLimit(oldLimit);
 				break;
 			case OsmAndAddressNameIndexData.ATOM_FIELD_NUMBER:
--- a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java
+++ b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java
@ -2385,127 +2385,76 @@ public class BinaryMapIndexReader {
 	}
-	int readIndexedStringTable(Collator instance, String query, String prefix, HashMap<String, TIntArrayList> map, int charMatches) throws IOException {
+	void readIndexedStringTable(Collator instance, List<String> queries, String prefix, List<TIntArrayList> listOffsets, TIntArrayList charMatchesList) throws IOException {
 		String key = null;
 		while (true) {
 			int t = codedIS.readTag();
 			int tag = WireFormat.getTagFieldNumber(t);
 			switch (tag) {
 			case 0:
-				return charMatches;
+				return;
 			case OsmandOdb.IndexedStringTable.KEY_FIELD_NUMBER :
 				key = codedIS.readString();
-				if(prefix.length() > 0){
+				if (prefix.length() > 0) {
 					key = prefix + key;
 				}
 				for (int i = 0; i < queries.size(); i++) {
 					int charMatches = charMatchesList.get(i);
 					if (charMatches < 0) {
 						continue;
 					}
 					String query = queries.get(i);
 					// check query is part of key (the best matching)
-				if(CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_STARTS_FROM_SPACE)){
+					if (CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) {
-					if(query.length() >= charMatches){
+						if (query.length() >= charMatches) {
-						if(query.length() > charMatches){
+							if (query.length() > charMatches) {
-							charMatches = query.length();
+								charMatchesList.set(i, query.length());
-							map.clear();
+								listOffsets.get(i).clear();
 							}
 						} else {
-						key = null;
+							charMatchesList.set(i, -1);
 					}
 					// check key is part of query
 				} else if (CollatorStringMatcher.cmatches(instance, query, key, StringMatcherMode.CHECK_STARTS_FROM_SPACE)) {
 					if (key.length() >= charMatches) {
 						if (key.length() > charMatches) {
 							charMatches = key.length();
 							map.clear();
 						}
 					} else {
 						key = null;
 					}
 				} else {
 					key = null;
 				}
 				break;
 			case OsmandOdb.IndexedStringTable.VAL_FIELD_NUMBER:
 				int val = readInt();
 				if (key != null) {
 					String[] words = query.split(" ");
 					String keyByWord = null;
 					for (String w : words) {
 						if (instance.equals(w.substring(0, charMatches), key)) {
 							keyByWord = w;
 						}
 					}
 					if (map.containsKey(keyByWord)) {
 						map.get(keyByWord).add(val);
 					} else {
 						TIntArrayList list = new TIntArrayList();
 						list.add(val);
 						map.put(keyByWord, list);
 					}
 				}
 				break;
 			case OsmandOdb.IndexedStringTable.SUBTABLES_FIELD_NUMBER :
 				int len = codedIS.readRawVarint32();
 				int oldLim = codedIS.pushLimit(len);
 				if (key != null) {
 					charMatches = readIndexedStringTable(instance, query, key, map, charMatches);
 				} else {
 					codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
 				}
 				codedIS.popLimit(oldLim);
 				break;
 			default:
 				skipUnknownField(t);
 				break;
 			}
 		}
 	}
 	int readIndexedStringTable(Collator instance, String query, String prefix, TIntArrayList list, int charMatches) throws IOException {
 		String key = null;
 		while (true) {
 			int t = codedIS.readTag();
 			int tag = WireFormat.getTagFieldNumber(t);
 			switch (tag) {
 			case 0:
 				return charMatches;
 			case OsmandOdb.IndexedStringTable.KEY_FIELD_NUMBER :
 				key = codedIS.readString();
 				if(prefix.length() > 0){
 					key = prefix + key;
 				}
 				// check query is part of key (the best matching)
 				if(CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_ONLY_STARTS_WITH)){
 					if(query.length() >= charMatches){
 						if(query.length() > charMatches){
 							charMatches = query.length();
 							list.clear();
 						}
 					} else {
 						key = null;
 						}
 						// check key is part of query
 					} else if (CollatorStringMatcher.cmatches(instance, query, key, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) {
 						if (key.length() >= charMatches) {
 							if (key.length() > charMatches) {
-							charMatches = key.length();
+								charMatchesList.set(i, key.length());
-							list.clear();
+								listOffsets.get(i).clear();
 							}
 						} else {
-						key = null;
+							charMatchesList.set(i, -1);
 						}
 					} else {
-					key = null;
+						charMatchesList.set(i, -1);
 					}
 				}
 				break;
 			case OsmandOdb.IndexedStringTable.VAL_FIELD_NUMBER :
 				int val = readInt();
-				if (key != null) {
+				for (int i = 0; i < queries.size(); i++) {
-					list.add(val);
+					if (charMatchesList.get(i) >= 0) {
 						listOffsets.get(i).add(val);
 					}
 				}
 				break;
 			case OsmandOdb.IndexedStringTable.SUBTABLES_FIELD_NUMBER :
 				int len = codedIS.readRawVarint32();
 				int oldLim = codedIS.pushLimit(len);
-				if (key != null) {
+				boolean shouldWeReadSubtable = false;
-					charMatches = readIndexedStringTable(instance, query, key, list, charMatches);
+				for (int i = 0; i < queries.size(); i++) {
 					if (charMatchesList.get(i) >= 0) {
 						shouldWeReadSubtable = true;
 					}
 				}
 				if (shouldWeReadSubtable && key != null) {
 					TIntArrayList subcharMatchesList = new TIntArrayList(charMatchesList);
 					readIndexedStringTable(instance, queries, key, listOffsets, subcharMatchesList);
 					// looks like true
 					for (int i = 0; i < queries.size(); i++) {
 						if (subcharMatchesList.get(i) >= charMatchesList.get(i)) {
 							charMatchesList.set(i, subcharMatchesList.get(i));
 						}
 					}	
 				} else {
 					codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
 				}
--- a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java
+++ b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java
@ -1,20 +1,21 @@
 package net.osmand.binary;
 import gnu.trove.list.array.TIntArrayList;
 import gnu.trove.map.hash.TIntLongHashMap;
 import gnu.trove.set.hash.TLongHashSet;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map.Entry;
 import org.apache.commons.logging.Log;
 import com.google.protobuf.CodedInputStream;
 import com.google.protobuf.WireFormat;
 import gnu.trove.list.array.TIntArrayList;
 import gnu.trove.map.hash.TIntLongHashMap;
 import gnu.trove.set.hash.TLongHashSet;
 import net.osmand.Collator;
 import net.osmand.CollatorStringMatcher;
 import net.osmand.CollatorStringMatcher.StringMatcherMode;
@ -29,11 +30,6 @@ import net.osmand.osm.MapPoiTypes;
 import net.osmand.osm.PoiCategory;
 import net.osmand.util.MapUtils;
 import org.apache.commons.logging.Log;
 import com.google.protobuf.CodedInputStream;
 import com.google.protobuf.WireFormat;
 public class BinaryMapPoiReaderAdapter {
 	private static final Log LOG = PlatformUtil.getLog(BinaryMapPoiReaderAdapter.class);
@ -41,7 +37,7 @@ public class BinaryMapPoiReaderAdapter {
 	private static final int CATEGORY_MASK = (1 << SHIFT_BITS_CATEGORY) - 1;
 	private static final int ZOOM_TO_SKIP_FILTER_READ = 6;
 	private static final int ZOOM_TO_SKIP_FILTER = 3;
-	private static final int BUCKET_SEARCH_BY_NAME = 5;
+	private static final int BUCKET_SEARCH_BY_NAME = 15; // should be bigger 100? 
 	public static class PoiSubType {
 		public boolean text;
@ -296,7 +292,6 @@ public class BinaryMapPoiReaderAdapter {
 	}
 	protected void searchPoiByName(PoiRegion region, SearchRequest<Amenity> req) throws IOException {
 		HashMap<String, TIntLongHashMap> offsetsMap = new HashMap<>();
 		TIntLongHashMap offsets = new TIntLongHashMap();
 		String query = normalizeSearchPoiByNameQuery(req.nameQuery);
 		CollatorStringMatcher matcher = new CollatorStringMatcher(query,
@ -316,48 +311,18 @@ public class BinaryMapPoiReaderAdapter {
 				int length = readInt();
 				int oldLimit = codedIS.pushLimit(length);
 				// here offsets are sorted by distance
-				offsetsMap = readPoiNameIndex(matcher.getCollator(), query, req);
+				offsets = readPoiNameIndex(matcher.getCollator(), query, req);
 				codedIS.popLimit(oldLimit);
 				break;
 			case OsmandOdb.OsmAndPoiIndex.POIDATA_FIELD_NUMBER:
 				// also offsets can be randomly skipped by limit
 				Integer[] offKeys = new Integer[offsets.size()];
-				if (offsetsMap.size() > 0) {
+				if (offsets.size() > 0) {
-					List<HashSet<Integer>> setKeys = new ArrayList<>();
+					int[] keys = offsets.keys();
 					for (Entry<String, TIntLongHashMap> item : offsetsMap.entrySet()) {
 						TIntLongHashMap sets = item.getValue();
 						Integer[] offKeysFinal = new Integer[sets.size()];
 						int[] keys = sets.keys();
 					for (int i = 0; i < keys.length; i++) {
-							offKeysFinal[i] = keys[i];
+						offKeys[i] = keys[i];
 						}
 						offsets.putAll(sets);
 						HashSet<Integer> generalSet = new HashSet<Integer>(Arrays.asList(offKeysFinal));
 						setKeys.add(generalSet);
 					}
 					HashSet<Integer> firstSet = new HashSet<Integer>();
 					HashSet<Integer> secondSet = new HashSet<Integer>();
 					HashSet<Integer> finalSet = new HashSet<Integer>();
 					for (HashSet<Integer> keySet : setKeys) {
 						if (setKeys.size() == 1) {
 							finalSet.addAll(keySet);
 						} else {
 							if (firstSet.size() == 0) {
 								firstSet.addAll(keySet);
 							} else {
 								secondSet.addAll(firstSet);
 								secondSet.retainAll(keySet);
 								finalSet.addAll(secondSet);
 							}
 						}
 					}
 					final TIntLongHashMap foffsets = offsets;
 					offKeys = finalSet.toArray(new Integer[finalSet.size()]);
 					for (Integer key : offKeys) {
 						foffsets.put(key, offsets.get(key));
 					}
 					Arrays.sort(offKeys, new Comparator<Integer>() {
 						@Override
 						public int compare(Integer object1, Integer object2) {
@ -401,54 +366,65 @@ public class BinaryMapPoiReaderAdapter {
 		}
 	}
-	private HashMap<String,TIntLongHashMap> readPoiNameIndex(Collator instance, String query, SearchRequest<Amenity> req) throws IOException {
+	private TIntLongHashMap readPoiNameIndex(Collator instance, String query, SearchRequest<Amenity> req) throws IOException {
-		HashMap<String, TIntArrayList> dataOffsetsMap = null;
+		TIntLongHashMap offsets = new TIntLongHashMap();
-		HashMap<String, TIntLongHashMap> offsetsMap = new HashMap<>();
+		List<TIntArrayList> listOffsets = null;
 		List<TIntLongHashMap> listOfSepOffsets = new ArrayList<TIntLongHashMap>();
 		int offset = 0;
 		while (true) {
 			int t = codedIS.readTag();
 			int tag = WireFormat.getTagFieldNumber(t);
 			switch (tag) {
 			case 0:
-				return offsetsMap;
+				return offsets;
 			case OsmandOdb.OsmAndPoiNameIndex.TABLE_FIELD_NUMBER: {
 				int length = readInt();
 				int oldLimit = codedIS.pushLimit(length);
 				dataOffsetsMap = new HashMap<>();
 				offset = codedIS.getTotalBytesRead();
-				map.readIndexedStringTable(instance, query, "", dataOffsetsMap, 0);
+				List<String> queries = Arrays.asList(query.split(" "));
 				TIntArrayList charsList = new TIntArrayList(queries.size());
 				listOffsets = new ArrayList<TIntArrayList>(queries.size());
 				while(listOffsets.size() < queries.size()) {
 					charsList.add(0);
 					listOffsets.add(new TIntArrayList());
 				}
 				map.readIndexedStringTable(instance, queries, "", listOffsets, charsList);
 				codedIS.popLimit(oldLimit);
 				break;
 			}
 			case OsmandOdb.OsmAndPoiNameIndex.DATA_FIELD_NUMBER: {
-				offsetsMap = new HashMap<>();
+				if (listOffsets != null) {
-				if (dataOffsetsMap != null) {
+					for (TIntArrayList dataOffsets : listOffsets) {
-					for (Entry<String, TIntArrayList> item : dataOffsetsMap.entrySet()) {
+						TIntLongHashMap offsetMap = new TIntLongHashMap();
-						TIntLongHashMap offsets = new TIntLongHashMap();
+						listOfSepOffsets.add(offsetMap);
 						TIntArrayList dataOffsets = item.getValue();
 						String word = item.getKey();
 						dataOffsets.sort(); // 1104125
 						for (int i = 0; i < dataOffsets.size(); i++) {
 							codedIS.seek(dataOffsets.get(i) + offset);
 							int len = codedIS.readRawVarint32();
 							int oldLim = codedIS.pushLimit(len);
-							readPoiNameIndexData(offsets, req);
+							readPoiNameIndexData(offsetMap, req);
 							codedIS.popLimit(oldLim);
-
+							if (req.isCancelled()) {
-							if (offsetsMap.containsKey(word)) {
+								codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
-								offsetsMap.get(word).putAll(offsets);
+								return offsets;
-							} else {
+							}
-								TIntLongHashMap map = new TIntLongHashMap();
+						}
-								map.putAll(offsets);
+					}
-								offsetsMap.put(word, map);
+				}
 				if (listOfSepOffsets.size() > 0) {
 					offsets.putAll(listOfSepOffsets.get(0));
 					for (int j = 1; j < listOfSepOffsets.size(); j++) {
 						TIntLongHashMap mp = listOfSepOffsets.get(j);
 						// offsets.retainAll(mp); -- calculate intersection of mp & offsets
 						for (int chKey : offsets.keys()) {
 							if (!mp.containsKey(chKey)) {
 								offsets.remove(chKey);
 							}
 						}
 					}
 				}
 				codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
-					return offsetsMap;
+				return offsets;
 				}
 				codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
 				return offsetsMap;
 			}
 			default:
 				skipUnknownField(t);