Try to separate how we read and search POIs by exact multi-word queries

2021-02-01 19:47:20 +01:00 · 2021-02-01 19:47:20 +01:00 · 3cb9ab9eff
commit 3cb9ab9eff
parent 016320795c
3 changed files with 100 additions and 172 deletions
--- a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapAddressReaderAdapter.java
+++ b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapAddressReaderAdapter.java
@ -8,6 +8,7 @@ import gnu.trove.set.hash.TIntHashSet;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.LinkedList;
 import java.util.List;

@ -627,7 +628,9 @@ public class BinaryMapAddressReaderAdapter {
 				indexOffset = codedIS.getTotalBytesRead();
 				int oldLimit = codedIS.pushLimit(length);
 				// here offsets are sorted by distance
-				map.readIndexedStringTable(stringMatcher.getCollator(), req.nameQuery, "", loffsets, 0);
+				TIntArrayList charsList = new TIntArrayList();
+				charsList.add(0);
+				map.readIndexedStringTable(stringMatcher.getCollator(), Collections.singletonList(req.nameQuery), "", Collections.singletonList(loffsets), charsList);
 				codedIS.popLimit(oldLimit);
 				break;
 			case OsmAndAddressNameIndexData.ATOM_FIELD_NUMBER:
--- a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java
+++ b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapIndexReader.java
@ -2385,127 +2385,76 @@ public class BinaryMapIndexReader {

 	}

-	int readIndexedStringTable(Collator instance, String query, String prefix, HashMap<String, TIntArrayList> map, int charMatches) throws IOException {
+	void readIndexedStringTable(Collator instance, List<String> queries, String prefix, List<TIntArrayList> listOffsets, TIntArrayList charMatchesList) throws IOException {
 		String key = null;
 		while (true) {
 			int t = codedIS.readTag();
 			int tag = WireFormat.getTagFieldNumber(t);
 			switch (tag) {
 			case 0:
-				return charMatches;
+				return;
 			case OsmandOdb.IndexedStringTable.KEY_FIELD_NUMBER :
 				key = codedIS.readString();
-				if(prefix.length() > 0){
+				if (prefix.length() > 0) {
 					key = prefix + key;
 				}
-				// check query is part of key (the best matching)
-				if(CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_STARTS_FROM_SPACE)){
-					if(query.length() >= charMatches){
-						if(query.length() > charMatches){
-							charMatches = query.length();
-							map.clear();
+				for (int i = 0; i < queries.size(); i++) {
+					int charMatches = charMatchesList.get(i);
+					if (charMatches < 0) {
+						continue;
+					}
+					String query = queries.get(i);
+					// check query is part of key (the best matching)
+					if (CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) {
+						if (query.length() >= charMatches) {
+							if (query.length() > charMatches) {
+								charMatchesList.set(i, query.length());
+								listOffsets.get(i).clear();
+							}
+						} else {
+							charMatchesList.set(i, -1);
+						}
+						// check key is part of query
+					} else if (CollatorStringMatcher.cmatches(instance, query, key, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) {
+						if (key.length() >= charMatches) {
+							if (key.length() > charMatches) {
+								charMatchesList.set(i, key.length());
+								listOffsets.get(i).clear();
+							}
+						} else {
+							charMatchesList.set(i, -1);
 						}
 					} else {
-						key = null;
+						charMatchesList.set(i, -1);
 					}
-					// check key is part of query
-				} else if (CollatorStringMatcher.cmatches(instance, query, key, StringMatcherMode.CHECK_STARTS_FROM_SPACE)) {
-					if (key.length() >= charMatches) {
-						if (key.length() > charMatches) {
-							charMatches = key.length();
-							map.clear();
-						}
-					} else {
-						key = null;
-					}
-				} else {
-					key = null;
-				}
-				break;
-			case OsmandOdb.IndexedStringTable.VAL_FIELD_NUMBER:
-				int val = readInt();
-				if (key != null) {
-					String[] words = query.split(" ");
-					String keyByWord = null;
-					for (String w : words) {
-						if (instance.equals(w.substring(0, charMatches), key)) {
-							keyByWord = w;
-						}
-					}
-					if (map.containsKey(keyByWord)) {
-						map.get(keyByWord).add(val);
-					} else {
-						TIntArrayList list = new TIntArrayList();
-						list.add(val);
-						map.put(keyByWord, list);
-					}
-				}
-				break;
-			case OsmandOdb.IndexedStringTable.SUBTABLES_FIELD_NUMBER :
-				int len = codedIS.readRawVarint32();
-				int oldLim = codedIS.pushLimit(len);
-				if (key != null) {
-					charMatches = readIndexedStringTable(instance, query, key, map, charMatches);
-				} else {
-					codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
-				}
-				codedIS.popLimit(oldLim);
-				break;
-			default:
-				skipUnknownField(t);
-				break;
-			}
-		}
-	}
-	
-	int readIndexedStringTable(Collator instance, String query, String prefix, TIntArrayList list, int charMatches) throws IOException {
-		String key = null;
-		while (true) {
-			int t = codedIS.readTag();
-			int tag = WireFormat.getTagFieldNumber(t);
-			switch (tag) {
-			case 0:
-				return charMatches;
-			case OsmandOdb.IndexedStringTable.KEY_FIELD_NUMBER :
-				key = codedIS.readString();
-				if(prefix.length() > 0){
-					key = prefix + key;
-				}
-				// check query is part of key (the best matching)
-				if(CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_ONLY_STARTS_WITH)){
-					if(query.length() >= charMatches){
-						if(query.length() > charMatches){
-							charMatches = query.length();
-							list.clear();
-						}
-					} else {
-						key = null;
-					}
-					// check key is part of query
-				} else if (CollatorStringMatcher.cmatches(instance, query, key, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) {
-					if (key.length() >= charMatches) {
-						if (key.length() > charMatches) {
-							charMatches = key.length();
-							list.clear();
-						}
-					} else {
-						key = null;
-					}
-				} else {
-					key = null;
 				}
 				break;
 			case OsmandOdb.IndexedStringTable.VAL_FIELD_NUMBER :
 				int val = readInt();
-				if (key != null) {
-					list.add(val);
+				for (int i = 0; i < queries.size(); i++) {
+					if (charMatchesList.get(i) >= 0) {
+						listOffsets.get(i).add(val);
+					}
 				}
 				break;
 			case OsmandOdb.IndexedStringTable.SUBTABLES_FIELD_NUMBER :
 				int len = codedIS.readRawVarint32();
 				int oldLim = codedIS.pushLimit(len);
-				if (key != null) {
-					charMatches = readIndexedStringTable(instance, query, key, list, charMatches);
+				boolean shouldWeReadSubtable = false;
+				for (int i = 0; i < queries.size(); i++) {
+					if (charMatchesList.get(i) >= 0) {
+						shouldWeReadSubtable = true;
+					}
+				}
+				if (shouldWeReadSubtable && key != null) {
+					TIntArrayList subcharMatchesList = new TIntArrayList(charMatchesList);
+					readIndexedStringTable(instance, queries, key, listOffsets, subcharMatchesList);
+					// looks like true
+					for (int i = 0; i < queries.size(); i++) {
+						if (subcharMatchesList.get(i) >= charMatchesList.get(i)) {
+							charMatchesList.set(i, subcharMatchesList.get(i));
+						}
+					}	
 				} else {
 					codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
 				}
--- a/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java
+++ b/OsmAnd-java/src/main/java/net/osmand/binary/BinaryMapPoiReaderAdapter.java
@ -1,20 +1,21 @@
 package net.osmand.binary;


-import gnu.trove.list.array.TIntArrayList;
-import gnu.trove.map.hash.TIntLongHashMap;
-import gnu.trove.set.hash.TLongHashSet;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
-import java.util.HashMap;
-import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.Map.Entry;

+import org.apache.commons.logging.Log;
+
+import com.google.protobuf.CodedInputStream;
+import com.google.protobuf.WireFormat;
+
+import gnu.trove.list.array.TIntArrayList;
+import gnu.trove.map.hash.TIntLongHashMap;
+import gnu.trove.set.hash.TLongHashSet;
 import net.osmand.Collator;
 import net.osmand.CollatorStringMatcher;
 import net.osmand.CollatorStringMatcher.StringMatcherMode;
@ -29,11 +30,6 @@ import net.osmand.osm.MapPoiTypes;
 import net.osmand.osm.PoiCategory;
 import net.osmand.util.MapUtils;

-import org.apache.commons.logging.Log;
-
-import com.google.protobuf.CodedInputStream;
-import com.google.protobuf.WireFormat;
-
 public class BinaryMapPoiReaderAdapter {
 	private static final Log LOG = PlatformUtil.getLog(BinaryMapPoiReaderAdapter.class);

@ -41,7 +37,7 @@ public class BinaryMapPoiReaderAdapter {
 	private static final int CATEGORY_MASK = (1 << SHIFT_BITS_CATEGORY) - 1;
 	private static final int ZOOM_TO_SKIP_FILTER_READ = 6;
 	private static final int ZOOM_TO_SKIP_FILTER = 3;
-	private static final int BUCKET_SEARCH_BY_NAME = 5;
+	private static final int BUCKET_SEARCH_BY_NAME = 15; // should be bigger 100? 

 	public static class PoiSubType {
 		public boolean text;
@ -296,7 +292,6 @@ public class BinaryMapPoiReaderAdapter {
 	}

 	protected void searchPoiByName(PoiRegion region, SearchRequest<Amenity> req) throws IOException {
-		HashMap<String, TIntLongHashMap> offsetsMap = new HashMap<>();
 		TIntLongHashMap offsets = new TIntLongHashMap();
 		String query = normalizeSearchPoiByNameQuery(req.nameQuery);
 		CollatorStringMatcher matcher = new CollatorStringMatcher(query,
@ -316,48 +311,18 @@ public class BinaryMapPoiReaderAdapter {
 				int length = readInt();
 				int oldLimit = codedIS.pushLimit(length);
 				// here offsets are sorted by distance
-				offsetsMap = readPoiNameIndex(matcher.getCollator(), query, req);
+				offsets = readPoiNameIndex(matcher.getCollator(), query, req);
 				codedIS.popLimit(oldLimit);
 				break;
 			case OsmandOdb.OsmAndPoiIndex.POIDATA_FIELD_NUMBER:
 				// also offsets can be randomly skipped by limit
 				Integer[] offKeys = new Integer[offsets.size()];
-				if (offsetsMap.size() > 0) {
-					List<HashSet<Integer>> setKeys = new ArrayList<>();
-					for (Entry<String, TIntLongHashMap> item : offsetsMap.entrySet()) {
-						TIntLongHashMap sets = item.getValue();
-
-						Integer[] offKeysFinal = new Integer[sets.size()];
-						int[] keys = sets.keys();
-						for (int i = 0; i < keys.length; i++) {
-							offKeysFinal[i] = keys[i];
-						}
-						offsets.putAll(sets);
-						HashSet<Integer> generalSet = new HashSet<Integer>(Arrays.asList(offKeysFinal));
-						setKeys.add(generalSet);
-					}
-					HashSet<Integer> firstSet = new HashSet<Integer>();
-					HashSet<Integer> secondSet = new HashSet<Integer>();
-					HashSet<Integer> finalSet = new HashSet<Integer>();
-					for (HashSet<Integer> keySet : setKeys) {
-						if (setKeys.size() == 1) {
-							finalSet.addAll(keySet);
-						} else {
-							if (firstSet.size() == 0) {
-								firstSet.addAll(keySet);
-							} else {
-								secondSet.addAll(firstSet);
-								secondSet.retainAll(keySet);
-								finalSet.addAll(secondSet);
-							}
-						}
+				if (offsets.size() > 0) {
+					int[] keys = offsets.keys();
+					for (int i = 0; i < keys.length; i++) {
+						offKeys[i] = keys[i];
 					}
 					final TIntLongHashMap foffsets = offsets;
-					offKeys = finalSet.toArray(new Integer[finalSet.size()]);
-					for (Integer key : offKeys) {
-						foffsets.put(key, offsets.get(key));
-					}
-
 					Arrays.sort(offKeys, new Comparator<Integer>() {
 						@Override
 						public int compare(Integer object1, Integer object2) {
@ -401,54 +366,65 @@ public class BinaryMapPoiReaderAdapter {
 		}
 	}

-	private HashMap<String,TIntLongHashMap> readPoiNameIndex(Collator instance, String query, SearchRequest<Amenity> req) throws IOException {
-		HashMap<String, TIntArrayList> dataOffsetsMap = null;
-		HashMap<String, TIntLongHashMap> offsetsMap = new HashMap<>();
+	private TIntLongHashMap readPoiNameIndex(Collator instance, String query, SearchRequest<Amenity> req) throws IOException {
+		TIntLongHashMap offsets = new TIntLongHashMap();
+		List<TIntArrayList> listOffsets = null;
+		List<TIntLongHashMap> listOfSepOffsets = new ArrayList<TIntLongHashMap>();
 		int offset = 0;
 		while (true) {
 			int t = codedIS.readTag();
 			int tag = WireFormat.getTagFieldNumber(t);
 			switch (tag) {
 			case 0:
-				return offsetsMap;
+				return offsets;
 			case OsmandOdb.OsmAndPoiNameIndex.TABLE_FIELD_NUMBER: {
 				int length = readInt();
 				int oldLimit = codedIS.pushLimit(length);
-				dataOffsetsMap = new HashMap<>();
 				offset = codedIS.getTotalBytesRead();
-				map.readIndexedStringTable(instance, query, "", dataOffsetsMap, 0);
+				List<String> queries = Arrays.asList(query.split(" "));
+				TIntArrayList charsList = new TIntArrayList(queries.size());
+				listOffsets = new ArrayList<TIntArrayList>(queries.size());
+				while(listOffsets.size() < queries.size()) {
+					charsList.add(0);
+					listOffsets.add(new TIntArrayList());
+				}
+				map.readIndexedStringTable(instance, queries, "", listOffsets, charsList);
 				codedIS.popLimit(oldLimit);
 				break;
 			}
 			case OsmandOdb.OsmAndPoiNameIndex.DATA_FIELD_NUMBER: {
-				offsetsMap = new HashMap<>();
-				if (dataOffsetsMap != null) {
-					for (Entry<String, TIntArrayList> item : dataOffsetsMap.entrySet()) {
-						TIntLongHashMap offsets = new TIntLongHashMap();
-						TIntArrayList dataOffsets = item.getValue();
-						String word = item.getKey();
+				if (listOffsets != null) {
+					for (TIntArrayList dataOffsets : listOffsets) {
+						TIntLongHashMap offsetMap = new TIntLongHashMap();
+						listOfSepOffsets.add(offsetMap);
 						dataOffsets.sort(); // 1104125
 						for (int i = 0; i < dataOffsets.size(); i++) {
 							codedIS.seek(dataOffsets.get(i) + offset);
 							int len = codedIS.readRawVarint32();
 							int oldLim = codedIS.pushLimit(len);
-							readPoiNameIndexData(offsets, req);
+							readPoiNameIndexData(offsetMap, req);
 							codedIS.popLimit(oldLim);
-
-							if (offsetsMap.containsKey(word)) {
-								offsetsMap.get(word).putAll(offsets);
-							} else {
-								TIntLongHashMap map = new TIntLongHashMap();
-								map.putAll(offsets);
-								offsetsMap.put(word, map);
+							if (req.isCancelled()) {
+								codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
+								return offsets;
+							}
+						}
+					}
+				}
+				if (listOfSepOffsets.size() > 0) {
+					offsets.putAll(listOfSepOffsets.get(0));
+					for (int j = 1; j < listOfSepOffsets.size(); j++) {
+						TIntLongHashMap mp = listOfSepOffsets.get(j);
+						// offsets.retainAll(mp); -- calculate intersection of mp & offsets
+						for (int chKey : offsets.keys()) {
+							if (!mp.containsKey(chKey)) {
+								offsets.remove(chKey);
 							}
 						}
 					}
-					codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
-					return offsetsMap;
 				}
 				codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
-				return offsetsMap;
+				return offsets;
 			}
 			default:
 				skipUnknownField(t);