Try to separate how we read and search POIs by exact multi-word queries
This commit is contained in:
parent
016320795c
commit
3cb9ab9eff
3 changed files with 100 additions and 172 deletions
|
@ -8,6 +8,7 @@ import gnu.trove.set.hash.TIntHashSet;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -627,7 +628,9 @@ public class BinaryMapAddressReaderAdapter {
|
|||
indexOffset = codedIS.getTotalBytesRead();
|
||||
int oldLimit = codedIS.pushLimit(length);
|
||||
// here offsets are sorted by distance
|
||||
map.readIndexedStringTable(stringMatcher.getCollator(), req.nameQuery, "", loffsets, 0);
|
||||
TIntArrayList charsList = new TIntArrayList();
|
||||
charsList.add(0);
|
||||
map.readIndexedStringTable(stringMatcher.getCollator(), Collections.singletonList(req.nameQuery), "", Collections.singletonList(loffsets), charsList);
|
||||
codedIS.popLimit(oldLimit);
|
||||
break;
|
||||
case OsmAndAddressNameIndexData.ATOM_FIELD_NUMBER:
|
||||
|
|
|
@ -2385,127 +2385,76 @@ public class BinaryMapIndexReader {
|
|||
|
||||
}
|
||||
|
||||
int readIndexedStringTable(Collator instance, String query, String prefix, HashMap<String, TIntArrayList> map, int charMatches) throws IOException {
|
||||
void readIndexedStringTable(Collator instance, List<String> queries, String prefix, List<TIntArrayList> listOffsets, TIntArrayList charMatchesList) throws IOException {
|
||||
String key = null;
|
||||
while (true) {
|
||||
int t = codedIS.readTag();
|
||||
int tag = WireFormat.getTagFieldNumber(t);
|
||||
switch (tag) {
|
||||
case 0:
|
||||
return charMatches;
|
||||
return;
|
||||
case OsmandOdb.IndexedStringTable.KEY_FIELD_NUMBER :
|
||||
key = codedIS.readString();
|
||||
if (prefix.length() > 0) {
|
||||
key = prefix + key;
|
||||
}
|
||||
// check query is part of key (the best matching)
|
||||
if(CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_STARTS_FROM_SPACE)){
|
||||
if(query.length() >= charMatches){
|
||||
if(query.length() > charMatches){
|
||||
charMatches = query.length();
|
||||
map.clear();
|
||||
}
|
||||
} else {
|
||||
key = null;
|
||||
}
|
||||
// check key is part of query
|
||||
} else if (CollatorStringMatcher.cmatches(instance, query, key, StringMatcherMode.CHECK_STARTS_FROM_SPACE)) {
|
||||
if (key.length() >= charMatches) {
|
||||
if (key.length() > charMatches) {
|
||||
charMatches = key.length();
|
||||
map.clear();
|
||||
}
|
||||
} else {
|
||||
key = null;
|
||||
}
|
||||
} else {
|
||||
key = null;
|
||||
}
|
||||
break;
|
||||
case OsmandOdb.IndexedStringTable.VAL_FIELD_NUMBER:
|
||||
int val = readInt();
|
||||
if (key != null) {
|
||||
String[] words = query.split(" ");
|
||||
String keyByWord = null;
|
||||
for (String w : words) {
|
||||
if (instance.equals(w.substring(0, charMatches), key)) {
|
||||
keyByWord = w;
|
||||
}
|
||||
}
|
||||
if (map.containsKey(keyByWord)) {
|
||||
map.get(keyByWord).add(val);
|
||||
} else {
|
||||
TIntArrayList list = new TIntArrayList();
|
||||
list.add(val);
|
||||
map.put(keyByWord, list);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case OsmandOdb.IndexedStringTable.SUBTABLES_FIELD_NUMBER :
|
||||
int len = codedIS.readRawVarint32();
|
||||
int oldLim = codedIS.pushLimit(len);
|
||||
if (key != null) {
|
||||
charMatches = readIndexedStringTable(instance, query, key, map, charMatches);
|
||||
} else {
|
||||
codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
|
||||
}
|
||||
codedIS.popLimit(oldLim);
|
||||
break;
|
||||
default:
|
||||
skipUnknownField(t);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int readIndexedStringTable(Collator instance, String query, String prefix, TIntArrayList list, int charMatches) throws IOException {
|
||||
String key = null;
|
||||
while (true) {
|
||||
int t = codedIS.readTag();
|
||||
int tag = WireFormat.getTagFieldNumber(t);
|
||||
switch (tag) {
|
||||
case 0:
|
||||
return charMatches;
|
||||
case OsmandOdb.IndexedStringTable.KEY_FIELD_NUMBER :
|
||||
key = codedIS.readString();
|
||||
if(prefix.length() > 0){
|
||||
key = prefix + key;
|
||||
for (int i = 0; i < queries.size(); i++) {
|
||||
int charMatches = charMatchesList.get(i);
|
||||
if (charMatches < 0) {
|
||||
continue;
|
||||
}
|
||||
String query = queries.get(i);
|
||||
// check query is part of key (the best matching)
|
||||
if (CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) {
|
||||
if (query.length() >= charMatches) {
|
||||
if (query.length() > charMatches) {
|
||||
charMatches = query.length();
|
||||
list.clear();
|
||||
charMatchesList.set(i, query.length());
|
||||
listOffsets.get(i).clear();
|
||||
}
|
||||
} else {
|
||||
key = null;
|
||||
charMatchesList.set(i, -1);
|
||||
}
|
||||
// check key is part of query
|
||||
} else if (CollatorStringMatcher.cmatches(instance, query, key, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) {
|
||||
if (key.length() >= charMatches) {
|
||||
if (key.length() > charMatches) {
|
||||
charMatches = key.length();
|
||||
list.clear();
|
||||
charMatchesList.set(i, key.length());
|
||||
listOffsets.get(i).clear();
|
||||
}
|
||||
} else {
|
||||
key = null;
|
||||
charMatchesList.set(i, -1);
|
||||
}
|
||||
} else {
|
||||
key = null;
|
||||
charMatchesList.set(i, -1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case OsmandOdb.IndexedStringTable.VAL_FIELD_NUMBER :
|
||||
int val = readInt();
|
||||
if (key != null) {
|
||||
list.add(val);
|
||||
for (int i = 0; i < queries.size(); i++) {
|
||||
if (charMatchesList.get(i) >= 0) {
|
||||
listOffsets.get(i).add(val);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case OsmandOdb.IndexedStringTable.SUBTABLES_FIELD_NUMBER :
|
||||
int len = codedIS.readRawVarint32();
|
||||
int oldLim = codedIS.pushLimit(len);
|
||||
if (key != null) {
|
||||
charMatches = readIndexedStringTable(instance, query, key, list, charMatches);
|
||||
boolean shouldWeReadSubtable = false;
|
||||
for (int i = 0; i < queries.size(); i++) {
|
||||
if (charMatchesList.get(i) >= 0) {
|
||||
shouldWeReadSubtable = true;
|
||||
}
|
||||
}
|
||||
if (shouldWeReadSubtable && key != null) {
|
||||
TIntArrayList subcharMatchesList = new TIntArrayList(charMatchesList);
|
||||
readIndexedStringTable(instance, queries, key, listOffsets, subcharMatchesList);
|
||||
// looks like true
|
||||
for (int i = 0; i < queries.size(); i++) {
|
||||
if (subcharMatchesList.get(i) >= charMatchesList.get(i)) {
|
||||
charMatchesList.set(i, subcharMatchesList.get(i));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
|
||||
}
|
||||
|
|
|
@ -1,20 +1,21 @@
|
|||
package net.osmand.binary;
|
||||
|
||||
|
||||
import gnu.trove.list.array.TIntArrayList;
|
||||
import gnu.trove.map.hash.TIntLongHashMap;
|
||||
import gnu.trove.set.hash.TLongHashSet;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
|
||||
import com.google.protobuf.CodedInputStream;
|
||||
import com.google.protobuf.WireFormat;
|
||||
|
||||
import gnu.trove.list.array.TIntArrayList;
|
||||
import gnu.trove.map.hash.TIntLongHashMap;
|
||||
import gnu.trove.set.hash.TLongHashSet;
|
||||
import net.osmand.Collator;
|
||||
import net.osmand.CollatorStringMatcher;
|
||||
import net.osmand.CollatorStringMatcher.StringMatcherMode;
|
||||
|
@ -29,11 +30,6 @@ import net.osmand.osm.MapPoiTypes;
|
|||
import net.osmand.osm.PoiCategory;
|
||||
import net.osmand.util.MapUtils;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
|
||||
import com.google.protobuf.CodedInputStream;
|
||||
import com.google.protobuf.WireFormat;
|
||||
|
||||
public class BinaryMapPoiReaderAdapter {
|
||||
private static final Log LOG = PlatformUtil.getLog(BinaryMapPoiReaderAdapter.class);
|
||||
|
||||
|
@ -41,7 +37,7 @@ public class BinaryMapPoiReaderAdapter {
|
|||
private static final int CATEGORY_MASK = (1 << SHIFT_BITS_CATEGORY) - 1;
|
||||
private static final int ZOOM_TO_SKIP_FILTER_READ = 6;
|
||||
private static final int ZOOM_TO_SKIP_FILTER = 3;
|
||||
private static final int BUCKET_SEARCH_BY_NAME = 5;
|
||||
private static final int BUCKET_SEARCH_BY_NAME = 15; // should be bigger 100?
|
||||
|
||||
public static class PoiSubType {
|
||||
public boolean text;
|
||||
|
@ -296,7 +292,6 @@ public class BinaryMapPoiReaderAdapter {
|
|||
}
|
||||
|
||||
protected void searchPoiByName(PoiRegion region, SearchRequest<Amenity> req) throws IOException {
|
||||
HashMap<String, TIntLongHashMap> offsetsMap = new HashMap<>();
|
||||
TIntLongHashMap offsets = new TIntLongHashMap();
|
||||
String query = normalizeSearchPoiByNameQuery(req.nameQuery);
|
||||
CollatorStringMatcher matcher = new CollatorStringMatcher(query,
|
||||
|
@ -316,48 +311,18 @@ public class BinaryMapPoiReaderAdapter {
|
|||
int length = readInt();
|
||||
int oldLimit = codedIS.pushLimit(length);
|
||||
// here offsets are sorted by distance
|
||||
offsetsMap = readPoiNameIndex(matcher.getCollator(), query, req);
|
||||
offsets = readPoiNameIndex(matcher.getCollator(), query, req);
|
||||
codedIS.popLimit(oldLimit);
|
||||
break;
|
||||
case OsmandOdb.OsmAndPoiIndex.POIDATA_FIELD_NUMBER:
|
||||
// also offsets can be randomly skipped by limit
|
||||
Integer[] offKeys = new Integer[offsets.size()];
|
||||
if (offsetsMap.size() > 0) {
|
||||
List<HashSet<Integer>> setKeys = new ArrayList<>();
|
||||
for (Entry<String, TIntLongHashMap> item : offsetsMap.entrySet()) {
|
||||
TIntLongHashMap sets = item.getValue();
|
||||
|
||||
Integer[] offKeysFinal = new Integer[sets.size()];
|
||||
int[] keys = sets.keys();
|
||||
if (offsets.size() > 0) {
|
||||
int[] keys = offsets.keys();
|
||||
for (int i = 0; i < keys.length; i++) {
|
||||
offKeysFinal[i] = keys[i];
|
||||
}
|
||||
offsets.putAll(sets);
|
||||
HashSet<Integer> generalSet = new HashSet<Integer>(Arrays.asList(offKeysFinal));
|
||||
setKeys.add(generalSet);
|
||||
}
|
||||
HashSet<Integer> firstSet = new HashSet<Integer>();
|
||||
HashSet<Integer> secondSet = new HashSet<Integer>();
|
||||
HashSet<Integer> finalSet = new HashSet<Integer>();
|
||||
for (HashSet<Integer> keySet : setKeys) {
|
||||
if (setKeys.size() == 1) {
|
||||
finalSet.addAll(keySet);
|
||||
} else {
|
||||
if (firstSet.size() == 0) {
|
||||
firstSet.addAll(keySet);
|
||||
} else {
|
||||
secondSet.addAll(firstSet);
|
||||
secondSet.retainAll(keySet);
|
||||
finalSet.addAll(secondSet);
|
||||
}
|
||||
}
|
||||
offKeys[i] = keys[i];
|
||||
}
|
||||
final TIntLongHashMap foffsets = offsets;
|
||||
offKeys = finalSet.toArray(new Integer[finalSet.size()]);
|
||||
for (Integer key : offKeys) {
|
||||
foffsets.put(key, offsets.get(key));
|
||||
}
|
||||
|
||||
Arrays.sort(offKeys, new Comparator<Integer>() {
|
||||
@Override
|
||||
public int compare(Integer object1, Integer object2) {
|
||||
|
@ -401,54 +366,65 @@ public class BinaryMapPoiReaderAdapter {
|
|||
}
|
||||
}
|
||||
|
||||
private HashMap<String,TIntLongHashMap> readPoiNameIndex(Collator instance, String query, SearchRequest<Amenity> req) throws IOException {
|
||||
HashMap<String, TIntArrayList> dataOffsetsMap = null;
|
||||
HashMap<String, TIntLongHashMap> offsetsMap = new HashMap<>();
|
||||
private TIntLongHashMap readPoiNameIndex(Collator instance, String query, SearchRequest<Amenity> req) throws IOException {
|
||||
TIntLongHashMap offsets = new TIntLongHashMap();
|
||||
List<TIntArrayList> listOffsets = null;
|
||||
List<TIntLongHashMap> listOfSepOffsets = new ArrayList<TIntLongHashMap>();
|
||||
int offset = 0;
|
||||
while (true) {
|
||||
int t = codedIS.readTag();
|
||||
int tag = WireFormat.getTagFieldNumber(t);
|
||||
switch (tag) {
|
||||
case 0:
|
||||
return offsetsMap;
|
||||
return offsets;
|
||||
case OsmandOdb.OsmAndPoiNameIndex.TABLE_FIELD_NUMBER: {
|
||||
int length = readInt();
|
||||
int oldLimit = codedIS.pushLimit(length);
|
||||
dataOffsetsMap = new HashMap<>();
|
||||
offset = codedIS.getTotalBytesRead();
|
||||
map.readIndexedStringTable(instance, query, "", dataOffsetsMap, 0);
|
||||
List<String> queries = Arrays.asList(query.split(" "));
|
||||
TIntArrayList charsList = new TIntArrayList(queries.size());
|
||||
listOffsets = new ArrayList<TIntArrayList>(queries.size());
|
||||
while(listOffsets.size() < queries.size()) {
|
||||
charsList.add(0);
|
||||
listOffsets.add(new TIntArrayList());
|
||||
}
|
||||
map.readIndexedStringTable(instance, queries, "", listOffsets, charsList);
|
||||
codedIS.popLimit(oldLimit);
|
||||
break;
|
||||
}
|
||||
case OsmandOdb.OsmAndPoiNameIndex.DATA_FIELD_NUMBER: {
|
||||
offsetsMap = new HashMap<>();
|
||||
if (dataOffsetsMap != null) {
|
||||
for (Entry<String, TIntArrayList> item : dataOffsetsMap.entrySet()) {
|
||||
TIntLongHashMap offsets = new TIntLongHashMap();
|
||||
TIntArrayList dataOffsets = item.getValue();
|
||||
String word = item.getKey();
|
||||
if (listOffsets != null) {
|
||||
for (TIntArrayList dataOffsets : listOffsets) {
|
||||
TIntLongHashMap offsetMap = new TIntLongHashMap();
|
||||
listOfSepOffsets.add(offsetMap);
|
||||
dataOffsets.sort(); // 1104125
|
||||
for (int i = 0; i < dataOffsets.size(); i++) {
|
||||
codedIS.seek(dataOffsets.get(i) + offset);
|
||||
int len = codedIS.readRawVarint32();
|
||||
int oldLim = codedIS.pushLimit(len);
|
||||
readPoiNameIndexData(offsets, req);
|
||||
readPoiNameIndexData(offsetMap, req);
|
||||
codedIS.popLimit(oldLim);
|
||||
|
||||
if (offsetsMap.containsKey(word)) {
|
||||
offsetsMap.get(word).putAll(offsets);
|
||||
} else {
|
||||
TIntLongHashMap map = new TIntLongHashMap();
|
||||
map.putAll(offsets);
|
||||
offsetsMap.put(word, map);
|
||||
if (req.isCancelled()) {
|
||||
codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
|
||||
return offsets;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (listOfSepOffsets.size() > 0) {
|
||||
offsets.putAll(listOfSepOffsets.get(0));
|
||||
for (int j = 1; j < listOfSepOffsets.size(); j++) {
|
||||
TIntLongHashMap mp = listOfSepOffsets.get(j);
|
||||
// offsets.retainAll(mp); -- calculate intersection of mp & offsets
|
||||
for (int chKey : offsets.keys()) {
|
||||
if (!mp.containsKey(chKey)) {
|
||||
offsets.remove(chKey);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
|
||||
return offsetsMap;
|
||||
}
|
||||
codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
|
||||
return offsetsMap;
|
||||
return offsets;
|
||||
}
|
||||
default:
|
||||
skipUnknownField(t);
|
||||
|
|
Loading…
Reference in a new issue