Merge pull request #10732 from osmandapp/Tsearch

fix search
This commit is contained in:
vshcherb 2021-02-08 12:48:36 +01:00 committed by GitHub
commit eb21102c82
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 104 additions and 57 deletions

View file

@ -8,6 +8,7 @@ import gnu.trove.set.hash.TIntHashSet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
@ -627,7 +628,9 @@ public class BinaryMapAddressReaderAdapter {
indexOffset = codedIS.getTotalBytesRead();
int oldLimit = codedIS.pushLimit(length);
// here offsets are sorted by distance
map.readIndexedStringTable(stringMatcher.getCollator(), req.nameQuery, "", loffsets, 0);
TIntArrayList charsList = new TIntArrayList();
charsList.add(0);
map.readIndexedStringTable(stringMatcher.getCollator(), Collections.singletonList(req.nameQuery), "", Collections.singletonList(loffsets), charsList);
codedIS.popLimit(oldLimit);
break;
case OsmAndAddressNameIndexData.ATOM_FIELD_NUMBER:

View file

@ -2161,9 +2161,9 @@ public class BinaryMapIndexReader {
private static boolean testAddressSearch = false;
private static boolean testAddressSearchName = false;
private static boolean testAddressJustifySearch = false;
private static boolean testPoiSearch = false;
private static boolean testPoiSearch = true;
private static boolean testPoiSearchOnPath = false;
private static boolean testTransportSearch = true;
private static boolean testTransportSearch = false;
private static int sleft = MapUtils.get31TileNumberX(27.55079);
private static int sright = MapUtils.get31TileNumberX(27.55317);
@ -2177,7 +2177,7 @@ public class BinaryMapIndexReader {
public static void main(String[] args) throws IOException {
File fl = new File(System.getProperty("maps") + "/Synthetic_test_rendering.obf");
fl = new File("/home/madwasp79/OsmAnd-maps/Poly_center2.obf");
fl = new File(System.getProperty("maps") +"/Wikivoyage.obf__");
RandomAccessFile raf = new RandomAccessFile(fl, "r");
@ -2325,7 +2325,7 @@ public class BinaryMapIndexReader {
private static void testPoiSearchByName(BinaryMapIndexReader reader) throws IOException {
println("Searching by name...");
SearchRequest<Amenity> req = buildSearchPoiRequest(0, 0, "Art",
SearchRequest<Amenity> req = buildSearchPoiRequest(0, 0, "central ukraine",
0, Integer.MAX_VALUE, 0, Integer.MAX_VALUE, null);
reader.searchPoiByName(req);
@ -2385,54 +2385,72 @@ public class BinaryMapIndexReader {
}
int readIndexedStringTable(Collator instance, String query, String prefix, TIntArrayList list, int charMatches) throws IOException {
void readIndexedStringTable(Collator instance, List<String> queries, String prefix, List<TIntArrayList> listOffsets, TIntArrayList matchedCharacters) throws IOException {
String key = null;
boolean[] matched = new boolean[matchedCharacters.size()];
boolean shouldWeReadSubtable = false;
while (true) {
int t = codedIS.readTag();
int tag = WireFormat.getTagFieldNumber(t);
switch (tag) {
case 0:
return charMatches;
return;
case OsmandOdb.IndexedStringTable.KEY_FIELD_NUMBER :
key = codedIS.readString();
if(prefix.length() > 0){
if (prefix.length() > 0) {
key = prefix + key;
}
// check query is part of key (the best matching)
if(CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_ONLY_STARTS_WITH)){
if(query.length() >= charMatches){
if(query.length() > charMatches){
charMatches = query.length();
list.clear();
shouldWeReadSubtable = false;
for (int i = 0; i < queries.size(); i++) {
int charMatches = matchedCharacters.get(i);
String query = queries.get(i);
matched[i] = false;
if (query == null) {
continue;
}
} else {
key = null;
// check query is part of key (the best matching)
if (CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) {
if (query.length() >= charMatches) {
if (query.length() > charMatches) {
matchedCharacters.set(i, query.length());
listOffsets.get(i).clear();
}
matched[i] = true;
}
// check key is part of query
} else if (CollatorStringMatcher.cmatches(instance, query, key, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) {
if (key.length() >= charMatches) {
if (key.length() > charMatches) {
charMatches = key.length();
list.clear();
matchedCharacters.set(i, key.length());
listOffsets.get(i).clear();
}
} else {
key = null;
matched[i] = true;
}
} else {
key = null;
}
shouldWeReadSubtable |= matched[i];
}
break;
case OsmandOdb.IndexedStringTable.VAL_FIELD_NUMBER :
int val = readInt();
if (key != null) {
list.add(val);
for (int i = 0; i < queries.size(); i++) {
if (matched[i]) {
listOffsets.get(i).add(val);
}
}
break;
case OsmandOdb.IndexedStringTable.SUBTABLES_FIELD_NUMBER :
int len = codedIS.readRawVarint32();
int oldLim = codedIS.pushLimit(len);
if (key != null) {
charMatches = readIndexedStringTable(instance, query, key, list, charMatches);
if (shouldWeReadSubtable && key != null) {
List<String> subqueries = new ArrayList<>(queries);
// reset query so we don't search what was not matched
for(int i = 0; i < queries.size(); i++) {
if(!matched[i]) {
subqueries.set(i, null);
}
}
readIndexedStringTable(instance, subqueries, key, listOffsets, matchedCharacters);
} else {
codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
}

View file

@ -1,10 +1,6 @@
package net.osmand.binary;
import gnu.trove.list.array.TIntArrayList;
import gnu.trove.map.hash.TIntLongHashMap;
import gnu.trove.set.hash.TLongHashSet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@ -12,6 +8,14 @@ import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import org.apache.commons.logging.Log;
import com.google.protobuf.CodedInputStream;
import com.google.protobuf.WireFormat;
import gnu.trove.list.array.TIntArrayList;
import gnu.trove.map.hash.TIntLongHashMap;
import gnu.trove.set.hash.TLongHashSet;
import net.osmand.Collator;
import net.osmand.CollatorStringMatcher;
import net.osmand.CollatorStringMatcher.StringMatcherMode;
@ -26,11 +30,6 @@ import net.osmand.osm.MapPoiTypes;
import net.osmand.osm.PoiCategory;
import net.osmand.util.MapUtils;
import org.apache.commons.logging.Log;
import com.google.protobuf.CodedInputStream;
import com.google.protobuf.WireFormat;
public class BinaryMapPoiReaderAdapter {
private static final Log LOG = PlatformUtil.getLog(BinaryMapPoiReaderAdapter.class);
@ -38,7 +37,7 @@ public class BinaryMapPoiReaderAdapter {
private static final int CATEGORY_MASK = (1 << SHIFT_BITS_CATEGORY) - 1;
private static final int ZOOM_TO_SKIP_FILTER_READ = 6;
private static final int ZOOM_TO_SKIP_FILTER = 3;
private static final int BUCKET_SEARCH_BY_NAME = 5;
private static final int BUCKET_SEARCH_BY_NAME = 15; // should be bigger 100?
public static class PoiSubType {
public boolean text;
@ -332,7 +331,7 @@ public class BinaryMapPoiReaderAdapter {
});
int p = BUCKET_SEARCH_BY_NAME * 3;
if (p < offKeys.length) {
for (int i = p + BUCKET_SEARCH_BY_NAME; ; i += BUCKET_SEARCH_BY_NAME) {
for (int i = p + BUCKET_SEARCH_BY_NAME;; i += BUCKET_SEARCH_BY_NAME) {
if (i > offKeys.length) {
Arrays.sort(offKeys, p, offKeys.length);
break;
@ -344,7 +343,6 @@ public class BinaryMapPoiReaderAdapter {
}
}
LOG.info("Searched poi structure in " + (System.currentTimeMillis() - time) +
"ms. Found " + offKeys.length + " subtrees");
for (int j = 0; j < offKeys.length; j++) {
@ -370,7 +368,8 @@ public class BinaryMapPoiReaderAdapter {
private TIntLongHashMap readPoiNameIndex(Collator instance, String query, SearchRequest<Amenity> req) throws IOException {
TIntLongHashMap offsets = new TIntLongHashMap();
TIntArrayList dataOffsets = null;
List<TIntArrayList> listOffsets = null;
List<TIntLongHashMap> listOfSepOffsets = new ArrayList<TIntLongHashMap>();
int offset = 0;
while (true) {
int t = codedIS.readTag();
@ -381,20 +380,34 @@ public class BinaryMapPoiReaderAdapter {
case OsmandOdb.OsmAndPoiNameIndex.TABLE_FIELD_NUMBER: {
int length = readInt();
int oldLimit = codedIS.pushLimit(length);
dataOffsets = new TIntArrayList();
offset = codedIS.getTotalBytesRead();
map.readIndexedStringTable(instance, query, "", dataOffsets, 0);
List<String> queries = new ArrayList<>();
for (String word : query.split(" ")) {
if (word.trim().length() > 0) {
queries.add(word.trim());
}
}
TIntArrayList charsList = new TIntArrayList(queries.size());
listOffsets = new ArrayList<TIntArrayList>(queries.size());
while(listOffsets.size() < queries.size()) {
charsList.add(0);
listOffsets.add(new TIntArrayList());
}
map.readIndexedStringTable(instance, queries, "", listOffsets, charsList);
codedIS.popLimit(oldLimit);
break;
}
case OsmandOdb.OsmAndPoiNameIndex.DATA_FIELD_NUMBER: {
if (dataOffsets != null) {
if (listOffsets != null) {
for (TIntArrayList dataOffsets : listOffsets) {
TIntLongHashMap offsetMap = new TIntLongHashMap();
listOfSepOffsets.add(offsetMap);
dataOffsets.sort(); // 1104125
for (int i = 0; i < dataOffsets.size(); i++) {
codedIS.seek(dataOffsets.get(i) + offset);
int len = codedIS.readRawVarint32();
int oldLim = codedIS.pushLimit(len);
readPoiNameIndexData(offsets, req);
readPoiNameIndexData(offsetMap, req);
codedIS.popLimit(oldLim);
if (req.isCancelled()) {
codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
@ -402,6 +415,19 @@ public class BinaryMapPoiReaderAdapter {
}
}
}
}
if (listOfSepOffsets.size() > 0) {
offsets.putAll(listOfSepOffsets.get(0));
for (int j = 1; j < listOfSepOffsets.size(); j++) {
TIntLongHashMap mp = listOfSepOffsets.get(j);
// offsets.retainAll(mp); -- calculate intersection of mp & offsets
for (int chKey : offsets.keys()) {
if (!mp.containsKey(chKey)) {
offsets.remove(chKey);
}
}
}
}
codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
return offsets;
}