Merge pull request #10732 from osmandapp/Tsearch

fix search
This commit is contained in:
vshcherb 2021-02-08 12:48:36 +01:00 committed by GitHub
commit eb21102c82
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 104 additions and 57 deletions

View file

@ -8,6 +8,7 @@ import gnu.trove.set.hash.TIntHashSet;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
@ -627,7 +628,9 @@ public class BinaryMapAddressReaderAdapter {
indexOffset = codedIS.getTotalBytesRead(); indexOffset = codedIS.getTotalBytesRead();
int oldLimit = codedIS.pushLimit(length); int oldLimit = codedIS.pushLimit(length);
// here offsets are sorted by distance // here offsets are sorted by distance
map.readIndexedStringTable(stringMatcher.getCollator(), req.nameQuery, "", loffsets, 0); TIntArrayList charsList = new TIntArrayList();
charsList.add(0);
map.readIndexedStringTable(stringMatcher.getCollator(), Collections.singletonList(req.nameQuery), "", Collections.singletonList(loffsets), charsList);
codedIS.popLimit(oldLimit); codedIS.popLimit(oldLimit);
break; break;
case OsmAndAddressNameIndexData.ATOM_FIELD_NUMBER: case OsmAndAddressNameIndexData.ATOM_FIELD_NUMBER:

View file

@ -2161,9 +2161,9 @@ public class BinaryMapIndexReader {
private static boolean testAddressSearch = false; private static boolean testAddressSearch = false;
private static boolean testAddressSearchName = false; private static boolean testAddressSearchName = false;
private static boolean testAddressJustifySearch = false; private static boolean testAddressJustifySearch = false;
private static boolean testPoiSearch = false; private static boolean testPoiSearch = true;
private static boolean testPoiSearchOnPath = false; private static boolean testPoiSearchOnPath = false;
private static boolean testTransportSearch = true; private static boolean testTransportSearch = false;
private static int sleft = MapUtils.get31TileNumberX(27.55079); private static int sleft = MapUtils.get31TileNumberX(27.55079);
private static int sright = MapUtils.get31TileNumberX(27.55317); private static int sright = MapUtils.get31TileNumberX(27.55317);
@ -2177,7 +2177,7 @@ public class BinaryMapIndexReader {
public static void main(String[] args) throws IOException { public static void main(String[] args) throws IOException {
File fl = new File(System.getProperty("maps") + "/Synthetic_test_rendering.obf"); File fl = new File(System.getProperty("maps") + "/Synthetic_test_rendering.obf");
fl = new File("/home/madwasp79/OsmAnd-maps/Poly_center2.obf"); fl = new File(System.getProperty("maps") +"/Wikivoyage.obf__");
RandomAccessFile raf = new RandomAccessFile(fl, "r"); RandomAccessFile raf = new RandomAccessFile(fl, "r");
@ -2325,7 +2325,7 @@ public class BinaryMapIndexReader {
private static void testPoiSearchByName(BinaryMapIndexReader reader) throws IOException { private static void testPoiSearchByName(BinaryMapIndexReader reader) throws IOException {
println("Searching by name..."); println("Searching by name...");
SearchRequest<Amenity> req = buildSearchPoiRequest(0, 0, "Art", SearchRequest<Amenity> req = buildSearchPoiRequest(0, 0, "central ukraine",
0, Integer.MAX_VALUE, 0, Integer.MAX_VALUE, null); 0, Integer.MAX_VALUE, 0, Integer.MAX_VALUE, null);
reader.searchPoiByName(req); reader.searchPoiByName(req);
@ -2385,54 +2385,72 @@ public class BinaryMapIndexReader {
} }
int readIndexedStringTable(Collator instance, String query, String prefix, TIntArrayList list, int charMatches) throws IOException { void readIndexedStringTable(Collator instance, List<String> queries, String prefix, List<TIntArrayList> listOffsets, TIntArrayList matchedCharacters) throws IOException {
String key = null; String key = null;
boolean[] matched = new boolean[matchedCharacters.size()];
boolean shouldWeReadSubtable = false;
while (true) { while (true) {
int t = codedIS.readTag(); int t = codedIS.readTag();
int tag = WireFormat.getTagFieldNumber(t); int tag = WireFormat.getTagFieldNumber(t);
switch (tag) { switch (tag) {
case 0: case 0:
return charMatches; return;
case OsmandOdb.IndexedStringTable.KEY_FIELD_NUMBER : case OsmandOdb.IndexedStringTable.KEY_FIELD_NUMBER :
key = codedIS.readString(); key = codedIS.readString();
if(prefix.length() > 0){ if (prefix.length() > 0) {
key = prefix + key; key = prefix + key;
} }
// check query is part of key (the best matching) shouldWeReadSubtable = false;
if(CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_ONLY_STARTS_WITH)){ for (int i = 0; i < queries.size(); i++) {
if(query.length() >= charMatches){ int charMatches = matchedCharacters.get(i);
if(query.length() > charMatches){ String query = queries.get(i);
charMatches = query.length(); matched[i] = false;
list.clear(); if (query == null) {
} continue;
} else {
key = null;
} }
// check key is part of query
} else if (CollatorStringMatcher.cmatches(instance, query, key, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) { // check query is part of key (the best matching)
if (key.length() >= charMatches) { if (CollatorStringMatcher.cmatches(instance, key, query, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) {
if (key.length() > charMatches) { if (query.length() >= charMatches) {
charMatches = key.length(); if (query.length() > charMatches) {
list.clear(); matchedCharacters.set(i, query.length());
listOffsets.get(i).clear();
}
matched[i] = true;
}
// check key is part of query
} else if (CollatorStringMatcher.cmatches(instance, query, key, StringMatcherMode.CHECK_ONLY_STARTS_WITH)) {
if (key.length() >= charMatches) {
if (key.length() > charMatches) {
matchedCharacters.set(i, key.length());
listOffsets.get(i).clear();
}
matched[i] = true;
} }
} else {
key = null;
} }
} else { shouldWeReadSubtable |= matched[i];
key = null;
} }
break; break;
case OsmandOdb.IndexedStringTable.VAL_FIELD_NUMBER : case OsmandOdb.IndexedStringTable.VAL_FIELD_NUMBER :
int val = readInt(); int val = readInt();
if (key != null) { for (int i = 0; i < queries.size(); i++) {
list.add(val); if (matched[i]) {
listOffsets.get(i).add(val);
}
} }
break; break;
case OsmandOdb.IndexedStringTable.SUBTABLES_FIELD_NUMBER : case OsmandOdb.IndexedStringTable.SUBTABLES_FIELD_NUMBER :
int len = codedIS.readRawVarint32(); int len = codedIS.readRawVarint32();
int oldLim = codedIS.pushLimit(len); int oldLim = codedIS.pushLimit(len);
if (key != null) { if (shouldWeReadSubtable && key != null) {
charMatches = readIndexedStringTable(instance, query, key, list, charMatches); List<String> subqueries = new ArrayList<>(queries);
// reset query so we don't search what was not matched
for(int i = 0; i < queries.size(); i++) {
if(!matched[i]) {
subqueries.set(i, null);
}
}
readIndexedStringTable(instance, subqueries, key, listOffsets, matchedCharacters);
} else { } else {
codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
} }

View file

@ -1,10 +1,6 @@
package net.osmand.binary; package net.osmand.binary;
import gnu.trove.list.array.TIntArrayList;
import gnu.trove.map.hash.TIntLongHashMap;
import gnu.trove.set.hash.TLongHashSet;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -12,6 +8,14 @@ import java.util.Comparator;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import org.apache.commons.logging.Log;
import com.google.protobuf.CodedInputStream;
import com.google.protobuf.WireFormat;
import gnu.trove.list.array.TIntArrayList;
import gnu.trove.map.hash.TIntLongHashMap;
import gnu.trove.set.hash.TLongHashSet;
import net.osmand.Collator; import net.osmand.Collator;
import net.osmand.CollatorStringMatcher; import net.osmand.CollatorStringMatcher;
import net.osmand.CollatorStringMatcher.StringMatcherMode; import net.osmand.CollatorStringMatcher.StringMatcherMode;
@ -26,11 +30,6 @@ import net.osmand.osm.MapPoiTypes;
import net.osmand.osm.PoiCategory; import net.osmand.osm.PoiCategory;
import net.osmand.util.MapUtils; import net.osmand.util.MapUtils;
import org.apache.commons.logging.Log;
import com.google.protobuf.CodedInputStream;
import com.google.protobuf.WireFormat;
public class BinaryMapPoiReaderAdapter { public class BinaryMapPoiReaderAdapter {
private static final Log LOG = PlatformUtil.getLog(BinaryMapPoiReaderAdapter.class); private static final Log LOG = PlatformUtil.getLog(BinaryMapPoiReaderAdapter.class);
@ -38,7 +37,7 @@ public class BinaryMapPoiReaderAdapter {
private static final int CATEGORY_MASK = (1 << SHIFT_BITS_CATEGORY) - 1; private static final int CATEGORY_MASK = (1 << SHIFT_BITS_CATEGORY) - 1;
private static final int ZOOM_TO_SKIP_FILTER_READ = 6; private static final int ZOOM_TO_SKIP_FILTER_READ = 6;
private static final int ZOOM_TO_SKIP_FILTER = 3; private static final int ZOOM_TO_SKIP_FILTER = 3;
private static final int BUCKET_SEARCH_BY_NAME = 5; private static final int BUCKET_SEARCH_BY_NAME = 15; // should be bigger 100?
public static class PoiSubType { public static class PoiSubType {
public boolean text; public boolean text;
@ -332,7 +331,7 @@ public class BinaryMapPoiReaderAdapter {
}); });
int p = BUCKET_SEARCH_BY_NAME * 3; int p = BUCKET_SEARCH_BY_NAME * 3;
if (p < offKeys.length) { if (p < offKeys.length) {
for (int i = p + BUCKET_SEARCH_BY_NAME; ; i += BUCKET_SEARCH_BY_NAME) { for (int i = p + BUCKET_SEARCH_BY_NAME;; i += BUCKET_SEARCH_BY_NAME) {
if (i > offKeys.length) { if (i > offKeys.length) {
Arrays.sort(offKeys, p, offKeys.length); Arrays.sort(offKeys, p, offKeys.length);
break; break;
@ -344,7 +343,6 @@ public class BinaryMapPoiReaderAdapter {
} }
} }
LOG.info("Searched poi structure in " + (System.currentTimeMillis() - time) + LOG.info("Searched poi structure in " + (System.currentTimeMillis() - time) +
"ms. Found " + offKeys.length + " subtrees"); "ms. Found " + offKeys.length + " subtrees");
for (int j = 0; j < offKeys.length; j++) { for (int j = 0; j < offKeys.length; j++) {
@ -370,7 +368,8 @@ public class BinaryMapPoiReaderAdapter {
private TIntLongHashMap readPoiNameIndex(Collator instance, String query, SearchRequest<Amenity> req) throws IOException { private TIntLongHashMap readPoiNameIndex(Collator instance, String query, SearchRequest<Amenity> req) throws IOException {
TIntLongHashMap offsets = new TIntLongHashMap(); TIntLongHashMap offsets = new TIntLongHashMap();
TIntArrayList dataOffsets = null; List<TIntArrayList> listOffsets = null;
List<TIntLongHashMap> listOfSepOffsets = new ArrayList<TIntLongHashMap>();
int offset = 0; int offset = 0;
while (true) { while (true) {
int t = codedIS.readTag(); int t = codedIS.readTag();
@ -381,24 +380,51 @@ public class BinaryMapPoiReaderAdapter {
case OsmandOdb.OsmAndPoiNameIndex.TABLE_FIELD_NUMBER: { case OsmandOdb.OsmAndPoiNameIndex.TABLE_FIELD_NUMBER: {
int length = readInt(); int length = readInt();
int oldLimit = codedIS.pushLimit(length); int oldLimit = codedIS.pushLimit(length);
dataOffsets = new TIntArrayList();
offset = codedIS.getTotalBytesRead(); offset = codedIS.getTotalBytesRead();
map.readIndexedStringTable(instance, query, "", dataOffsets, 0); List<String> queries = new ArrayList<>();
for (String word : query.split(" ")) {
if (word.trim().length() > 0) {
queries.add(word.trim());
}
}
TIntArrayList charsList = new TIntArrayList(queries.size());
listOffsets = new ArrayList<TIntArrayList>(queries.size());
while(listOffsets.size() < queries.size()) {
charsList.add(0);
listOffsets.add(new TIntArrayList());
}
map.readIndexedStringTable(instance, queries, "", listOffsets, charsList);
codedIS.popLimit(oldLimit); codedIS.popLimit(oldLimit);
break; break;
} }
case OsmandOdb.OsmAndPoiNameIndex.DATA_FIELD_NUMBER: { case OsmandOdb.OsmAndPoiNameIndex.DATA_FIELD_NUMBER: {
if (dataOffsets != null) { if (listOffsets != null) {
dataOffsets.sort(); // 1104125 for (TIntArrayList dataOffsets : listOffsets) {
for (int i = 0; i < dataOffsets.size(); i++) { TIntLongHashMap offsetMap = new TIntLongHashMap();
codedIS.seek(dataOffsets.get(i) + offset); listOfSepOffsets.add(offsetMap);
int len = codedIS.readRawVarint32(); dataOffsets.sort(); // 1104125
int oldLim = codedIS.pushLimit(len); for (int i = 0; i < dataOffsets.size(); i++) {
readPoiNameIndexData(offsets, req); codedIS.seek(dataOffsets.get(i) + offset);
codedIS.popLimit(oldLim); int len = codedIS.readRawVarint32();
if (req.isCancelled()) { int oldLim = codedIS.pushLimit(len);
codedIS.skipRawBytes(codedIS.getBytesUntilLimit()); readPoiNameIndexData(offsetMap, req);
return offsets; codedIS.popLimit(oldLim);
if (req.isCancelled()) {
codedIS.skipRawBytes(codedIS.getBytesUntilLimit());
return offsets;
}
}
}
}
if (listOfSepOffsets.size() > 0) {
offsets.putAll(listOfSepOffsets.get(0));
for (int j = 1; j < listOfSepOffsets.size(); j++) {
TIntLongHashMap mp = listOfSepOffsets.get(j);
// offsets.retainAll(mp); -- calculate intersection of mp & offsets
for (int chKey : offsets.keys()) {
if (!mp.containsKey(chKey)) {
offsets.remove(chKey);
}
} }
} }
} }