Update full text search

This commit is contained in:
Victor Shcherb 2016-07-24 22:34:26 +02:00
parent 3d49d718f5
commit c6fb461244
4 changed files with 127 additions and 71 deletions

View file

@ -22,6 +22,8 @@ import net.osmand.osm.MapPoiTypes;
import net.osmand.search.core.ObjectType; import net.osmand.search.core.ObjectType;
import net.osmand.search.core.SearchCoreAPI; import net.osmand.search.core.SearchCoreAPI;
import net.osmand.search.core.SearchCoreFactory; import net.osmand.search.core.SearchCoreFactory;
import net.osmand.search.core.SearchCoreFactory.SearchBuildingAndIntersectionsByStreetAPI;
import net.osmand.search.core.SearchCoreFactory.SearchStreetByCityAPI;
import net.osmand.search.core.SearchPhrase; import net.osmand.search.core.SearchPhrase;
import net.osmand.search.core.SearchResult; import net.osmand.search.core.SearchResult;
import net.osmand.search.core.SearchSettings; import net.osmand.search.core.SearchSettings;
@ -95,13 +97,16 @@ public class SearchUICore {
} }
public void init() { public void init() {
apis.add(new SearchCoreFactory.SearchLocationAndUrlAPI());
apis.add(new SearchCoreFactory.SearchAmenityTypesAPI(poiTypes)); apis.add(new SearchCoreFactory.SearchAmenityTypesAPI(poiTypes));
apis.add(new SearchCoreFactory.SearchAmenityByTypeAPI(poiTypes)); apis.add(new SearchCoreFactory.SearchAmenityByTypeAPI(poiTypes));
apis.add(new SearchCoreFactory.SearchAmenityByNameAPI()); apis.add(new SearchCoreFactory.SearchAmenityByNameAPI());
apis.add(new SearchCoreFactory.SearchStreetByCityAPI()); SearchBuildingAndIntersectionsByStreetAPI streetsApi =
apis.add(new SearchCoreFactory.SearchBuildingAndIntersectionsByStreetAPI()); new SearchCoreFactory.SearchBuildingAndIntersectionsByStreetAPI();
apis.add(new SearchCoreFactory.SearchLocationAndUrlAPI()); apis.add(streetsApi);
apis.add(new SearchCoreFactory.SearchAddressByNameAPI()); SearchStreetByCityAPI cityApi = new SearchCoreFactory.SearchStreetByCityAPI(streetsApi);
apis.add(cityApi);
apis.add(new SearchCoreFactory.SearchAddressByNameAPI(streetsApi, cityApi));
} }
public void registerAPI(SearchCoreAPI api) { public void registerAPI(SearchCoreAPI api) {
@ -174,7 +179,6 @@ public class SearchUICore {
if (!rm.isCancelled()) { if (!rm.isCancelled()) {
sortSearchResults(phrase, rm.getRequestResults()); sortSearchResults(phrase, rm.getRequestResults());
filterSearchDuplicateResults(phrase, rm.getRequestResults()); filterSearchDuplicateResults(phrase, rm.getRequestResults());
justifySearchResults(phrase, rm);
LOG.info(">> Search phrase " + phrase + " " + rm.getRequestResults().size()); LOG.info(">> Search phrase " + phrase + " " + rm.getRequestResults().size());
SearchResultCollection collection = new SearchResultCollection(rm.getRequestResults(), SearchResultCollection collection = new SearchResultCollection(rm.getRequestResults(),
@ -195,35 +199,8 @@ public class SearchUICore {
return quickRes; return quickRes;
} }
// Re-runs the search beneath each leading result while the phrase still has unknown words:
// a result is selected into a derived phrase, searched again with a fresh matcher, and, when
// that sub-search returns anything, the result is removed and the sub-results are appended.
// The list is re-sorted and de-duplicated once at the end if anything was replaced.
protected void justifySearchResults(SearchPhrase phrase, SearchResultMatcher rm) {
List<SearchResult> res = rm.getRequestResults();
// Nothing to do when every word of the phrase is already resolved.
if(!phrase.getUnknownSearchWords().isEmpty()) {
boolean resort = false;
// Size is captured once; it is not refreshed when results are removed/appended below.
int presize = res.size();
// NOTE(review): the bound is "i < presize || i < LIMIT_JUSTIFY_RESULTS", so the loop keeps
// running while either holds; if LIMIT_JUSTIFY_RESULTS exceeds the list size, res.get(i)
// can be called past the end of the list - confirm whether && was intended.
// i advances only in the else branch, i.e. when the current result yields no sub-results.
for(int i = 0; i < presize || i < LIMIT_JUSTIFY_RESULTS; ) {
SearchResult st = res.get(i);
// st.foundWordCount could be used
SearchPhrase pp = phrase.selectWord(st,
phrase.getUnknownSearchWords(), phrase.isLastUnknownSearchWordComplete());
// Separate matcher built with a null delegate and the request/requestNumber of rm.
SearchResultMatcher srm = new SearchResultMatcher(null, rm.request,
rm.requestNumber, totalLimit);
srm.setParentSearchResult(st);
searchInBackground(pp, srm);
if(srm.getRequestResults().size() > 0) {
// Replace the result: drop it at index i, append the sub-results at the tail.
rm.getRequestResults().remove(i);
rm.getRequestResults().addAll(srm.getRequestResults());
resort = true;
} else {
i++;
}
}
if(resort) {
sortSearchResults(phrase, rm.getRequestResults());
filterSearchDuplicateResults(phrase, rm.getRequestResults());
}
}
}
private void searchInBackground(final SearchPhrase phrase, SearchResultMatcher matcher) { private void searchInBackground(final SearchPhrase phrase, SearchResultMatcher matcher) {
for (SearchWord sw : phrase.getWords()) { for (SearchWord sw : phrase.getWords()) {
@ -303,8 +280,8 @@ public class SearchUICore {
@Override @Override
public int compare(SearchResult o1, SearchResult o2) { public int compare(SearchResult o1, SearchResult o2) {
if(o1.foundWordCount != o2.foundWordCount) { if(o1.getFoundWordCount() != o2.getFoundWordCount()) {
return -Algorithms.compare(o1.foundWordCount, o2.foundWordCount); return -Algorithms.compare(o1.getFoundWordCount(), o2.getFoundWordCount());
} }
double s1 = o1.getSearchDistance(loc); double s1 = o1.getSearchDistance(loc);
double s2 = o2.getSearchDistance(loc); double s2 = o2.getSearchDistance(loc);
@ -322,7 +299,8 @@ public class SearchUICore {
}); });
} }
public static class SearchResultMatcher implements ResultMatcher<SearchResult>{ public static class SearchResultMatcher
implements ResultMatcher<SearchResult>{
private final List<SearchResult> requestResults = new ArrayList<>(); private final List<SearchResult> requestResults = new ArrayList<>();
private final ResultMatcher<SearchResult> matcher; private final ResultMatcher<SearchResult> matcher;
private final int request; private final int request;
@ -340,14 +318,20 @@ public class SearchUICore {
this.totalLimit = totalLimit; this.totalLimit = totalLimit;
} }
public void setParentSearchResult(SearchResult parentSearchResult) { public SearchResult setParentSearchResult(SearchResult parentSearchResult) {
SearchResult prev = this.parentSearchResult;
this.parentSearchResult = parentSearchResult; this.parentSearchResult = parentSearchResult;
return prev;
} }
public List<SearchResult> getRequestResults() { public List<SearchResult> getRequestResults() {
return requestResults; return requestResults;
} }
/**
 * @return the number of results currently stored in {@code requestResults}
 */
public int getCount() {
	final List<SearchResult> collected = requestResults;
	return collected.size();
}
public void apiSearchFinished(SearchCoreAPI api, SearchPhrase phrase) { public void apiSearchFinished(SearchCoreAPI api, SearchPhrase phrase) {
if(matcher != null) { if(matcher != null) {
SearchResult sr = new SearchResult(phrase); SearchResult sr = new SearchResult(phrase);

View file

@ -86,6 +86,24 @@ public class SearchCoreFactory {
public int getSearchPriority(SearchPhrase p) { public int getSearchPriority(SearchPhrase p) {
return 1; return 1;
} }
/**
 * Publishes {@code res} itself, or lets {@code api} publish results found beneath it.
 * <p>
 * The unknown words matched by {@code res} are recorded first. If the phrase still has
 * unknown words and {@code api} is not null, {@code res} is selected into a derived phrase
 * (without the words listed in {@code res.otherWordsMatch}) and {@code api} searches that
 * phrase with {@code res} installed as the matcher's parent result. {@code res} is published
 * directly only when the matcher's result count did not change.
 *
 * @param phrase        phrase being searched
 * @param resultMatcher matcher that collects the results
 * @param res           candidate result
 * @param api           API used for the nested search, or {@code null} to publish directly
 * @throws IOException if the nested search fails
 */
protected void subSearchApiOrPublish(SearchPhrase phrase,
		SearchResultMatcher resultMatcher, SearchResult res, SearchBaseAPI api)
		throws IOException {
	phrase.countUnknownWordsMatch(res);
	int cnt = resultMatcher.getCount();
	if (!phrase.getUnknownSearchWords().isEmpty() && api != null) {
		// The derived phrase is built before res.parentSearchResult is assigned below.
		SearchPhrase nphrase = phrase.selectWord(res, phrase.getUnknownSearchWords(res.otherWordsMatch),
				phrase.isLastUnknownSearchWordComplete());
		SearchResult prev = resultMatcher.setParentSearchResult(res);
		res.parentSearchResult = prev;
		try {
			api.search(nphrase, resultMatcher);
		} finally {
			// Restore the previous parent even when the nested search throws, so the
			// shared matcher is not left pointing at this result.
			resultMatcher.setParentSearchResult(prev);
		}
	}
	// Unchanged count: the nested search published nothing (or did not run).
	if (resultMatcher.getCount() == cnt) {
		resultMatcher.publish(res);
	}
}
} }
@ -134,6 +152,14 @@ public class SearchCoreFactory {
private QuadTree<City> townCitiesQR = new QuadTree<City>(new QuadRect(0, 0, Integer.MAX_VALUE, Integer.MAX_VALUE), private QuadTree<City> townCitiesQR = new QuadTree<City>(new QuadRect(0, 0, Integer.MAX_VALUE, Integer.MAX_VALUE),
8, 0.55f); 8, 0.55f);
private List<City> resArray = new ArrayList<>(); private List<City> resArray = new ArrayList<>();
private SearchStreetByCityAPI cityApi;
private SearchBuildingAndIntersectionsByStreetAPI streetsApi;
/**
 * Stores the two APIs handed in by the caller in the {@code cityApi} and
 * {@code streetsApi} fields.
 *
 * @param streetsApi building/intersection-by-street API
 * @param cityApi    street-by-city API
 */
public SearchAddressByNameAPI(SearchBuildingAndIntersectionsByStreetAPI streetsApi,
		SearchStreetByCityAPI cityApi) {
	this.cityApi = cityApi;
	this.streetsApi = streetsApi;
}
@Override @Override
public int getSearchPriority(SearchPhrase p) { public int getSearchPriority(SearchPhrase p) {
@ -186,20 +212,19 @@ public class SearchCoreFactory {
resArray = townCitiesQR.queryInBox(bbox, resArray); resArray = townCitiesQR.queryInBox(bbox, resArray);
int limit = 0; int limit = 0;
for (City c : resArray) { for (City c : resArray) {
SearchResult sr = new SearchResult(phrase); SearchResult res = new SearchResult(phrase);
sr.object = c; res.object = c;
sr.file = (BinaryMapIndexReader) c.getReferenceFile(); res.file = (BinaryMapIndexReader) c.getReferenceFile();
sr.localeName = c.getName(phrase.getSettings().getLang(), true); res.localeName = c.getName(phrase.getSettings().getLang(), true);
sr.otherNames = c.getAllNames(true); res.otherNames = c.getAllNames(true);
sr.localeRelatedObjectName = sr.file.getRegionName(); res.localeRelatedObjectName = res.file.getRegionName();
sr.relatedObject = sr.file; res.relatedObject = res.file;
sr.location = c.getLocation(); res.location = c.getLocation();
sr.priority = 1; res.priority = 1;
sr.priorityDistance = 0.1; res.priorityDistance = 0.1;
sr.objectType = ObjectType.CITY; res.objectType = ObjectType.CITY;
if(nm.matches(sr.localeName) || nm.matches(sr.otherNames)) { if(nm.matches(res.localeName) || nm.matches(res.otherNames)) {
sr.foundWordCount += phrase.countUnknownWordsMatch(sr); subSearchApiOrPublish(phrase, resultMatcher, res, cityApi);
resultMatcher.publish(sr);
} }
if(limit++ > LIMIT * phrase.getRadiusLevel()) { if(limit++ > LIMIT * phrase.getRadiusLevel()) {
break; break;
@ -214,6 +239,7 @@ public class SearchCoreFactory {
if(phrase.getRadiusLevel() > 1 || phrase.getUnknownSearchWordLength() > 3) { if(phrase.getRadiusLevel() > 1 || phrase.getUnknownSearchWordLength() > 3) {
final boolean locSpecified = phrase.getLastTokenLocation() != null; final boolean locSpecified = phrase.getLastTokenLocation() != null;
LatLon loc = phrase.getLastTokenLocation(); LatLon loc = phrase.getLastTokenLocation();
final List<SearchResult> immediateResults = new ArrayList<>();
final QuadRect streetBbox = phrase.getRadiusBBoxToSearch(DEFAULT_ADDRESS_BBOX_RADIUS); final QuadRect streetBbox = phrase.getRadiusBBoxToSearch(DEFAULT_ADDRESS_BBOX_RADIUS);
final QuadRect postcodeBbox = phrase.getRadiusBBoxToSearch(DEFAULT_ADDRESS_BBOX_RADIUS * 5); final QuadRect postcodeBbox = phrase.getRadiusBBoxToSearch(DEFAULT_ADDRESS_BBOX_RADIUS * 5);
final QuadRect villagesBbox = phrase.getRadiusBBoxToSearch(DEFAULT_ADDRESS_BBOX_RADIUS * 3); final QuadRect villagesBbox = phrase.getRadiusBBoxToSearch(DEFAULT_ADDRESS_BBOX_RADIUS * 3);
@ -235,7 +261,6 @@ public class SearchCoreFactory {
sr.otherNames = object.getAllNames(true); sr.otherNames = object.getAllNames(true);
sr.localeRelatedObjectName = sr.file.getRegionName(); sr.localeRelatedObjectName = sr.file.getRegionName();
sr.relatedObject = sr.file; sr.relatedObject = sr.file;
sr.foundWordCount += phrase.countUnknownWordsMatch(sr);
sr.location = object.getLocation(); sr.location = object.getLocation();
sr.priorityDistance = 1; sr.priorityDistance = 1;
sr.priority = priority; sr.priority = priority;
@ -300,7 +325,7 @@ public class SearchCoreFactory {
return false; return false;
} }
limit ++; limit ++;
resultMatcher.publish(sr); immediateResults.add(sr);
return false; return false;
} }
@ -315,14 +340,26 @@ public class SearchCoreFactory {
while (offlineIterator.hasNext()) { while (offlineIterator.hasNext()) {
BinaryMapIndexReader r = offlineIterator.next(); BinaryMapIndexReader r = offlineIterator.next();
currentFile[0] = r; currentFile[0] = r;
immediateResults.clear();
SearchRequest<MapObject> req = BinaryMapIndexReader.buildAddressByNameRequest(rm, phrase SearchRequest<MapObject> req = BinaryMapIndexReader.buildAddressByNameRequest(rm, phrase
.getUnknownSearchWord().toLowerCase(), .getUnknownSearchWord().toLowerCase(),
phrase.isUnknownSearchWordComplete() ? StringMatcherMode.CHECK_EQUALS_FROM_SPACE : phrase.isUnknownSearchWordComplete() ? StringMatcherMode.CHECK_EQUALS_FROM_SPACE
StringMatcherMode.CHECK_STARTS_FROM_SPACE); : StringMatcherMode.CHECK_STARTS_FROM_SPACE);
if (locSpecified) { if (locSpecified) {
req.setBBoxRadius(loc.getLatitude(), loc.getLongitude(), phrase.getRadiusSearch(DEFAULT_ADDRESS_BBOX_RADIUS * 10)); req.setBBoxRadius(loc.getLatitude(), loc.getLongitude(),
phrase.getRadiusSearch(DEFAULT_ADDRESS_BBOX_RADIUS * 10));
} }
r.searchAddressDataByName(req); r.searchAddressDataByName(req);
for (SearchResult res : immediateResults) {
if(res.objectType == ObjectType.STREET) {
City ct = ((Street) res.object).getCity();
phrase.countUnknownWordsMatch(res, ct.getName(phrase.getSettings().getLang(), true),
ct.getAllNames(true));
subSearchApiOrPublish(phrase, resultMatcher, res, streetsApi);
} else {
subSearchApiOrPublish(phrase, resultMatcher, res, cityApi);
}
}
resultMatcher.apiSearchRegionFinished(this, r, phrase); resultMatcher.apiSearchRegionFinished(this, r, phrase);
} }
} }
@ -381,10 +418,8 @@ public class SearchCoreFactory {
sr.priorityDistance = 1; sr.priorityDistance = 1;
} }
sr.priority = SEARCH_AMENITY_BY_NAME_PRIORITY; sr.priority = SEARCH_AMENITY_BY_NAME_PRIORITY;
sr.foundWordCount += phrase.countUnknownWordsMatch(sr); phrase.countUnknownWordsMatch(sr);
sr.objectType = ObjectType.POI; sr.objectType = ObjectType.POI;
resultMatcher.publish(sr); resultMatcher.publish(sr);
return false; return false;
} }
@ -398,6 +433,7 @@ public class SearchCoreFactory {
BinaryMapIndexReader r = offlineIterator.next(); BinaryMapIndexReader r = offlineIterator.next();
currentFile[0] = r; currentFile[0] = r;
r.searchPoiByName(req); r.searchPoiByName(req);
resultMatcher.apiSearchRegionFinished(this, r, phrase); resultMatcher.apiSearchRegionFinished(this, r, phrase);
} }
return true; return true;
@ -619,6 +655,11 @@ public class SearchCoreFactory {
public static class SearchStreetByCityAPI extends SearchBaseAPI { public static class SearchStreetByCityAPI extends SearchBaseAPI {
private SearchBaseAPI streetsAPI;
/**
 * @param streetsAPI API kept in the {@code streetsAPI} field
 */
public SearchStreetByCityAPI(SearchBuildingAndIntersectionsByStreetAPI streetsAPI) {
	this.streetsAPI = streetsAPI;
}
private static int LIMIT = 10000; private static int LIMIT = 10000;
@Override @Override
public boolean search(SearchPhrase phrase, SearchResultMatcher resultMatcher) throws IOException { public boolean search(SearchPhrase phrase, SearchResultMatcher resultMatcher) throws IOException {
@ -651,10 +692,11 @@ public class SearchCoreFactory {
res.priority = SEARCH_STREET_BY_CITY_PRIORITY; res.priority = SEARCH_STREET_BY_CITY_PRIORITY;
//res.priorityDistance = 1; //res.priorityDistance = 1;
res.objectType = ObjectType.STREET; res.objectType = ObjectType.STREET;
subSearchApiOrPublish(phrase, resultMatcher, res, streetsAPI);
if (limit++ > LIMIT) { if (limit++ > LIMIT) {
break; break;
} }
resultMatcher.publish(res);
} }
return true; return true;
} }

View file

@ -6,6 +6,7 @@ import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.TreeSet;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import net.osmand.CollatorStringMatcher; import net.osmand.CollatorStringMatcher;
@ -115,6 +116,20 @@ public class SearchPhrase {
return unknownWords; return unknownWords;
} }
/**
 * Returns the unknown words of this phrase that are not contained in {@code exclude}.
 * When there is nothing to filter ({@code exclude} is null or empty, or there are no
 * unknown words) the internal list itself is returned rather than a copy.
 *
 * @param exclude words to leave out, may be {@code null}
 * @return the remaining unknown words, in their original order
 */
public List<String> getUnknownSearchWords(Collection<String> exclude) {
	boolean nothingToFilter = exclude == null || unknownWords.isEmpty() || exclude.isEmpty();
	if (nothingToFilter) {
		return unknownWords;
	}
	List<String> remaining = new ArrayList<>();
	for (String word : unknownWords) {
		if (exclude.contains(word)) {
			continue;
		}
		remaining.add(word);
	}
	return remaining;
}
public String getUnknownSearchWord() { public String getUnknownSearchWord() {
return unknownSearchWordTrim; return unknownSearchWordTrim;
} }
@ -245,13 +260,13 @@ public class SearchPhrase {
public SearchPhrase selectWord(SearchResult res, List<String> unknownWords, boolean lastComplete) { public SearchPhrase selectWord(SearchResult res, List<String> unknownWords, boolean lastComplete) {
SearchPhrase sp = new SearchPhrase(this.settings); SearchPhrase sp = new SearchPhrase(this.settings);
sp.words.addAll(this.words); addResult(res, sp);
SearchResult prnt = res.parentSearchResult; SearchResult prnt = res.parentSearchResult;
while(prnt != null) { while(prnt != null) {
addResult(prnt, sp); addResult(prnt, sp);
prnt = prnt.parentSearchResult; prnt = prnt.parentSearchResult;
} }
addResult(res, sp); sp.words.addAll(0, this.words);
if(unknownWords != null) { if(unknownWords != null) {
sp.lastUnknownSearchWordComplete = lastComplete; sp.lastUnknownSearchWordComplete = lastComplete;
for(int i = 0; i < unknownWords.size(); i++) { for(int i = 0; i < unknownWords.size(); i++) {
@ -267,7 +282,7 @@ public class SearchPhrase {
private void addResult(SearchResult res, SearchPhrase sp) { private void addResult(SearchResult res, SearchPhrase sp) {
SearchWord sw = new SearchWord(res.wordsSpan != null ? res.wordsSpan : res.localeName.trim(), res); SearchWord sw = new SearchWord(res.wordsSpan != null ? res.wordsSpan : res.localeName.trim(), res);
sp.words.add(sw); sp.words.add(0, sw);
} }
public boolean isLastWord(ObjectType... p) { public boolean isLastWord(ObjectType... p) {
@ -436,8 +451,12 @@ public class SearchPhrase {
} }
} }
public int countUnknownWordsMatch(SearchResult sr) {
int cnt = 0; public void countUnknownWordsMatch(SearchResult sr) {
countUnknownWordsMatch(sr, sr.localeName, sr.otherNames);
}
public void countUnknownWordsMatch(SearchResult sr, String localeName, Collection<String> otherNames) {
if(unknownWords.size() > 0) { if(unknownWords.size() > 0) {
for(int i = 0; i < unknownWords.size(); i++) { for(int i = 0; i < unknownWords.size(); i++) {
if(unknownWordsMatcher.size() == i) { if(unknownWordsMatcher.size() == i) {
@ -446,12 +465,14 @@ public class SearchPhrase {
StringMatcherMode.CHECK_STARTS_FROM_SPACE)); StringMatcherMode.CHECK_STARTS_FROM_SPACE));
} }
NameStringMatcher ms = unknownWordsMatcher.get(i); NameStringMatcher ms = unknownWordsMatcher.get(i);
if(ms.matches(sr.localeName) || ms.matches(sr.otherNames)) { if(ms.matches(localeName) || ms.matches(otherNames)) {
cnt++; if(sr.otherWordsMatch == null) {
sr.otherWordsMatch = new TreeSet<>();
}
sr.otherWordsMatch.add(unknownWords.get(i));
} }
} }
} }
return cnt;
} }
public int getRadiusSearch(int meters) { public int getRadiusSearch(int meters) {
return (1 << (getRadiusLevel() - 1)) * meters; return (1 << (getRadiusLevel() - 1)) * meters;

View file

@ -1,6 +1,7 @@
package net.osmand.search.core; package net.osmand.search.core;
import java.util.Collection; import java.util.Collection;
import java.util.List;
import net.osmand.binary.BinaryMapIndexReader; import net.osmand.binary.BinaryMapIndexReader;
import net.osmand.data.LatLon; import net.osmand.data.LatLon;
@ -14,17 +15,25 @@ public class SearchResult {
public ObjectType objectType; public ObjectType objectType;
public BinaryMapIndexReader file; public BinaryMapIndexReader file;
public int foundWordCount = 1;
public double priority; public double priority;
public double priorityDistance; public double priorityDistance;
public String wordsSpan ; public String wordsSpan ;
public SearchResult parentSearchResult; public SearchResult parentSearchResult;
public Collection<String> otherWordsMatch = null;
public SearchResult(SearchPhrase sp) { public SearchResult(SearchPhrase sp) {
this.requiredSearchPhrase = sp; this.requiredSearchPhrase = sp;
} }
/**
 * @return one plus the size of {@code otherWordsMatch}, or one when that collection
 *         is {@code null}
 */
public int getFoundWordCount() {
	return otherWordsMatch == null ? 1 : otherWordsMatch.size() + 1;
}
public double getSearchDistance(LatLon location) { public double getSearchDistance(LatLon location) {
double distance = 0; double distance = 0;
if (location != null && this.location != null) { if (location != null && this.location != null) {