From 8d3e79e44e4b279a578788be4715aec49e108cd8 Mon Sep 17 00:00:00 2001 From: Victor Shcherb Date: Sun, 24 Jul 2016 19:05:51 +0200 Subject: [PATCH] Improve full-text search --- .../src/net/osmand/search/SearchUICore.java | 50 ++++++++++- .../osmand/search/core/SearchCoreFactory.java | 4 + .../net/osmand/search/core/SearchPhrase.java | 85 +++++++++++++------ .../net/osmand/search/core/SearchResult.java | 7 +- 4 files changed, 113 insertions(+), 33 deletions(-) diff --git a/OsmAnd-java/src/net/osmand/search/SearchUICore.java b/OsmAnd-java/src/net/osmand/search/SearchUICore.java index 4a7dcb50af..464720e64f 100644 --- a/OsmAnd-java/src/net/osmand/search/SearchUICore.java +++ b/OsmAnd-java/src/net/osmand/search/SearchUICore.java @@ -35,6 +35,7 @@ import org.apache.commons.logging.Log; public class SearchUICore { private static final int TIMEOUT_BETWEEN_CHARS = 200; + private static final int LIMIT_JUSTIFY_RESULTS = 5; private static final Log LOG = PlatformUtil.getLog(SearchUICore.class); private SearchPhrase phrase; private SearchResultCollection currentSearchResult = new SearchResultCollection(); @@ -163,16 +164,18 @@ public class SearchUICore { public void run() { try { SearchResultMatcher rm = new SearchResultMatcher(matcher, request, requestNumber, totalLimit); - if(rm.isCancelled()) { - return; - } if(TIMEOUT_BETWEEN_CHARS > 0) { Thread.sleep(TIMEOUT_BETWEEN_CHARS); } + if(rm.isCancelled()) { + return; + } searchInBackground(phrase, rm); if (!rm.isCancelled()) { sortSearchResults(phrase, rm.getRequestResults()); filterSearchDuplicateResults(phrase, rm.getRequestResults()); + justifySearchResults(phrase, rm); + LOG.info(">> Search phrase " + phrase + " " + rm.getRequestResults().size()); SearchResultCollection collection = new SearchResultCollection(rm.getRequestResults(), phrase); @@ -192,6 +195,36 @@ public class SearchUICore { return quickRes; } + protected void justifySearchResults(SearchPhrase phrase, SearchResultMatcher rm) { + List res = rm.getRequestResults(); + if(!phrase.getUnknownSearchWords().isEmpty()) { + boolean resort = false; + int presize = res.size(); + for(int i = 0; i < presize || i < LIMIT_JUSTIFY_RESULTS; ) { + SearchResult st = res.get(i); + // st.foundWordCount could be used + SearchPhrase pp = phrase.selectWord(st, + phrase.getUnknownSearchWords(), phrase.isLastUnknownSearchWordComplete()); + + SearchResultMatcher srm = new SearchResultMatcher(null, rm.request, + rm.requestNumber, totalLimit); + srm.setParentSearchResult(st); + searchInBackground(pp, srm); + if(srm.getRequestResults().size() > 0) { + rm.getRequestResults().remove(i); + rm.getRequestResults().addAll(srm.getRequestResults()); + resort = true; + } else { + i++; + } + } + if(resort) { + sortSearchResults(phrase, rm.getRequestResults()); + filterSearchDuplicateResults(phrase, rm.getRequestResults()); + } + } + } + private void searchInBackground(final SearchPhrase phrase, SearchResultMatcher matcher) { for (SearchWord sw : phrase.getWords()) { if(sw.getResult() != null && sw.getResult().file != null) { @@ -270,6 +303,9 @@ public class SearchUICore { @Override public int compare(SearchResult o1, SearchResult o2) { + if(o1.foundWordCount != o2.foundWordCount) { + return -Algorithms.compare(o1.foundWordCount, o2.foundWordCount); + } double s1 = o1.getSearchDistance(loc); double s2 = o2.getSearchDistance(loc); int cmp = Double.compare(s1, s2); @@ -291,6 +327,7 @@ public class SearchUICore { private final ResultMatcher matcher; private final int request; private final int totalLimit; + private SearchResult parentSearchResult; private final AtomicInteger requestNumber; int count = 0; @@ -303,6 +340,10 @@ public class SearchUICore { this.totalLimit = totalLimit; } + public void setParentSearchResult(SearchResult parentSearchResult) { + this.parentSearchResult = parentSearchResult; + } + public List getRequestResults() { return requestResults; } @@ -312,6 +353,7 @@ public class SearchUICore { SearchResult sr = new SearchResult(phrase); sr.objectType = ObjectType.SEARCH_API_FINISHED; sr.object = api; + sr.parentSearchResult = parentSearchResult; matcher.publish(sr); } } @@ -321,6 +363,7 @@ public class SearchUICore { SearchResult sr = new SearchResult(phrase); sr.objectType = ObjectType.SEARCH_API_REGION_FINISHED; sr.object = api; + sr.parentSearchResult = parentSearchResult; sr.file = region; matcher.publish(sr); } @@ -330,6 +373,7 @@ public class SearchUICore { public boolean publish(SearchResult object) { if(matcher == null || matcher.publish(object)) { count++; + object.parentSearchResult = parentSearchResult; if(totalLimit == -1 || count < totalLimit) { requestResults.add(object); } diff --git a/OsmAnd-java/src/net/osmand/search/core/SearchCoreFactory.java b/OsmAnd-java/src/net/osmand/search/core/SearchCoreFactory.java index 0a9a1d18e5..b50de7a10a 100644 --- a/OsmAnd-java/src/net/osmand/search/core/SearchCoreFactory.java +++ b/OsmAnd-java/src/net/osmand/search/core/SearchCoreFactory.java @@ -68,6 +68,7 @@ public class SearchCoreFactory { public static final int SEARCH_ADDRESS_BY_NAME_API_PRIORITY_RADIUS2 = 5; public static final int SEARCH_ADDRESS_BY_NAME_PRIORITY = 5; public static final int SEARCH_ADDRESS_BY_NAME_PRIORITY_RADIUS2 = 5; + // context less (slower) public static final int SEARCH_AMENITY_BY_NAME_PRIORITY = 7; public static final int SEARCH_AMENITY_BY_NAME_API_PRIORITY_IF_POI_TYPE = 7; @@ -197,6 +198,7 @@ public class SearchCoreFactory { sr.priorityDistance = 0.1; sr.objectType = ObjectType.CITY; if(nm.matches(sr.localeName) || nm.matches(sr.otherNames)) { + sr.foundWordCount += phrase.countUnknownWordsMatch(sr); resultMatcher.publish(sr); } if(limit++ > LIMIT * phrase.getRadiusLevel()) { @@ -233,6 +235,7 @@ public class SearchCoreFactory { sr.otherNames = object.getAllNames(true); sr.localeRelatedObjectName = sr.file.getRegionName(); sr.relatedObject = sr.file; + sr.foundWordCount += phrase.countUnknownWordsMatch(sr); sr.location = object.getLocation(); sr.priorityDistance = 1; sr.priority = priority; @@ -378,6 +381,7 @@ public class SearchCoreFactory { sr.priorityDistance = 1; } sr.priority = SEARCH_AMENITY_BY_NAME_PRIORITY; + sr.foundWordCount += phrase.countUnknownWordsMatch(sr); sr.objectType = ObjectType.POI; diff --git a/OsmAnd-java/src/net/osmand/search/core/SearchPhrase.java b/OsmAnd-java/src/net/osmand/search/core/SearchPhrase.java index 8f522b51a8..530c8b42fd 100644 --- a/OsmAnd-java/src/net/osmand/search/core/SearchPhrase.java +++ b/OsmAnd-java/src/net/osmand/search/core/SearchPhrase.java @@ -23,6 +23,7 @@ public class SearchPhrase { private List words = new ArrayList<>(); private List unknownWords = new ArrayList<>(); + private List unknownWordsMatcher = new ArrayList<>(); private String unknownSearchWordTrim; private String unknownSearchPhrase = ""; @@ -31,8 +32,8 @@ public class SearchPhrase { private List indexes; private QuadRect cache1kmRect; - private boolean unknownSearchWordComplete; - private static final String DELIMITER = ","; + private boolean lastUnknownSearchWordComplete; + private static final String DELIMITER = " "; private static final String ALLDELIMITERS = "\\s|,"; private static final Pattern reg = Pattern.compile(ALLDELIMITERS); @@ -67,6 +68,7 @@ public class SearchPhrase { } sp.unknownSearchPhrase = restText; sp.unknownWords.clear(); + sp.unknownWordsMatcher.clear(); if (!reg.matcher(restText).find()) { sp.unknownSearchWordTrim = sp.unknownSearchPhrase.trim(); @@ -84,10 +86,10 @@ public class SearchPhrase { } } } - sp.unknownSearchWordComplete = sp.unknownWords.size() > 0; - if (text.length() > 0 && !sp.unknownSearchWordComplete) { + sp.lastUnknownSearchWordComplete = false; + if (text.length() > 0 ) { char ch = text.charAt(text.length() - 1); - sp.unknownSearchWordComplete = ch == ' ' || ch == ',' || ch == '\r' || ch == '\n' + sp.lastUnknownSearchWordComplete = ch == ' ' || ch == ',' || ch == '\r' || ch == '\n' || ch == ';'; } @@ -101,7 +103,11 @@ public class SearchPhrase { public boolean isUnknownSearchWordComplete() { - return unknownSearchWordComplete; + return lastUnknownSearchWordComplete || unknownWords.size() > 0; + } + + public boolean isLastUnknownSearchWordComplete() { + return lastUnknownSearchWordComplete; } @@ -234,27 +240,36 @@ public class SearchPhrase { } public SearchPhrase selectWord(SearchResult res) { + return selectWord(res, null, false); + } + + public SearchPhrase selectWord(SearchResult res, List unknownWords, boolean lastComplete) { SearchPhrase sp = new SearchPhrase(this.settings); - sp.words.addAll(this.words); - SearchWord sw = new SearchWord(res.wordsSpan != null ? res.wordsSpan : res.localeName.trim(), res); - sp.words.add(sw); + sp.words.addAll(this.words); + SearchResult prnt = res.parentSearchResult; + while(prnt != null) { + addResult(prnt, sp); + prnt = prnt.parentSearchResult; + } + addResult(res, sp); + if(unknownWords != null) { + sp.lastUnknownSearchWordComplete = lastComplete; + for(int i = 0; i < unknownWords.size(); i++) { + if(i== 0) { + sp.unknownSearchWordTrim = unknownWords.get(0); + } else { + sp.unknownWords.add(unknownWords.get(i)); + } + } + } return sp; } - - - - public List excludefilterWords() { - List w = new ArrayList<>(); - for(SearchWord s : words) { - if(s.getResult() == null) { - w.add(s); - } - } - return w; + + private void addResult(SearchResult res, SearchPhrase sp) { + SearchWord sw = new SearchWord(res.wordsSpan != null ? res.wordsSpan : res.localeName.trim(), res); + sp.words.add(sw); } - - public boolean isLastWord(ObjectType... p) { for (int i = words.size() - 1; i >= 0; i--) { SearchWord sw = words.get(i); @@ -275,16 +290,12 @@ public class SearchPhrase { return sm; } sm = new NameStringMatcher(unknownSearchWordTrim, - (unknownSearchWordComplete ? + (lastUnknownSearchWordComplete ? StringMatcherMode.CHECK_EQUALS_FROM_SPACE : StringMatcherMode.CHECK_STARTS_FROM_SPACE)); return sm; } - public boolean hasSameConstantWords(SearchPhrase p) { - return excludefilterWords().equals(p.excludefilterWords()); - } - public boolean hasObjectType(ObjectType p) { for(SearchWord s : words) { if(s.getType() == p) { @@ -425,8 +436,26 @@ public class SearchPhrase { } } - + public int countUnknownWordsMatch(SearchResult sr) { + int cnt = 0; + if(unknownWords.size() > 0) { + for(int i = 0; i < unknownWords.size(); i++) { + if(unknownWordsMatcher.size() == i) { + unknownWordsMatcher.add(new NameStringMatcher(unknownWords.get(i), + i < unknownWords.size() - 1 ? StringMatcherMode.CHECK_EQUALS_FROM_SPACE : + StringMatcherMode.CHECK_STARTS_FROM_SPACE)); + } + NameStringMatcher ms = unknownWordsMatcher.get(i); + if(ms.matches(sr.localeName) || ms.matches(sr.otherNames)) { + cnt++; + } + } + } + return cnt; + } public int getRadiusSearch(int meters) { return (1 << (getRadiusLevel() - 1)) * meters; } + + } diff --git a/OsmAnd-java/src/net/osmand/search/core/SearchResult.java b/OsmAnd-java/src/net/osmand/search/core/SearchResult.java index 191d49937c..4b51ea42b9 100644 --- a/OsmAnd-java/src/net/osmand/search/core/SearchResult.java +++ b/OsmAnd-java/src/net/osmand/search/core/SearchResult.java @@ -14,8 +14,11 @@ public class SearchResult { public ObjectType objectType; public BinaryMapIndexReader file; + public int foundWordCount = 1; public double priority; public double priorityDistance; + public String wordsSpan ; + public SearchResult parentSearchResult; public SearchResult(SearchPhrase sp) { @@ -40,8 +43,8 @@ public class SearchResult { public Object relatedObject; public double distRelatedObjectName; - public String wordsSpan ; - public SearchResult preciseSearchResult; + +