Improve full-text search

This commit is contained in:
Victor Shcherb 2016-07-24 19:05:51 +02:00
parent 7a9718078a
commit 8d3e79e44e
4 changed files with 113 additions and 33 deletions

View file

@ -35,6 +35,7 @@ import org.apache.commons.logging.Log;
public class SearchUICore { public class SearchUICore {
private static final int TIMEOUT_BETWEEN_CHARS = 200; private static final int TIMEOUT_BETWEEN_CHARS = 200;
private static final int LIMIT_JUSTIFY_RESULTS = 5;
private static final Log LOG = PlatformUtil.getLog(SearchUICore.class); private static final Log LOG = PlatformUtil.getLog(SearchUICore.class);
private SearchPhrase phrase; private SearchPhrase phrase;
private SearchResultCollection currentSearchResult = new SearchResultCollection(); private SearchResultCollection currentSearchResult = new SearchResultCollection();
@ -163,16 +164,18 @@ public class SearchUICore {
public void run() { public void run() {
try { try {
SearchResultMatcher rm = new SearchResultMatcher(matcher, request, requestNumber, totalLimit); SearchResultMatcher rm = new SearchResultMatcher(matcher, request, requestNumber, totalLimit);
if(rm.isCancelled()) {
return;
}
if(TIMEOUT_BETWEEN_CHARS > 0) { if(TIMEOUT_BETWEEN_CHARS > 0) {
Thread.sleep(TIMEOUT_BETWEEN_CHARS); Thread.sleep(TIMEOUT_BETWEEN_CHARS);
} }
if(rm.isCancelled()) {
return;
}
searchInBackground(phrase, rm); searchInBackground(phrase, rm);
if (!rm.isCancelled()) { if (!rm.isCancelled()) {
sortSearchResults(phrase, rm.getRequestResults()); sortSearchResults(phrase, rm.getRequestResults());
filterSearchDuplicateResults(phrase, rm.getRequestResults()); filterSearchDuplicateResults(phrase, rm.getRequestResults());
justifySearchResults(phrase, rm);
LOG.info(">> Search phrase " + phrase + " " + rm.getRequestResults().size()); LOG.info(">> Search phrase " + phrase + " " + rm.getRequestResults().size());
SearchResultCollection collection = new SearchResultCollection(rm.getRequestResults(), SearchResultCollection collection = new SearchResultCollection(rm.getRequestResults(),
phrase); phrase);
@ -192,6 +195,36 @@ public class SearchUICore {
return quickRes; return quickRes;
} }
/**
 * Refines the top search results using the unknown (not yet resolved) words of the phrase.
 *
 * For each of the first {@code LIMIT_JUSTIFY_RESULTS} results a nested search is run with a
 * phrase in which that result is selected as a word and the unknown words are carried over.
 * If the nested search finds anything, the original result is replaced by the nested results
 * (each of which gets the original result as its parent). When at least one replacement
 * happened, the list is sorted and de-duplicated again.
 *
 * @param phrase the phrase being searched; nothing is done if it has no unknown words
 * @param rm     matcher holding the result list, which is modified in place
 */
protected void justifySearchResults(SearchPhrase phrase, SearchResultMatcher rm) {
    List<SearchResult> res = rm.getRequestResults();
    List<String> unknownWords = phrase.getUnknownSearchWords();
    if (unknownWords.isEmpty()) {
        return;
    }
    boolean resort = false;
    // Only the results present before refinement are candidates, and at most
    // LIMIT_JUSTIFY_RESULTS of them. The previous condition joined both bounds with
    // '||', which read past the end of short (or empty) lists and ignored the limit
    // for long ones.
    int toJustify = Math.min(res.size(), LIMIT_JUSTIFY_RESULTS);
    // 'i' is the position of the next original result. It only advances when the
    // current entry is kept, because a removal shifts the remaining originals down
    // while the nested results are appended at the end of the list.
    int i = 0;
    for (int processed = 0; processed < toJustify; processed++) {
        SearchResult st = res.get(i);
        // st.foundWordCount could be used
        SearchPhrase pp = phrase.selectWord(st, unknownWords,
                phrase.isLastUnknownSearchWordComplete());
        SearchResultMatcher srm = new SearchResultMatcher(null, rm.request,
                rm.requestNumber, totalLimit);
        srm.setParentSearchResult(st);
        searchInBackground(pp, srm);
        List<SearchResult> nested = srm.getRequestResults();
        if (!nested.isEmpty()) {
            res.remove(i);
            res.addAll(nested);
            resort = true;
        } else {
            i++;
        }
    }
    if (resort) {
        sortSearchResults(phrase, res);
        filterSearchDuplicateResults(phrase, res);
    }
}
private void searchInBackground(final SearchPhrase phrase, SearchResultMatcher matcher) { private void searchInBackground(final SearchPhrase phrase, SearchResultMatcher matcher) {
for (SearchWord sw : phrase.getWords()) { for (SearchWord sw : phrase.getWords()) {
if(sw.getResult() != null && sw.getResult().file != null) { if(sw.getResult() != null && sw.getResult().file != null) {
@ -270,6 +303,9 @@ public class SearchUICore {
@Override @Override
public int compare(SearchResult o1, SearchResult o2) { public int compare(SearchResult o1, SearchResult o2) {
if(o1.foundWordCount != o2.foundWordCount) {
return -Algorithms.compare(o1.foundWordCount, o2.foundWordCount);
}
double s1 = o1.getSearchDistance(loc); double s1 = o1.getSearchDistance(loc);
double s2 = o2.getSearchDistance(loc); double s2 = o2.getSearchDistance(loc);
int cmp = Double.compare(s1, s2); int cmp = Double.compare(s1, s2);
@ -291,6 +327,7 @@ public class SearchUICore {
private final ResultMatcher<SearchResult> matcher; private final ResultMatcher<SearchResult> matcher;
private final int request; private final int request;
private final int totalLimit; private final int totalLimit;
private SearchResult parentSearchResult;
private final AtomicInteger requestNumber; private final AtomicInteger requestNumber;
int count = 0; int count = 0;
@ -303,6 +340,10 @@ public class SearchUICore {
this.totalLimit = totalLimit; this.totalLimit = totalLimit;
} }
/**
 * Sets the parent result for this matcher.
 * The value is copied into the {@code parentSearchResult} field of results
 * that are published through this matcher.
 *
 * @param parentSearchResult the result to record as parent; may be {@code null}
 *                           (the field's initial value when never set)
 */
public void setParentSearchResult(SearchResult parentSearchResult) {
this.parentSearchResult = parentSearchResult;
}
public List<SearchResult> getRequestResults() { public List<SearchResult> getRequestResults() {
return requestResults; return requestResults;
} }
@ -312,6 +353,7 @@ public class SearchUICore {
SearchResult sr = new SearchResult(phrase); SearchResult sr = new SearchResult(phrase);
sr.objectType = ObjectType.SEARCH_API_FINISHED; sr.objectType = ObjectType.SEARCH_API_FINISHED;
sr.object = api; sr.object = api;
sr.parentSearchResult = parentSearchResult;
matcher.publish(sr); matcher.publish(sr);
} }
} }
@ -321,6 +363,7 @@ public class SearchUICore {
SearchResult sr = new SearchResult(phrase); SearchResult sr = new SearchResult(phrase);
sr.objectType = ObjectType.SEARCH_API_REGION_FINISHED; sr.objectType = ObjectType.SEARCH_API_REGION_FINISHED;
sr.object = api; sr.object = api;
sr.parentSearchResult = parentSearchResult;
sr.file = region; sr.file = region;
matcher.publish(sr); matcher.publish(sr);
} }
@ -330,6 +373,7 @@ public class SearchUICore {
public boolean publish(SearchResult object) { public boolean publish(SearchResult object) {
if(matcher == null || matcher.publish(object)) { if(matcher == null || matcher.publish(object)) {
count++; count++;
object.parentSearchResult = parentSearchResult;
if(totalLimit == -1 || count < totalLimit) { if(totalLimit == -1 || count < totalLimit) {
requestResults.add(object); requestResults.add(object);
} }

View file

@ -68,6 +68,7 @@ public class SearchCoreFactory {
public static final int SEARCH_ADDRESS_BY_NAME_API_PRIORITY_RADIUS2 = 5; public static final int SEARCH_ADDRESS_BY_NAME_API_PRIORITY_RADIUS2 = 5;
public static final int SEARCH_ADDRESS_BY_NAME_PRIORITY = 5; public static final int SEARCH_ADDRESS_BY_NAME_PRIORITY = 5;
public static final int SEARCH_ADDRESS_BY_NAME_PRIORITY_RADIUS2 = 5; public static final int SEARCH_ADDRESS_BY_NAME_PRIORITY_RADIUS2 = 5;
// context less (slower) // context less (slower)
public static final int SEARCH_AMENITY_BY_NAME_PRIORITY = 7; public static final int SEARCH_AMENITY_BY_NAME_PRIORITY = 7;
public static final int SEARCH_AMENITY_BY_NAME_API_PRIORITY_IF_POI_TYPE = 7; public static final int SEARCH_AMENITY_BY_NAME_API_PRIORITY_IF_POI_TYPE = 7;
@ -197,6 +198,7 @@ public class SearchCoreFactory {
sr.priorityDistance = 0.1; sr.priorityDistance = 0.1;
sr.objectType = ObjectType.CITY; sr.objectType = ObjectType.CITY;
if(nm.matches(sr.localeName) || nm.matches(sr.otherNames)) { if(nm.matches(sr.localeName) || nm.matches(sr.otherNames)) {
sr.foundWordCount += phrase.countUnknownWordsMatch(sr);
resultMatcher.publish(sr); resultMatcher.publish(sr);
} }
if(limit++ > LIMIT * phrase.getRadiusLevel()) { if(limit++ > LIMIT * phrase.getRadiusLevel()) {
@ -233,6 +235,7 @@ public class SearchCoreFactory {
sr.otherNames = object.getAllNames(true); sr.otherNames = object.getAllNames(true);
sr.localeRelatedObjectName = sr.file.getRegionName(); sr.localeRelatedObjectName = sr.file.getRegionName();
sr.relatedObject = sr.file; sr.relatedObject = sr.file;
sr.foundWordCount += phrase.countUnknownWordsMatch(sr);
sr.location = object.getLocation(); sr.location = object.getLocation();
sr.priorityDistance = 1; sr.priorityDistance = 1;
sr.priority = priority; sr.priority = priority;
@ -378,6 +381,7 @@ public class SearchCoreFactory {
sr.priorityDistance = 1; sr.priorityDistance = 1;
} }
sr.priority = SEARCH_AMENITY_BY_NAME_PRIORITY; sr.priority = SEARCH_AMENITY_BY_NAME_PRIORITY;
sr.foundWordCount += phrase.countUnknownWordsMatch(sr);
sr.objectType = ObjectType.POI; sr.objectType = ObjectType.POI;

View file

@ -23,6 +23,7 @@ public class SearchPhrase {
private List<SearchWord> words = new ArrayList<>(); private List<SearchWord> words = new ArrayList<>();
private List<String> unknownWords = new ArrayList<>(); private List<String> unknownWords = new ArrayList<>();
private List<NameStringMatcher> unknownWordsMatcher = new ArrayList<>();
private String unknownSearchWordTrim; private String unknownSearchWordTrim;
private String unknownSearchPhrase = ""; private String unknownSearchPhrase = "";
@ -31,8 +32,8 @@ public class SearchPhrase {
private List<BinaryMapIndexReader> indexes; private List<BinaryMapIndexReader> indexes;
private QuadRect cache1kmRect; private QuadRect cache1kmRect;
private boolean unknownSearchWordComplete; private boolean lastUnknownSearchWordComplete;
private static final String DELIMITER = ","; private static final String DELIMITER = " ";
private static final String ALLDELIMITERS = "\\s|,"; private static final String ALLDELIMITERS = "\\s|,";
private static final Pattern reg = Pattern.compile(ALLDELIMITERS); private static final Pattern reg = Pattern.compile(ALLDELIMITERS);
@ -67,6 +68,7 @@ public class SearchPhrase {
} }
sp.unknownSearchPhrase = restText; sp.unknownSearchPhrase = restText;
sp.unknownWords.clear(); sp.unknownWords.clear();
sp.unknownWordsMatcher.clear();
if (!reg.matcher(restText).find()) { if (!reg.matcher(restText).find()) {
sp.unknownSearchWordTrim = sp.unknownSearchPhrase.trim(); sp.unknownSearchWordTrim = sp.unknownSearchPhrase.trim();
@ -84,10 +86,10 @@ public class SearchPhrase {
} }
} }
} }
sp.unknownSearchWordComplete = sp.unknownWords.size() > 0; sp.lastUnknownSearchWordComplete = false;
if (text.length() > 0 && !sp.unknownSearchWordComplete) { if (text.length() > 0 ) {
char ch = text.charAt(text.length() - 1); char ch = text.charAt(text.length() - 1);
sp.unknownSearchWordComplete = ch == ' ' || ch == ',' || ch == '\r' || ch == '\n' sp.lastUnknownSearchWordComplete = ch == ' ' || ch == ',' || ch == '\r' || ch == '\n'
|| ch == ';'; || ch == ';';
} }
@ -101,7 +103,11 @@ public class SearchPhrase {
public boolean isUnknownSearchWordComplete() { public boolean isUnknownSearchWordComplete() {
return unknownSearchWordComplete; return lastUnknownSearchWordComplete || unknownWords.size() > 0;
}
public boolean isLastUnknownSearchWordComplete() {
return lastUnknownSearchWordComplete;
} }
@ -234,26 +240,35 @@ public class SearchPhrase {
} }
public SearchPhrase selectWord(SearchResult res) { public SearchPhrase selectWord(SearchResult res) {
return selectWord(res, null, false);
}
public SearchPhrase selectWord(SearchResult res, List<String> unknownWords, boolean lastComplete) {
SearchPhrase sp = new SearchPhrase(this.settings); SearchPhrase sp = new SearchPhrase(this.settings);
sp.words.addAll(this.words); sp.words.addAll(this.words);
SearchWord sw = new SearchWord(res.wordsSpan != null ? res.wordsSpan : res.localeName.trim(), res); SearchResult prnt = res.parentSearchResult;
sp.words.add(sw); while(prnt != null) {
addResult(prnt, sp);
prnt = prnt.parentSearchResult;
}
addResult(res, sp);
if(unknownWords != null) {
sp.lastUnknownSearchWordComplete = lastComplete;
for(int i = 0; i < unknownWords.size(); i++) {
if(i== 0) {
sp.unknownSearchWordTrim = unknownWords.get(0);
} else {
sp.unknownWords.add(unknownWords.get(i));
}
}
}
return sp; return sp;
} }
private void addResult(SearchResult res, SearchPhrase sp) {
SearchWord sw = new SearchWord(res.wordsSpan != null ? res.wordsSpan : res.localeName.trim(), res);
public List<SearchWord> excludefilterWords() { sp.words.add(sw);
List<SearchWord> w = new ArrayList<>();
for(SearchWord s : words) {
if(s.getResult() == null) {
w.add(s);
} }
}
return w;
}
public boolean isLastWord(ObjectType... p) { public boolean isLastWord(ObjectType... p) {
for (int i = words.size() - 1; i >= 0; i--) { for (int i = words.size() - 1; i >= 0; i--) {
@ -275,16 +290,12 @@ public class SearchPhrase {
return sm; return sm;
} }
sm = new NameStringMatcher(unknownSearchWordTrim, sm = new NameStringMatcher(unknownSearchWordTrim,
(unknownSearchWordComplete ? (lastUnknownSearchWordComplete ?
StringMatcherMode.CHECK_EQUALS_FROM_SPACE : StringMatcherMode.CHECK_EQUALS_FROM_SPACE :
StringMatcherMode.CHECK_STARTS_FROM_SPACE)); StringMatcherMode.CHECK_STARTS_FROM_SPACE));
return sm; return sm;
} }
public boolean hasSameConstantWords(SearchPhrase p) {
return excludefilterWords().equals(p.excludefilterWords());
}
public boolean hasObjectType(ObjectType p) { public boolean hasObjectType(ObjectType p) {
for(SearchWord s : words) { for(SearchWord s : words) {
if(s.getType() == p) { if(s.getType() == p) {
@ -425,8 +436,26 @@ public class SearchPhrase {
} }
} }
/**
 * Counts how many of this phrase's unknown words match the names of a search result.
 *
 * One matcher per unknown word is created lazily and cached by position in
 * {@code unknownWordsMatcher}. Every word except the last uses
 * {@code CHECK_EQUALS_FROM_SPACE}; the last one uses {@code CHECK_STARTS_FROM_SPACE}
 * (presumably because the last word may still be incomplete - confirm against
 * NameStringMatcher).
 *
 * @param sr result whose {@code localeName} and {@code otherNames} are tested
 * @return number of unknown words matching either the locale name or the other names
 */
public int countUnknownWordsMatch(SearchResult sr) {
    int matched = 0;
    final int total = unknownWords.size();
    for (int idx = 0; idx < total; idx++) {
        // Build the matcher for this word the first time it is needed.
        if (unknownWordsMatcher.size() == idx) {
            StringMatcherMode mode;
            if (idx < total - 1) {
                mode = StringMatcherMode.CHECK_EQUALS_FROM_SPACE;
            } else {
                mode = StringMatcherMode.CHECK_STARTS_FROM_SPACE;
            }
            unknownWordsMatcher.add(new NameStringMatcher(unknownWords.get(idx), mode));
        }
        NameStringMatcher wordMatcher = unknownWordsMatcher.get(idx);
        if (wordMatcher.matches(sr.localeName) || wordMatcher.matches(sr.otherNames)) {
            matched++;
        }
    }
    return matched;
}
public int getRadiusSearch(int meters) { public int getRadiusSearch(int meters) {
return (1 << (getRadiusLevel() - 1)) * meters; return (1 << (getRadiusLevel() - 1)) * meters;
} }
} }

View file

@ -14,8 +14,11 @@ public class SearchResult {
public ObjectType objectType; public ObjectType objectType;
public BinaryMapIndexReader file; public BinaryMapIndexReader file;
public int foundWordCount = 1;
public double priority; public double priority;
public double priorityDistance; public double priorityDistance;
public String wordsSpan ;
public SearchResult parentSearchResult;
public SearchResult(SearchPhrase sp) { public SearchResult(SearchPhrase sp) {
@ -40,8 +43,8 @@ public class SearchResult {
public Object relatedObject; public Object relatedObject;
public double distRelatedObjectName; public double distRelatedObjectName;
public String wordsSpan ;
public SearchResult preciseSearchResult;