POI Name index

This commit is contained in:
Victor Shcherb 2011-09-27 01:22:07 +02:00
parent 068a15d7cc
commit 54c3487c48
6 changed files with 1848 additions and 648 deletions

View file

@ -42,6 +42,7 @@ import com.google.protobuf.WireFormat;
public class BinaryMapIndexReader {
public final static int TRANSPORT_STOP_ZOOM = 24;
protected static final int SHIFT_COORDINATES = 5;
private final static Log log = LogUtil.getLog(BinaryMapIndexReader.class);
private final RandomAccessFile raf;
@ -807,7 +808,7 @@ public class BinaryMapIndexReader {
}
}
private int MASK_TO_READ = ~((1 << BinaryMapIndexWriter.SHIFT_COORDINATES) - 1);
private int MASK_TO_READ = ~((1 << SHIFT_COORDINATES) - 1);
private BinaryMapDataObject readMapDataObject(int left, int right, int top, int bottom, SearchRequest<BinaryMapDataObject> req,
MapIndex root) throws IOException {
int tag = WireFormat.getTagFieldNumber(codedIS.readTag());
@ -826,8 +827,8 @@ public class BinaryMapIndexReader {
int maxY = 0;
req.numberOfVisitedObjects++;
while(codedIS.getBytesUntilLimit() > 0){
int x = (codedIS.readSInt32() << BinaryMapIndexWriter.SHIFT_COORDINATES) + px;
int y = (codedIS.readSInt32() << BinaryMapIndexWriter.SHIFT_COORDINATES) + py;
int x = (codedIS.readSInt32() << SHIFT_COORDINATES) + px;
int y = (codedIS.readSInt32() << SHIFT_COORDINATES) + py;
req.cacheCoordinates.add(x);
req.cacheCoordinates.add(y);
px = x;

View file

@ -1,6 +1,8 @@
package net.osmand.binary;
import gnu.trove.list.TLongList;
import gnu.trove.list.array.TIntArrayList;
import gnu.trove.list.array.TLongArrayList;
import java.io.IOException;
import java.io.OutputStream;
@ -18,6 +20,8 @@ import net.osmand.binary.OsmandOdb.CityIndex;
import net.osmand.binary.OsmandOdb.InteresectedStreets;
import net.osmand.binary.OsmandOdb.MapEncodingRule;
import net.osmand.binary.OsmandOdb.OsmAndPoiBoxDataAtom;
import net.osmand.binary.OsmandOdb.OsmAndPoiNameIndexData;
import net.osmand.binary.OsmandOdb.OsmAndPoiNameIndexDataAtom;
import net.osmand.binary.OsmandOdb.OsmAndTransportIndex;
import net.osmand.binary.OsmandOdb.PostcodeIndex;
import net.osmand.binary.OsmandOdb.StreetIndex;
@ -31,6 +35,7 @@ import net.osmand.data.IndexConstants;
import net.osmand.data.MapObject;
import net.osmand.data.Street;
import net.osmand.data.TransportStop;
import net.osmand.data.preparation.IndexPoiCreator.PoiTileBox;
import net.osmand.osm.LatLon;
import net.osmand.osm.MapUtils;
import net.osmand.osm.Node;
@ -38,6 +43,7 @@ import net.osmand.osm.MapRenderingTypes.MapRulType;
import net.sf.junidecode.Junidecode;
import com.google.protobuf.CodedOutputStream;
import com.google.protobuf.MessageLite;
import com.google.protobuf.WireFormat;
import com.google.protobuf.WireFormat.FieldType;
@ -45,7 +51,7 @@ public class BinaryMapIndexWriter {
private RandomAccessFile raf;
private CodedOutputStream codedOutStream;
protected static final int SHIFT_COORDINATES = 5;
protected static final int SHIFT_COORDINATES = BinaryMapIndexReader.SHIFT_COORDINATES;
private static class Bounds {
public Bounds(int leftX, int rightX, int topY, int bottomY) {
@ -806,27 +812,75 @@ public class BinaryMapIndexWriter {
codedOutStream.writeMessage(OsmandOdb.OsmAndPoiBox.CATEGORIES_FIELD_NUMBER, builder.build());
}
/**
 * Writes the given strings as the {@code includeNamesList} string table of a POI box.
 * The writer state is verified with {@code checkPeekState(POI_BOX)} before anything is written.
 *
 * @param includes keys to store; they are added in the set's iteration order
 * @throws IOException if the underlying output stream fails
 */
public void writePoiIncludedStrings(Set<String> includes) throws IOException {
	checkPeekState(POI_BOX);
	OsmandOdb.IndexedStringTable.Builder tableBuilder = OsmandOdb.IndexedStringTable.newBuilder();
	for (String includedName : includes) {
		tableBuilder.addKey(includedName);
	}
	OsmandOdb.IndexedStringTable includedNames = tableBuilder.build();
	codedOutStream.writeMessage(OsmandOdb.OsmAndPoiBox.INCLUDENAMESLIST_FIELD_NUMBER, includedNames);
}
public void writePoiExcludedStrings(Set<String> parentNames, Set<String> includes) throws IOException {
checkPeekState(POI_BOX);
OsmandOdb.IndexedStringTable.Builder builder = OsmandOdb.IndexedStringTable.newBuilder();
for(String s : parentNames){
if(!includes.contains(s)){
builder.addKey(s);
/**
 * Writes the POI name index as field {@code OsmAndPoiIndex.nameIndex}: an indexed string table
 * (name prefix -> byte offset of its data message, counted from the start of the first data
 * message) followed by one {@code OsmAndPoiNameIndexData} message per prefix.
 * <p>
 * Every atom is written with {@code shiftTo = 0} as a placeholder. For each tile box the method
 * records where those 4-byte placeholders are located so they can be overwritten later.
 *
 * @param namesIndex name prefix -> tile boxes referenced by that prefix; prefixes are written in
 *                   the map's iteration order
 * @param fpPoiIndex file position subtracted from the current file pointer when the recorded
 *                   placeholder offsets are rebased
 * @return for every referenced tile box, the recorded positions of its {@code shiftTo} placeholders
 * @throws IOException if writing to the underlying file fails
 */
public Map<PoiTileBox, TLongList> writePoiNameIndex(Map<String, List<PoiTileBox>> namesIndex, long fpPoiIndex) throws IOException {
	checkPeekState(POI_INDEX_INIT);
	codedOutStream.writeTag(OsmandOdb.OsmAndPoiIndex.NAMEINDEX_FIELD_NUMBER, WireFormat.WIRETYPE_FIXED32_LENGTH_DELIMITED);
	preserveInt32Size();

	Map<String, MessageLite> message = new LinkedHashMap<String, MessageLite>();
	Map<String, Integer> indexedTable = new LinkedHashMap<String, Integer>();
	Map<PoiTileBox, TLongList> fpToWriteSeeks = new LinkedHashMap<PoiTileBox, TLongList>();
	// total encoded size of the data messages built so far == offset of the next data message
	int previousSize = 0;
	for (Map.Entry<String, List<PoiTileBox>> e : namesIndex.entrySet()) {
		OsmandOdb.OsmAndPoiNameIndexData.Builder builder = OsmandOdb.OsmAndPoiNameIndexData.newBuilder();
		List<PoiTileBox> tileBoxes = e.getValue();
		for (PoiTileBox box : tileBoxes) {
			OsmandOdb.OsmAndPoiNameIndexDataAtom.Builder bs = OsmandOdb.OsmAndPoiNameIndexDataAtom.newBuilder();
			bs.setX(box.getX());
			bs.setY(box.getY());
			bs.setZoom(box.getZoom());
			// placeholder; its position is recorded below
			bs.setShiftTo(0);
			OsmAndPoiNameIndexDataAtom atom = bs.build();
			builder.addAtoms(atom);
		}
		OsmAndPoiNameIndexData msg = builder.build();
		message.put(e.getKey(), msg);
		indexedTable.put(e.getKey(), previousSize);
		previousSize += CodedOutputStream.computeMessageSize(OsmandOdb.OsmAndPoiNameIndex.DATA_FIELD_NUMBER, msg);

		// Walk the atoms backwards from the end of the message: the fixed32 shiftTo value is
		// taken to be the last 4 bytes of each atom, so it starts 4 bytes before the atom's end.
		int accumulateSize = 4;
		for (int i = tileBoxes.size() - 1; i >= 0; i--) {
			PoiTileBox box = tileBoxes.get(i);
			TLongList seeks = fpToWriteSeeks.get(box);
			if (seeks == null) {
				seeks = new TLongArrayList();
				fpToWriteSeeks.put(box, seeks);
			}
			seeks.add(previousSize - accumulateSize);
			accumulateSize += CodedOutputStream.computeMessageSize(
					OsmandOdb.OsmAndPoiNameIndexData.ATOMS_FIELD_NUMBER, msg.getAtoms(i));
		}
	}

	writeIndexedTable(OsmandOdb.OsmAndPoiNameIndex.TABLE_FIELD_NUMBER, indexedTable);
	// flush before reading the file pointer (presumably codedOutStream buffers writes to raf)
	codedOutStream.flush();
	// Rebase the offsets, which so far are counted from the first data message.
	// NOTE(review): after adding diff the positions are relative to fpPoiIndex, not absolute
	// file offsets - confirm this matches how the consumer seeks to them.
	int diff = (int) (raf.getFilePointer() - fpPoiIndex);
	for (TLongList es : fpToWriteSeeks.values()) {
		for (int i = 0; i < es.size(); i++) {
			es.set(i, es.get(i) + diff);
		}
	}

	for (MessageLite data : message.values()) {
		codedOutStream.writeMessage(OsmandOdb.OsmAndPoiNameIndex.DATA_FIELD_NUMBER, data);
	}

	writeInt32Size();
	return fpToWriteSeeks;
}
/**
 * Writes an indexed string table under the given field number. The field is framed with
 * {@code preserveInt32Size()} / {@code writeInt32Size()}, and every map entry is emitted as a
 * key string immediately followed by its uint32 value, in the map's iteration order.
 *
 * @param tag          field number of the table in the enclosing message
 * @param indexedTable key -> value pairs to store
 * @throws IOException if the underlying output stream fails
 */
private void writeIndexedTable(int tag, Map<String, Integer> indexedTable) throws IOException {
	codedOutStream.writeTag(tag, WireFormat.WIRETYPE_FIXED32_LENGTH_DELIMITED);
	preserveInt32Size();

	for (String key : indexedTable.keySet()) {
		codedOutStream.writeString(OsmandOdb.IndexedStringTable.KEY_FIELD_NUMBER, key);
		codedOutStream.writeUInt32(OsmandOdb.IndexedStringTable.VAL_FIELD_NUMBER, indexedTable.get(key));
	}

	writeInt32Size();
}
public void writePoiDataAtom(long id, int x24shift, int y24shift, String nameEn, String name, TIntArrayList types, String openingHours,
String site, String phone) throws IOException {
checkPeekState(POI_DATA);
@ -860,14 +914,16 @@ public class BinaryMapIndexWriter {
}
public void startWritePoiData(int zoom, int x, int y, long fpPoiIndex, long fpPoiBox) throws IOException {
public void startWritePoiData(int zoom, int x, int y, long fpPoiIndex, TLongList fpPoiBox) throws IOException {
pushState(POI_DATA, POI_INDEX_INIT);
codedOutStream.writeTag(OsmandOdb.OsmAndPoiIndex.POIDATA_FIELD_NUMBER, WireFormat.WIRETYPE_FIXED32_LENGTH_DELIMITED);
long startPoiData = preserveInt32Size();
// write shift to that data
long filePointer = raf.getFilePointer();
raf.seek(fpPoiBox);
raf.writeInt((int) (startPoiData - fpPoiIndex - 4));
for (int i = 0; i < fpPoiBox.size(); i++) {
raf.seek(fpPoiBox.get(i));
raf.writeInt((int) (startPoiData - fpPoiIndex - 4));
}
raf.seek(filePointer);
codedOutStream.writeUInt32(OsmandOdb.OsmAndPoiBoxData.ZOOM_FIELD_NUMBER, zoom);

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,8 @@
package net.osmand.data.preparation;
import gnu.trove.list.TLongList;
import gnu.trove.list.array.TIntArrayList;
import gnu.trove.list.array.TLongArrayList;
import java.io.File;
import java.io.FileNotFoundException;
@ -18,9 +20,7 @@ import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import net.osmand.Algoritms;
import net.osmand.IProgress;
@ -49,8 +49,6 @@ public class IndexPoiCreator extends AbstractIndexPartCreator {
private static final int ZOOM_TO_SAVE_START = 6;
private static final int ZOOM_TO_WRITE_CATEGORIES_START = 12;
private static final int ZOOM_TO_WRITE_CATEGORIES_END = 16;
private static final int ZOOM_TO_WRITE_NAME_START = 9;
private static final int ZOOM_TO_WRITE_NAME_END = 11;
private boolean useInMemoryCreator = true;
@ -195,6 +193,8 @@ public class IndexPoiCreator extends AbstractIndexPartCreator {
Collator collator = Collator.getInstance();
collator.setStrength(Collator.PRIMARY);
Map<String, List<PoiTileBox>> namesIndex = new LinkedHashMap<String, List<PoiTileBox>>();
// 0. process all entities
ResultSet rs;
if(useInMemoryCreator) {
@ -203,8 +203,8 @@ public class IndexPoiCreator extends AbstractIndexPartCreator {
rs = poiConnection.createStatement().executeQuery("SELECT x,y,name,name_en,type,subtype from poi");
}
int zoomToStart = ZOOM_TO_SAVE_START;
Tree<PoiBox> rootZoomsTree = new Tree<PoiBox>();
rootZoomsTree.setNode(new PoiBox());
Tree<PoiTileBox> rootZoomsTree = new Tree<PoiTileBox>();
rootZoomsTree.setNode(new PoiTileBox());
int minX = Integer.MAX_VALUE;
int maxX = 0;
int minY = Integer.MAX_VALUE;
@ -227,22 +227,21 @@ public class IndexPoiCreator extends AbstractIndexPartCreator {
String type = rs.getString(5);
String subtype = rs.getString(6);
Tree<PoiBox> prevTree = rootZoomsTree;
Tree<PoiTileBox> prevTree = rootZoomsTree;
rootZoomsTree.getNode().addCategory(type, subtype);
rootZoomsTree.getNode().addNamePrefix(name, nameEn);
for (int i = zoomToStart; i <= ZOOM_TO_SAVE_END; i++) {
int xs = x >> (31 - i);
int ys = y >> (31 - i);
Tree<PoiBox> subtree = null;
for (Tree<PoiBox> sub : prevTree.getSubtrees()) {
Tree<PoiTileBox> subtree = null;
for (Tree<PoiTileBox> sub : prevTree.getSubtrees()) {
if (sub.getNode().x == xs && sub.getNode().y == ys && sub.getNode().zoom == i) {
subtree = sub;
break;
}
}
if (subtree == null) {
subtree = new Tree<PoiBox>();
PoiBox poiBox = new PoiBox();
subtree = new Tree<PoiTileBox>();
PoiTileBox poiBox = new PoiTileBox();
subtree.setNode(poiBox);
poiBox.x = xs;
poiBox.y = ys;
@ -251,10 +250,11 @@ public class IndexPoiCreator extends AbstractIndexPartCreator {
prevTree.addSubTree(subtree);
}
subtree.getNode().addCategory(type, subtype);
subtree.getNode().addNamePrefix(name, nameEn);
prevTree = subtree;
}
addNamePrefix(name, nameEn, prevTree.getNode(), namesIndex);
if (useInMemoryCreator) {
if (prevTree.getNode().poiData == null) {
prevTree.getNode().poiData = new ArrayList<PoiData>();
@ -287,6 +287,9 @@ public class IndexPoiCreator extends AbstractIndexPartCreator {
// 2. write categories table
Map<String, Map<String, Integer>> categories = rootZoomsTree.node.categories;
Map<String, Integer> catIndexes = writer.writePOICategoriesTable(categories);
// 2.5 write names table
Map<PoiTileBox, TLongList> fpToWriteSeeks = writer.writePoiNameIndex(namesIndex, startFpPoiIndex);
// 3. write boxes
log.info("Poi box processing finishied");
@ -304,9 +307,8 @@ public class IndexPoiCreator extends AbstractIndexPartCreator {
}
// 3.2 write tree using stack
Map<PoiBox, Long> fpToWriteSeeks = new LinkedHashMap<PoiBox, Long>();
for (Tree<PoiBox> subs : rootZoomsTree.getSubtrees()) {
writePoiBoxes(writer, subs, fpToWriteSeeks, categories, catIndexes, rootZoomsTree.getNode().startsName);
for (Tree<PoiTileBox> subs : rootZoomsTree.getSubtrees()) {
writePoiBoxes(writer, subs, fpToWriteSeeks, categories, catIndexes);
}
// 4. write poi data
@ -315,7 +317,7 @@ public class IndexPoiCreator extends AbstractIndexPartCreator {
.prepareStatement("SELECT id, x, y, name_en, name, type, subtype, opening_hours, site, phone from poi "
+ "where x >= ? AND x < ? AND y >= ? AND y < ?");
TIntArrayList types = new TIntArrayList();
for (Map.Entry<PoiBox, Long> entry : fpToWriteSeeks.entrySet()) {
for (Map.Entry<PoiTileBox, TLongList> entry : fpToWriteSeeks.entrySet()) {
int z = entry.getKey().zoom;
int x = entry.getKey().x;
int y = entry.getKey().y;
@ -372,10 +374,43 @@ public class IndexPoiCreator extends AbstractIndexPartCreator {
writer.endWritePOIIndex();
}
/**
 * Registers the name prefixes of one POI for the given tile box.
 * Both the native name and the English name are indexed; when no English name is supplied,
 * a transliteration of the native name produced by {@code Junidecode.unidecode} is used instead.
 *
 * @param name    native POI name
 * @param nameEn  English POI name, may be empty
 * @param data    tile box the POI belongs to
 * @param poiData prefix -> tile boxes index that is updated in place
 */
public void addNamePrefix(String name, String nameEn, PoiTileBox data, Map<String, List<PoiTileBox>> poiData) {
	String latinName = Algoritms.isEmpty(nameEn) ? Junidecode.unidecode(name) : nameEn;
	parsePrefix(name, data, poiData);
	parsePrefix(latinName, data, poiData);
}
private void writePoiBoxes(BinaryMapIndexWriter writer, Tree<PoiBox> tree, Map<PoiBox, Long> fpToWriteSeeks,
Map<String, Map<String, Integer>> categories, Map<String, Integer> catIndexes,
Set<String> parentNames) throws IOException, SQLException {
/**
 * Splits {@code name} into words (maximal runs of letters and digits) and, for every word,
 * appends {@code data} to the list stored under the word's lower-cased prefix of at most
 * three characters. Lists are created on first use; the same box may be appended repeatedly.
 *
 * @param name    text to split into words
 * @param data    tile box to register under every prefix found
 * @param poiData prefix -> tile boxes index that is updated in place
 */
private void parsePrefix(String name, PoiTileBox data, Map<String, List<PoiTileBox>> poiData) {
	final int length = name.length();
	int wordStart = -1;
	// pos == length acts as a virtual separator that closes a word ending the string
	for (int pos = 0; pos <= length; pos++) {
		boolean atBoundary = pos == length || !Character.isLetterOrDigit(name.charAt(pos));
		if (!atBoundary) {
			if (wordStart == -1) {
				wordStart = pos;
			}
			continue;
		}
		if (wordStart == -1) {
			// separator outside of any word
			continue;
		}
		int prefixEnd = Math.min(pos, wordStart + 3);
		String val = name.substring(wordStart, prefixEnd).toLowerCase();
		if (!poiData.containsKey(val)) {
			poiData.put(val, new ArrayList<PoiTileBox>());
		}
		poiData.get(val).add(data);
		wordStart = -1;
	}
}
private void writePoiBoxes(BinaryMapIndexWriter writer, Tree<PoiTileBox> tree, Map<PoiTileBox, TLongList> fpToWriteSeeks,
Map<String, Map<String, Integer>> categories, Map<String, Integer> catIndexes) throws IOException, SQLException {
int x = tree.getNode().x;
int y = tree.getNode().y;
int zoom = tree.getNode().zoom;
@ -391,25 +426,16 @@ public class IndexPoiCreator extends AbstractIndexPartCreator {
}
writer.writePOICategories(types);
}
if(zoom >= ZOOM_TO_WRITE_NAME_START && zoom <= ZOOM_TO_WRITE_NAME_END){
Set<String> names = tree.getNode().startsName;
if (names.size() > 0) {
if (zoom == ZOOM_TO_WRITE_NAME_START || parentNames.size() - names.size() > names.size()) {
System.out.println("I" + names.size() + " " + zoom);
writer.writePoiIncludedStrings(names);
} else {
System.out.println("E" + (parentNames.size() - names.size()) + " " + zoom);
writer.writePoiExcludedStrings(parentNames, names);
}
}
}
if (!end) {
for (Tree<PoiBox> subTree : tree.getSubtrees()) {
writePoiBoxes(writer, subTree, fpToWriteSeeks, categories, catIndexes, tree.getNode().startsName);
for (Tree<PoiTileBox> subTree : tree.getSubtrees()) {
writePoiBoxes(writer, subTree, fpToWriteSeeks, categories, catIndexes);
}
} else {
fpToWriteSeeks.put(tree.getNode(), fp);
if(!fpToWriteSeeks.containsKey(tree.getNode())) {
fpToWriteSeeks.put(tree.getNode(), new TLongArrayList());
}
fpToWriteSeeks.get(tree.getNode()).add(fp);
}
writer.endWritePoiBox();
}
@ -427,14 +453,24 @@ public class IndexPoiCreator extends AbstractIndexPartCreator {
String site;
}
private static class PoiBox {
public static class PoiTileBox {
int x;
int y;
int zoom;
Map<String, Map<String, Integer>> categories = new LinkedHashMap<String, Map<String, Integer>>();
Set<String> startsName = new TreeSet<String>();
List<PoiData> poiData = null;
/** @return the {@code x} value stored for this tile box */
public int getX() {
	return this.x;
}
/** @return the {@code y} value stored for this tile box */
public int getY() {
	return this.y;
}
/** @return the {@code zoom} value stored for this tile box */
public int getZoom() {
	return this.zoom;
}
private void addCategory(String cat, String subCat){
if(!categories.containsKey(cat)){
categories.put(cat, new TreeMap<String, Integer>());
@ -450,35 +486,6 @@ public class IndexPoiCreator extends AbstractIndexPartCreator {
categories.get(cat).put(subCat, 0);
}
/**
 * Records the name prefixes of one POI in this box.
 * Both the native name and the English name are parsed; when no English name is supplied,
 * a transliteration of the native name produced by {@code Junidecode.unidecode} is used instead.
 *
 * @param name   native POI name
 * @param nameEn English POI name, may be empty
 */
public void addNamePrefix(String name, String nameEn) {
	String latinName = Algoritms.isEmpty(nameEn) ? Junidecode.unidecode(name) : nameEn;
	parsePrefix(name);
	parsePrefix(latinName);
}
/**
 * Splits {@code name} into words (maximal runs of letters and digits) and adds the lower-cased
 * prefix of at most three characters of every word to {@code startsName}.
 *
 * @param name text to split into words
 */
private void parsePrefix(String name) {
	final int length = name.length();
	int wordStart = -1;
	// pos == length acts as a virtual separator that closes a word ending the string
	for (int pos = 0; pos <= length; pos++) {
		boolean atBoundary = pos == length || !Character.isLetterOrDigit(name.charAt(pos));
		if (!atBoundary) {
			if (wordStart == -1) {
				wordStart = pos;
			}
			continue;
		}
		if (wordStart != -1) {
			int prefixEnd = Math.min(pos, wordStart + 3);
			startsName.add(name.substring(wordStart, prefixEnd).toLowerCase());
			wordStart = -1;
		}
	}
}
}
private static class Tree<T> {

View file

@ -1,7 +1,6 @@
option java_package = "net.osmand.binary";
//protoc --java_out=../.. osmand_odb.proto
//
// STORAGE LAYER: Storing primitives.
//
@ -30,6 +29,20 @@ message StringTable {
repeated string s = 1;
}
// Table of string keys, each paired with one uint32 value, optionally split into nested subtables.
message IndexedStringTable {
// common prefix shared by all strings inside this table
optional string prefix = 1;
// key, val and subtables entries are interleaved and kept in ascending key order,
// for example: key1,val1,subtables1,key2,val2,key3,val3,subtables3,subtables3
repeated string key = 3;
// either the shift (offset) to the data, or a single uint value that is the data itself
repeated uint32 val = 4;
// subtables are meant to make search faster: instead of scanning through all strings
// it is enough to read the prefix in the subtable header
repeated IndexedStringTable subtables = 5;
}
message OsmAndMapIndex {
// encoded as fixed32 length delimited
repeated MapRootLevel levels = 1;
@ -82,8 +95,6 @@ message MapTree {
}
/// Simple messages
message MapData {
required bytes coordinates = 1; // array of delta x,y sint32 (CodedIinputStream) >> 5
@ -275,6 +286,8 @@ message OsmAndPoiIndex {
repeated OsmAndCategoryTable categoriesTable = 3;
// leave space for other indexes
// encoded as fixed32 length delimited
optional OsmAndPoiNameIndex nameIndex = 4;
// encoded as fixed32 length delimited
repeated OsmAndPoiBox boxes = 6; // children
@ -284,19 +297,34 @@ message OsmAndPoiIndex {
}
message IndexedStringTable {
// common prefix for all strings inside
optional string prefix = 1;
message OsmAndPoiNameIndex {
// shift to data starting from first OsmAndPoiNameIndexData message
// encoded as fixed32 length delimited
required IndexedStringTable table = 2;
// key, val and subtables are mixed and order is preserved by key (ascending)
// so that's example of data : key1,val1,subtables1,key2,val2,key3,val3,subtables3,subtables3
repeated string key = 3;
// the shift to the data or one uint data itself
repeated uint32 val = 4;
// subtables are supposed to make search faster instead of searching through all strings
// it's enough to read prefix in the header
repeated IndexedStringTable subtables = 5;
}
repeated OsmAndPoiNameIndexData data = 5;
}
// Data stored for one entry of the POI name index: a list of atoms.
message OsmAndPoiNameIndexData {
// may contain several atoms, i.e. multiple data offsets
repeated OsmAndPoiNameIndexDataAtom atoms = 4;
}
// One reference from the POI name index: zoom/x/y plus the shift to the referenced data.
message OsmAndPoiNameIndexDataAtom {
// NOTE(review): the original remark read "fields do not preserve order define, they are repeating";
// presumably the fields are not guaranteed to follow declaration order and repeat per atom - confirm
optional uint32 zoom = 2;
optional uint32 x = 3;
optional uint32 y = 4;
// Must be the last field of the message.
// shift to OsmAndPoiBoxData message from OsmAndPoiIndex.start
// message is started when body is started
optional fixed32 shiftTo = 14;
}
message OsmAndCategoryTable {
required string category = 1;
@ -310,14 +338,11 @@ message OsmAndPoiBox {
required sint32 top = 3; // delta encoded (zoom)
optional OsmAndPoiCategories categories = 4;
optional IndexedStringTable includeNamesList = 5;
optional IndexedStringTable excludeNamesList = 6;
// encoded as fixed32 length delimited
repeated OsmAndPoiBox subBoxes = 10;
// must be the last
optional fixed32 shiftToData = 14; // shift to OsmAndPoiBoxData message from OsmAndPoiIndex.start
// message is started when body is started
}

View file

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="src"/>
<classpathentry excluding="net/osmand/LogUtil.java|net/osmand/swing/|net/osmand/data/preparation/|net/osmand/osm/util/|net/osmand/data/index/|net/osmand/osm/io/OsmStorageWriter.java" kind="src" path="use"/>
<classpathentry excluding="net/osmand/LogUtil.java|net/osmand/swing/|net/osmand/data/preparation/|net/osmand/osm/util/|net/osmand/data/index/|net/osmand/osm/io/OsmStorageWriter.java|net/osmand/binary/BinaryMapIndexWriter.java" kind="src" path="use"/>
<classpathentry kind="src" path="gen"/>
<classpathentry kind="con" path="com.android.ide.eclipse.adt.ANDROID_FRAMEWORK"/>
<classpathentry kind="lib" path="lib/bzip2-20090327.jar"/>