implement index creator

git-svn-id: https://osmand.googlecode.com/svn/trunk@451 e29c36b1-1cfa-d876-8d93-3434fc2bb7b8
This commit is contained in:
Victor Shcherb 2010-08-13 23:00:37 +00:00
parent 899156e6cd
commit b0a5076613

View file

@ -16,6 +16,7 @@ import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashSet; import java.util.HashSet;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
@ -72,6 +73,7 @@ public class IndexCreator {
public static final String TEMP_NODES_DB = "nodes"+IndexConstants.MAP_INDEX_EXT; public static final String TEMP_NODES_DB = "nodes"+IndexConstants.MAP_INDEX_EXT;
public static final int STEP_MAIN = 1; public static final int STEP_MAIN = 1;
public static final int STEP_ADDRESS_RELATIONS = 2; public static final int STEP_ADDRESS_RELATIONS = 2;
public static final int STEP_CITY_NODES = 3;
private File workingDir = null; private File workingDir = null;
@ -122,13 +124,16 @@ public class IndexCreator {
private PreparedStatement addressStreetNodeStat; private PreparedStatement addressStreetNodeStat;
// choose what to use ? // choose what to use ?
private boolean loadStreetsInMemory = true; private boolean loadInMemory = true;
private PreparedStatement addressSearchStreetStat; private PreparedStatement addressSearchStreetStat;
private Map<String, Long> addressStreetLocalMap = new LinkedHashMap<String, Long>();
private PreparedStatement addressSearchBuildingStat; private PreparedStatement addressSearchBuildingStat;
private PreparedStatement addressSearchStreetNodeStat; private PreparedStatement addressSearchStreetNodeStat;
private Map<String, Long> addressStreetLocalMap = new LinkedHashMap<String, Long>();
private Set<Long> addressBuildingLocalSet = new LinkedHashSet<Long>();
private Set<Long> addressStreetNodeLocalSet = new LinkedHashSet<Long>();
// address structure // address structure
// load it in memory // load it in memory
private Map<EntityId, City> cities = new LinkedHashMap<EntityId, City>(); private Map<EntityId, City> cities = new LinkedHashMap<EntityId, City>();
@ -237,14 +242,7 @@ public class IndexCreator {
@Override @Override
public boolean acceptEntityToLoad(OsmBaseStorage storage, EntityId entityId, Entity e) { public boolean acceptEntityToLoad(OsmBaseStorage storage, EntityId entityId, Entity e) {
// Register all city labelbs // Register all city labelbs
if (e instanceof Node && e.getTag(OSMTagKey.PLACE) != null) { registerCityIfNeeded(e);
City city = new City((Node) e);
if(city.getType() != null && !Algoritms.isEmpty(city.getName())){
convertEnglishName(city);
cityManager.registerObject(((Node) e).getLatitude(), ((Node) e).getLongitude(), city);
cities.put(city.getEntityId(), city);
}
}
// put all nodes into temporary db to get only required nodes after loading all data // put all nodes into temporary db to get only required nodes after loading all data
try { try {
if (e instanceof Node) { if (e instanceof Node) {
@ -413,6 +411,10 @@ public class IndexCreator {
this.transportFileName = transportFileName; this.transportFileName = transportFileName;
} }
public void setNodesDBFile(File file){
dbFile = file;
}
public void setMapFileName(String mapFileName) { public void setMapFileName(String mapFileName) {
this.mapFileName = mapFileName; this.mapFileName = mapFileName;
} }
@ -452,16 +454,17 @@ public class IndexCreator {
return addressFileName; return addressFileName;
} }
public void iterateOverAllEntities(IProgress progress, NewDataExtractionOsmFilter f, int step) throws SQLException{ public void iterateOverAllEntities(IProgress progress, int allNodes, int allWays, int allRelations, int step) throws SQLException{
iterateOverEntities(progress, EntityType.NODE, f.getAllNodes(), step); iterateOverEntities(progress, EntityType.NODE, allNodes, step);
iterateOverEntities(progress, EntityType.WAY, f.getAllWays(), step); iterateOverEntities(progress, EntityType.WAY, allWays, step);
iterateOverEntities(progress, EntityType.RELATION, f.getAllRelations(), step); iterateOverEntities(progress, EntityType.RELATION, allRelations, step);
} }
public void iterateOverEntities(IProgress progress, EntityType type, int allCount, int step) throws SQLException{ public int iterateOverEntities(IProgress progress, EntityType type, int allCount, int step) throws SQLException{
Statement statement = dbConn.createStatement(); Statement statement = dbConn.createStatement();
String select; String select;
String info; String info;
int count = 0;
if(type == EntityType.NODE){ if(type == EntityType.NODE){
select = "select * from node"; select = "select * from node";
@ -476,6 +479,7 @@ public class IndexCreator {
progress.startTask("Indexing "+info +"...", allCount); progress.startTask("Indexing "+info +"...", allCount);
ResultSet rs = statement.executeQuery(select); ResultSet rs = statement.executeQuery(select);
while(rs.next()){ while(rs.next()){
count++;
progress.progress(1); progress.progress(1);
Entity e; Entity e;
if(type == EntityType.NODE){ if(type == EntityType.NODE){
@ -489,6 +493,7 @@ public class IndexCreator {
iterateEntity(e, step); iterateEntity(e, step);
} }
rs.close(); rs.close();
return count;
} }
private void loadEntityTags(EntityType type, Entity e) throws SQLException { private void loadEntityTags(EntityType type, Entity e) throws SQLException {
@ -688,6 +693,9 @@ public class IndexCreator {
if (r.getKey() instanceof Way && saveAddressWays) { if (r.getKey() instanceof Way && saveAddressWays) {
DataIndexWriter.writeStreetWayNodes(addressStreetNodeStat, DataIndexWriter.writeStreetWayNodes(addressStreetNodeStat,
pStatements, streetId, (Way) r.getKey(), BATCH_SIZE); pStatements, streetId, (Way) r.getKey(), BATCH_SIZE);
if(loadInMemory){
addressStreetNodeLocalSet.add(r.getKey().getId());
}
} }
} else if ("house".equals(r.getValue())) { } else if ("house".equals(r.getValue())) {
// will be registered further in other case // will be registered further in other case
@ -699,6 +707,9 @@ public class IndexCreator {
convertEnglishName(building); convertEnglishName(building);
DataIndexWriter.writeBuilding(addressBuildingStat, pStatements, DataIndexWriter.writeBuilding(addressBuildingStat, pStatements,
streetId, building, BATCH_SIZE); streetId, building, BATCH_SIZE);
if(loadInMemory){
addressBuildingLocalSet.add(r.getKey().getId());
}
} }
} }
} }
@ -723,6 +734,9 @@ public class IndexCreator {
convertEnglishName(building); convertEnglishName(building);
DataIndexWriter.writeBuilding(addressBuildingStat, pStatements, DataIndexWriter.writeBuilding(addressBuildingStat, pStatements,
streetId, building, BATCH_SIZE); streetId, building, BATCH_SIZE);
if(loadInMemory){
addressBuildingLocalSet.add(id.getId());
}
} }
} }
} else { } else {
@ -835,7 +849,7 @@ public class IndexCreator {
Long foundId = null; Long foundId = null;
name = normalizeStreetName(name); name = normalizeStreetName(name);
if(loadStreetsInMemory){ if(loadInMemory){
foundId = addressStreetLocalMap.get(name+"_"+city.getId()); foundId = addressStreetLocalMap.get(name+"_"+city.getId());
} else { } else {
addressSearchStreetStat.setLong(1, city.getId()); addressSearchStreetStat.setLong(1, city.getId());
@ -855,7 +869,7 @@ public class IndexCreator {
addressStreetStat.setDouble(IndexStreetTable.LATITUDE.ordinal() + 1, location.getLatitude()); addressStreetStat.setDouble(IndexStreetTable.LATITUDE.ordinal() + 1, location.getLatitude());
addressStreetStat.setDouble(IndexStreetTable.LONGITUDE.ordinal() + 1, location.getLongitude()); addressStreetStat.setDouble(IndexStreetTable.LONGITUDE.ordinal() + 1, location.getLongitude());
addressStreetStat.setLong(IndexStreetTable.CITY.ordinal() + 1, city.getId()); addressStreetStat.setLong(IndexStreetTable.CITY.ordinal() + 1, city.getId());
if(loadStreetsInMemory){ if(loadInMemory){
DataIndexWriter.addBatch(pStatements, addressStreetStat, BATCH_SIZE); DataIndexWriter.addBatch(pStatements, addressStreetStat, BATCH_SIZE);
addressStreetLocalMap.put(name+"_"+city.getId(), initId); addressStreetLocalMap.put(name+"_"+city.getId(), initId);
} else { } else {
@ -895,10 +909,17 @@ public class IndexCreator {
// index not only buildings but also nodes that belongs to addr:interpolation ways // index not only buildings but also nodes that belongs to addr:interpolation ways
if (e.getTag(OSMTagKey.ADDR_HOUSE_NUMBER) != null && e.getTag(OSMTagKey.ADDR_STREET) != null) { if (e.getTag(OSMTagKey.ADDR_HOUSE_NUMBER) != null && e.getTag(OSMTagKey.ADDR_STREET) != null) {
// TODO e.getTag(OSMTagKey.ADDR_CITY) could be used to find city however many cities could have same name! // TODO e.getTag(OSMTagKey.ADDR_CITY) could be used to find city however many cities could have same name!
// check that building is not registered already // check that building is not registered already
addressSearchBuildingStat.setLong(1, e.getId()); boolean exist = false;
ResultSet rs = addressSearchBuildingStat.executeQuery(); if(loadInMemory){
if (!rs.next()) { addressSearchBuildingStat.setLong(1, e.getId());
ResultSet rs = addressSearchBuildingStat.executeQuery();
exist = rs.next();
rs.close();
} else {
exist = addressBuildingLocalSet.contains(e.getId());
}
if (!exist) {
loadEntityData(e, false); loadEntityData(e, false);
LatLon l = e.getLatLon(); LatLon l = e.getLatLon();
City city = getClosestCity(l); City city = getClosestCity(l);
@ -910,15 +931,21 @@ public class IndexCreator {
DataIndexWriter.writeBuilding(addressBuildingStat, pStatements, idStreet, building, BATCH_SIZE); DataIndexWriter.writeBuilding(addressBuildingStat, pStatements, idStreet, building, BATCH_SIZE);
} }
} }
rs.close();
} }
// suppose that streets with names are ways for car // suppose that streets with names are ways for car
if (e instanceof Way /* && OSMSettings.wayForCar(e.getTag(OSMTagKey.HIGHWAY)) */ if (e instanceof Way /* && OSMSettings.wayForCar(e.getTag(OSMTagKey.HIGHWAY)) */
&& e.getTag(OSMTagKey.HIGHWAY) != null && e.getTag(OSMTagKey.NAME) != null) { && e.getTag(OSMTagKey.HIGHWAY) != null && e.getTag(OSMTagKey.NAME) != null) {
boolean exist = false;
if(loadInMemory){
addressSearchStreetNodeStat.setLong(1, e.getId());
ResultSet rs = addressSearchStreetNodeStat.executeQuery();
exist = rs.next();
rs.close();
} else {
exist = addressStreetNodeLocalSet.contains(e.getId());
}
// check that building is not registered already // check that building is not registered already
addressSearchStreetNodeStat.setLong(1, e.getId()); if (!exist) {
ResultSet rs = addressSearchStreetNodeStat.executeQuery();
if (!rs.next()) {
loadEntityData(e, false); loadEntityData(e, false);
LatLon l = e.getLatLon(); LatLon l = e.getLatLon();
City city = getClosestCity(l); City city = getClosestCity(l);
@ -927,7 +954,6 @@ public class IndexCreator {
DataIndexWriter.writeStreetWayNodes(addressStreetNodeStat, pStatements, idStreet, (Way) e, BATCH_SIZE); DataIndexWriter.writeStreetWayNodes(addressStreetNodeStat, pStatements, idStreet, (Way) e, BATCH_SIZE);
} }
} }
rs.close();
} }
if (e instanceof Relation) { if (e instanceof Relation) {
if (e.getTag(OSMTagKey.POSTAL_CODE) != null) { if (e.getTag(OSMTagKey.POSTAL_CODE) != null) {
@ -940,9 +966,24 @@ public class IndexCreator {
if (e instanceof Relation && "address".equals(e.getTag(OSMTagKey.TYPE))) { if (e instanceof Relation && "address".equals(e.getTag(OSMTagKey.TYPE))) {
indexAddressRelation((Relation) e); indexAddressRelation((Relation) e);
} }
} else if (step == STEP_CITY_NODES) {
registerCityIfNeeded(e);
} }
} }
private void registerCityIfNeeded(Entity e) {
if (e instanceof Node && e.getTag(OSMTagKey.PLACE) != null) {
City city = new City((Node) e);
if(city.getType() != null && !Algoritms.isEmpty(city.getName())){
convertEnglishName(city);
cityManager.registerObject(((Node) e).getLatitude(), ((Node) e).getLongitude(), city);
cities.put(city.getEntityId(), city);
}
}
}
private void convertEnglishName(MapObject o){ private void convertEnglishName(MapObject o){
@ -952,81 +993,93 @@ public class IndexCreator {
} }
} }
public void generateIndexes(String path, IProgress progress, IOsmStorageFilter addFilter) throws IOException, SAXException, public void generateIndexes(File readFile, IProgress progress, IOsmStorageFilter addFilter) throws IOException, SAXException,
SQLException { SQLException {
File f = new File(path); if (readFile != null && regionName == null) {
InputStream stream = new FileInputStream(f); int i = readFile.getName().indexOf('.');
int i = f.getName().indexOf('.'); if(i > -1){
if (regionName == null) { regionName = Algoritms.capitalizeFirstLetterAndLowercase(readFile.getName().substring(0, i));
regionName = Algoritms.capitalizeFirstLetterAndLowercase(f.getName().substring(0, i));
}
InputStream streamFile = stream;
long st = System.currentTimeMillis();
if (path.endsWith(".bz2")) {
if (stream.read() != 'B' || stream.read() != 'Z') {
throw new RuntimeException("The source stream must start with the characters BZ if it is to be read as a BZip2 stream.");
} else {
stream = new CBZip2InputStream(stream);
} }
} }
if (progress != null) {
progress.startTask("Loading file " + path, -1);
}
// clear previous results
cities.clear();
cityManager.clear();
postalCodeRelations.clear();
OsmBaseStorage storage = new OsmBaseStorage();
storage.setSupressWarnings(DataExtractionSettings.getSettings().isSupressWarningsForDuplicatedId());
if (addFilter != null) {
storage.getFilters().add(addFilter);
}
try { try {
Class.forName("org.sqlite.JDBC"); Class.forName("org.sqlite.JDBC");
} catch (ClassNotFoundException e) { } catch (ClassNotFoundException e) {
log.error("Illegal configuration", e); log.error("Illegal configuration", e);
throw new IllegalStateException(e); throw new IllegalStateException(e);
} }
if (indexMap) { boolean loadFromPath = dbFile == null;
dbFile = new File(workingDir, getMapFileName()); if (dbFile == null) {
} else { if (indexMap) {
dbFile = new File(workingDir, TEMP_NODES_DB); dbFile = new File(workingDir, getMapFileName());
} } else {
// to save space dbFile = new File(workingDir, TEMP_NODES_DB);
if (dbFile.exists()) { }
dbFile.delete(); // to save space
if (dbFile.exists()) {
dbFile.delete();
}
} }
// creating nodes db to fast access for all nodes // creating nodes db to fast access for all nodes
dbConn = DriverManager.getConnection("jdbc:sqlite:" + dbFile.getAbsolutePath()); dbConn = DriverManager.getConnection("jdbc:sqlite:" + dbFile.getAbsolutePath());
// 1. Loading osm file // clear previous results
NewDataExtractionOsmFilter filter = new NewDataExtractionOsmFilter(); cities.clear();
try { cityManager.clear();
// 1 init database to store temporary data postalCodeRelations.clear();
progress.setGeneralProgress("[50 of 100]");
int allRelations = 100000;
filter.initDatabase(); int allWays = 1000000;
storage.getFilters().add(filter); int allNodes = 10000000;
storage.parseOSM(stream, progress, streamFile, false); if (loadFromPath) {
filter.finishLoading(); InputStream stream = new FileInputStream(readFile);
InputStream streamFile = stream;
if (log.isInfoEnabled()) { long st = System.currentTimeMillis();
log.info("File parsed : " + (System.currentTimeMillis() - st)); if (readFile.getName().endsWith(".bz2")) {
if (stream.read() != 'B' || stream.read() != 'Z') {
throw new RuntimeException("The source stream must start with the characters BZ if it is to be read as a BZip2 stream.");
} else {
stream = new CBZip2InputStream(stream);
}
} }
progress.finishTask();
} finally { if (progress != null) {
if (log.isInfoEnabled()) { progress.startTask("Loading file " + readFile.getAbsolutePath(), -1);
log.info("File indexed : " + (System.currentTimeMillis() - st)); }
OsmBaseStorage storage = new OsmBaseStorage();
storage.setSupressWarnings(DataExtractionSettings.getSettings().isSupressWarningsForDuplicatedId());
if (addFilter != null) {
storage.getFilters().add(addFilter);
}
// 1. Loading osm file
NewDataExtractionOsmFilter filter = new NewDataExtractionOsmFilter();
try {
// 1 init database to store temporary data
progress.setGeneralProgress("[50 of 100]");
filter.initDatabase();
storage.getFilters().add(filter);
storage.parseOSM(stream, progress, streamFile, false);
filter.finishLoading();
allNodes = filter.getAllNodes();
allWays = filter.getAllWays();
allRelations = filter.getAllRelations();
if (log.isInfoEnabled()) {
log.info("File parsed : " + (System.currentTimeMillis() - st));
}
progress.finishTask();
} finally {
if (log.isInfoEnabled()) {
log.info("File indexed : " + (System.currentTimeMillis() - st));
}
} }
} }
// 2. Processing all entries // 2. Processing all entries
progress.setGeneralProgress("[90 of 100]"); progress.setGeneralProgress("[90 of 100]");
@ -1120,6 +1173,10 @@ public class IndexCreator {
// 1. write all cities // 1. write all cities
if(indexAddress){ if(indexAddress){
if(!loadFromPath){
allNodes = iterateOverEntities(progress, EntityType.NODE, allNodes, STEP_CITY_NODES);
}
for(City c : cities.values()){ for(City c : cities.values()){
DataIndexWriter.writeCity(addressCityStat, pStatements, c, BATCH_SIZE); DataIndexWriter.writeCity(addressCityStat, pStatements, c, BATCH_SIZE);
} }
@ -1134,7 +1191,7 @@ public class IndexCreator {
// 2. index address relations // 2. index address relations
if(indexAddress){ if(indexAddress){
iterateOverEntities(progress, EntityType.RELATION, filter.getAllRelations(), STEP_ADDRESS_RELATIONS); allRelations = iterateOverEntities(progress, EntityType.RELATION, allRelations, STEP_ADDRESS_RELATIONS);
// commit to put all cities // commit to put all cities
if(pStatements.get(addressBuildingStat) > 0){ if(pStatements.get(addressBuildingStat) > 0){
addressBuildingStat.executeBatch(); addressBuildingStat.executeBatch();
@ -1149,7 +1206,7 @@ public class IndexCreator {
// 3. iterate over all entities // 3. iterate over all entities
iterateOverAllEntities(progress, filter, STEP_MAIN); iterateOverAllEntities(progress, allNodes, allWays, allRelations, STEP_MAIN);
// 4. update all postal codes from relations // 4. update all postal codes from relations
@ -1232,15 +1289,28 @@ public class IndexCreator {
// TODO normalizing (!!!), lowercase, converting en street names after all! // TODO normalizing (!!!), lowercase, converting en street names after all!
// TODO find proper location for streets ! centralize them // TODO find proper location for streets ! centralize them
public static void main(String[] args) throws IOException, SAXException, SQLException { public static void main(String[] args) throws IOException, SAXException, SQLException {
IndexCreator extr = new IndexCreator(new File("e:/Information/OSM maps/osmand/")); File workDir = new File("e:/Information/OSM maps/osmand/");
IndexCreator extr = new IndexCreator(workDir);
extr.setIndexPOI(true); extr.setIndexPOI(true);
extr.setIndexTransport(true); extr.setIndexTransport(true);
extr.setIndexAddress(true); extr.setIndexAddress(true);
extr.setNormalizeStreets(true); extr.setNormalizeStreets(true);
extr.setSaveAddressWays(true); extr.setSaveAddressWays(true);
File file = new File(workDir, "netherlands.odb");
extr.setNodesDBFile(file);
extr.generateIndexes(file, new ConsoleProgressImplementation(2), null);
extr.generateIndexes("e:/Information/OSM maps/osm_map/netherlands.osm.bz2", new ConsoleProgressImplementation(2), null); // extr.generateIndexes(new File("e:/Information/OSM maps/osm_map/netherlands.osm.bz2"), new ConsoleProgressImplementation(2), null);
// extr.generateIndexes("e:/Information/OSM maps/belarus osm/minsk.osm", new ConsoleProgressImplementation(4), null);
// extr.generateIndexes(new File("e:/Information/OSM maps/belarus osm/minsk.osm"), new ConsoleProgressImplementation(4), null);
// extr.generateIndexes(new File("e:/Information/OSM maps/belarus osm/belarus_2010_06_02.osm.bz2"), new ConsoleProgressImplementation(4), null);
// File file = new File(workDir, "nodes.map.odb");
// extr.setNodesDBFile(file);
// extr.generateIndexes(file, new ConsoleProgressImplementation(2), null);
} }
} }