diff --git a/DataExtractionOSM/.classpath b/DataExtractionOSM/.classpath
index 56b0fb9745..f25f559058 100644
--- a/DataExtractionOSM/.classpath
+++ b/DataExtractionOSM/.classpath
@@ -11,5 +11,6 @@
+
diff --git a/DataExtractionOSM/lib/mysql-connector-java-5.1.18-bin.jar b/DataExtractionOSM/lib/mysql-connector-java-5.1.18-bin.jar
new file mode 100644
index 0000000000..cdee6a1668
Binary files /dev/null and b/DataExtractionOSM/lib/mysql-connector-java-5.1.18-bin.jar differ
diff --git a/DataExtractionOSM/src/net/osmand/data/index/WikiIndexer.java b/DataExtractionOSM/src/net/osmand/data/index/WikiIndexer.java
index 7b18dd24f5..1a23ecef97 100644
--- a/DataExtractionOSM/src/net/osmand/data/index/WikiIndexer.java
+++ b/DataExtractionOSM/src/net/osmand/data/index/WikiIndexer.java
@@ -8,6 +8,10 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
import java.sql.SQLException;
import javax.xml.parsers.ParserConfigurationException;
@@ -23,30 +27,36 @@ import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
-
-
import net.osmand.Algoritms;
import net.osmand.LogUtil;
import net.osmand.Version;
import net.osmand.data.preparation.IndexCreator;
import net.osmand.impl.ConsoleProgressImplementation;
+// Example coordinate dump (coord_commonswiki table): http://toolserver.org/~dispenser/dumps/coord_commonswiki.sql.gz
+// Example query to inspect its rows: select * from coord_commonswiki limit 10;
public class WikiIndexer {
private static final Log log = LogUtil.getLog(WikiIndexer.class);
private final File srcPath;
private final File workPath;
private final File targetPath;
+ private final String userName = "jenkins";
+ private final String password = "jenkins";
+ private final String url = "jdbc:mysql://localhost/wiki";
+
public static class WikiIndexerException extends Exception {
private static final long serialVersionUID = 1L;
+
public WikiIndexerException(String name) {
super(name);
}
+
public WikiIndexerException(String string, Exception e) {
super(string, e);
}
}
-
+
public WikiIndexer(File srcPath, File targetPath, File workPath) {
this.srcPath = srcPath;
this.targetPath = targetPath;
@@ -58,35 +68,52 @@ public class WikiIndexer {
File srcPath = extractDirectory(args, 0);
File targetPath = extractDirectory(args, 1);
File workPath = extractDirectory(args, 2);
-
+
WikiIndexer wikiIndexer = new WikiIndexer(srcPath, targetPath, workPath);
wikiIndexer.run();
-
+
} catch (WikiIndexerException e) {
log.error(e.getMessage());
}
}
-
+
private static File extractDirectory(String[] args, int ind) throws WikiIndexerException {
if (args.length <= ind) {
- throw new WikiIndexerException("Usage: WikiIndexer src_directory target_directory work_directory [--description={full|normal|minimum}]" + " missing " + (ind + 1));
+ throw new WikiIndexerException(
+ "Usage: WikiIndexer src_directory target_directory work_directory [--description={full|normal|minimum}]" + " missing "
+ + (ind + 1));
} else {
File fs = new File(args[ind]);
fs.mkdir();
- if(!fs.exists() || !fs.isDirectory()) {
+ if (!fs.exists() || !fs.isDirectory()) {
throw new WikiIndexerException("Specified directory doesn't exist : " + args[ind]);
}
return fs;
}
}
-
- public void run() {
+
+ public void run() throws WikiIndexerException {
+ log.info("Obtain database connection");
+ Connection conn;
+ try {
+ Class.forName("com.mysql.jdbc.Driver").newInstance();
+ conn = DriverManager.getConnection(url, userName, password);
+ log.info("Database connection established");
+ } catch (InstantiationException e1) {
+ throw new WikiIndexerException("Could not establish connection to " + url + " with " + userName, e1);
+ } catch (IllegalAccessException e1) {
+ throw new WikiIndexerException("Could not establish connection to " + url + " with " + userName, e1);
+ } catch (ClassNotFoundException e1) {
+ throw new WikiIndexerException("Could not establish connection to " + url + " with " + userName, e1);
+ } catch (SQLException e1) {
+ throw new WikiIndexerException("Could not establish connection to " + url + " with " + userName, e1);
+ }
File[] listFiles = srcPath.listFiles();
- for(File f : listFiles) {
+ for (File f : listFiles) {
try {
if (f.isFile() && (f.getName().endsWith(".xml") || f.getName().endsWith(".xml.bz2"))) {
log.info("About to process " + f.getName());
- File outFile = process(f);
+ File outFile = process(f, conn);
if (outFile != null) {
IndexCreator ic = new IndexCreator(workPath);
@@ -100,30 +127,35 @@ public class WikiIndexer {
}
}
} catch (WikiIndexerException e) {
- log.error("Error processing "+f.getName(), e);
+ log.error("Error processing " + f.getName(), e);
} catch (RuntimeException e) {
- log.error("Error processing "+f.getName(), e);
+ log.error("Error processing " + f.getName(), e);
} catch (IOException e) {
- log.error("Error processing "+f.getName(), e);
+ log.error("Error processing " + f.getName(), e);
} catch (SAXException e) {
- log.error("Error processing "+f.getName(), e);
+ log.error("Error processing " + f.getName(), e);
} catch (SQLException e) {
- log.error("Error processing "+f.getName(), e);
+ log.error("Error processing " + f.getName(), e);
} catch (InterruptedException e) {
- log.error("Error processing "+f.getName(), e);
+ log.error("Error processing " + f.getName(), e);
}
}
}
- protected File process(File f) throws WikiIndexerException {
+ protected File process(File f, Connection dbConnection) throws WikiIndexerException {
InputStream fi = null;
BufferedWriter out = null;
try {
int in = f.getName().indexOf('.');
+ // The wiki locale is the part of the file name between its first and second dot.
+ // Search from in + 1: indexOf('.', in) would match the first dot again and make substring() throw.
+ String wikiLocale = f.getName().substring(in + 1, f.getName().indexOf('.', in + 1));
+ log.info("Locale for file " + wikiLocale);
+ PreparedStatement statement = dbConnection.prepareStatement("SELECT gc_lat, gc_lon, gc_type FROM coord_"+wikiLocale+"wiki WHERE gc_from=?");
File osmOut = new File(workPath, f.getName().substring(0, in) + ".osm");
fi = new BufferedInputStream(new FileInputStream(f));
InputStream progressStream = fi;
- if(f.getName().endsWith(".bz2")){
+ if (f.getName().endsWith(".bz2")) {
if (fi.read() != 'B' || fi.read() != 'Z') {
throw new RuntimeException("The source stream must start with the characters BZ if it is to be read as a BZip2 stream."); //$NON-NLS-1$
} else {
@@ -133,10 +165,11 @@ public class WikiIndexer {
ConsoleProgressImplementation progress = new ConsoleProgressImplementation();
out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(osmOut), "UTF-8"));
SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
- WikiOsmHandler wikiOsmHandler = new WikiOsmHandler(saxParser, out, progress, progressStream);
+ WikiOsmHandler wikiOsmHandler = new WikiOsmHandler(saxParser, out, progress, progressStream, statement);
saxParser.parse(fi, wikiOsmHandler);
-
- if(wikiOsmHandler.getCount() < 1){
+
+ statement.close();
+ if (wikiOsmHandler.getCount() < 1) {
return null;
}
return osmOut;
@@ -148,84 +181,112 @@ public class WikiIndexer {
throw new WikiIndexerException("Parse exception", e);
} catch (XMLStreamException e) {
throw new WikiIndexerException("Parse exception", e);
+ } catch (SQLException e) {
+ throw new WikiIndexerException("Database exception or locale configuration problem", e);
} finally {
Algoritms.closeStream(out);
Algoritms.closeStream(fi);
}
}
-
-
+
public class WikiOsmHandler extends DefaultHandler {
long id = 1;
private final SAXParser saxParser;
private boolean page = false;
private StringBuilder ctext = null;
-
+
private StringBuilder title = new StringBuilder();
private StringBuilder text = new StringBuilder();
-
+ private StringBuilder pageId = new StringBuilder();
+ private float clat = 0;
+ private float clon = 0;
+ private String subcategory = null;
+ private boolean parseText = false;
+
private final ConsoleProgressImplementation progress;
private final InputStream progIS;
private XMLStreamWriter streamWriter;
-
- WikiOsmHandler(SAXParser saxParser, BufferedWriter outOsm, ConsoleProgressImplementation progress, InputStream progIS) throws IOException, XMLStreamException {
+ private final PreparedStatement dbStat;
+
+ WikiOsmHandler(SAXParser saxParser, BufferedWriter outOsm, ConsoleProgressImplementation progress, InputStream progIS,
+ PreparedStatement dbStat)
+ throws IOException, XMLStreamException {
this.saxParser = saxParser;
this.progress = progress;
this.progIS = progIS;
+ this.dbStat = dbStat;
XMLOutputFactory xof = XMLOutputFactory.newInstance();
- streamWriter = xof.createXMLStreamWriter(outOsm);
- streamWriter.writeStartDocument();
- streamWriter.writeCharacters("\n");
- streamWriter.writeStartElement("osm");
- streamWriter.writeAttribute("version", "0.6");
- streamWriter.writeAttribute("generator", Version.APP_MAP_CREATOR_VERSION);
-
-
+ streamWriter = xof.createXMLStreamWriter(outOsm);
+ streamWriter.writeStartDocument();
+ streamWriter.writeCharacters("\n");
+ streamWriter.writeStartElement("osm");
+ streamWriter.writeAttribute("version", "0.6");
+ streamWriter.writeAttribute("generator", Version.APP_MAP_CREATOR_VERSION);
+
progress.startTask("Parse wiki xml", progIS.available());
}
-
+
public int getCount() {
return (int) (id - 1);
}
-
+
@Override
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
String name = saxParser.isNamespaceAware() ? localName : qName;
if (!page) {
page = name.equals("page");
} else {
- if(name.equals("title")) {
+ if (name.equals("title")) {
title.setLength(0);
ctext = title;
- } else if(name.equals("text")) {
- text.setLength(0);
- ctext = text;
+ } else if (name.equals("text")) {
+ if(parseText) {
+ text.setLength(0);
+ ctext = text;
+ }
+ } else if (name.equals("id")) {
+ pageId.setLength(0);
+ ctext = pageId;
}
}
}
-
-
+
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
if (page) {
- if(ctext != null) {
+ if (ctext != null) {
ctext.append(ch, start, length);
}
}
}
-
+
@Override
public void endElement(String uri, String localName, String qName) throws SAXException {
String name = saxParser.isNamespaceAware() ? localName : qName;
try {
if (page) {
- if(name.equals("page")) {
+ if (name.equals("page")) {
page = false;
+ parseText = false;
progress.remaining(progIS.available());
- } else if(name.equals("title")) {
+ } else if (name.equals("title")) {
ctext = null;
- } else if(name.equals("text")) {
- analyzeTextForGeoInfo();
+ } else if (name.equals("id")) {
+ ctext = null;
+ long pid = Long.parseLong(pageId.toString());
+ dbStat.setLong(1, pid);
+ ResultSet rs = dbStat.executeQuery();
+ parseText = false;
+ if(rs.next()) {
+ parseText = true;
+ clat = rs.getFloat(1);
+ clon = rs.getFloat(2);
+ subcategory = (rs.getString(3) + "").toLowerCase();
+ }
+ } else if (name.equals("text")) {
+ if(parseText) {
+ analyzeTextForGeoInfoNew();
+ }
ctext = null;
}
}
@@ -233,16 +294,18 @@ public class WikiIndexer {
throw new SAXException(e);
} catch (XMLStreamException e) {
throw new SAXException(e);
+ } catch (SQLException e) {
+ throw new SAXException(e);
}
}
-
- private String readProperty(String prop, int s, int e){
+
+ private String readProperty(String prop, int s, int e) {
int res = -1;
for (int i = s; i < e - prop.length(); i++) {
- if(prop.charAt(0) == text.charAt(i)) {
+ if (prop.charAt(0) == text.charAt(i)) {
boolean neq = false;
for (int j = 0; j < prop.length(); j++) {
- if(prop.charAt(j) != text.charAt(i + j)) {
+ if (prop.charAt(j) != text.charAt(i + j)) {
neq = true;
break;
}
@@ -253,7 +316,7 @@ public class WikiIndexer {
}
}
}
- if(res == -1){
+ if (res == -1) {
return null;
}
int sr = -1;
@@ -267,12 +330,12 @@ public class WikiIndexer {
sr = i + 1;
}
}
- if(sr != -1) {
+ if (sr != -1) {
String result = text.substring(sr, se);
int commSt = result.indexOf("<!--");
if (commSt != -1) {
int commEnd = result.indexOf("-->");
- if(commEnd == -1){
+ if (commEnd == -1) {
commEnd = result.length();
} else {
commEnd += "-->".length();
@@ -283,15 +346,15 @@ public class WikiIndexer {
}
return null;
}
-
+
private float zeroParseFloat(String s) {
return s == null || s.length() == 0 ? 0 : Float.parseFloat(s);
}
-
+
private int findOpenBrackets(int i) {
int h = text.indexOf("{{", i);
boolean check = true;
- while(check){
+ while (check) {
int startComment = text.indexOf("