diff --git a/DataExtractionOSM/.classpath b/DataExtractionOSM/.classpath index 56b0fb9745..f25f559058 100644 --- a/DataExtractionOSM/.classpath +++ b/DataExtractionOSM/.classpath @@ -11,5 +11,6 @@ + diff --git a/DataExtractionOSM/lib/mysql-connector-java-5.1.18-bin.jar b/DataExtractionOSM/lib/mysql-connector-java-5.1.18-bin.jar new file mode 100644 index 0000000000..cdee6a1668 Binary files /dev/null and b/DataExtractionOSM/lib/mysql-connector-java-5.1.18-bin.jar differ diff --git a/DataExtractionOSM/src/net/osmand/data/index/WikiIndexer.java b/DataExtractionOSM/src/net/osmand/data/index/WikiIndexer.java index 7b18dd24f5..1a23ecef97 100644 --- a/DataExtractionOSM/src/net/osmand/data/index/WikiIndexer.java +++ b/DataExtractionOSM/src/net/osmand/data/index/WikiIndexer.java @@ -8,6 +8,10 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStreamWriter; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; import java.sql.SQLException; import javax.xml.parsers.ParserConfigurationException; @@ -23,30 +27,36 @@ import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; - - import net.osmand.Algoritms; import net.osmand.LogUtil; import net.osmand.Version; import net.osmand.data.preparation.IndexCreator; import net.osmand.impl.ConsoleProgressImplementation; +// http://toolserver.org/~dispenser/dumps/coord_commonswiki.sql.gz +// select * from coord_commonswiki limit 10; public class WikiIndexer { private static final Log log = LogUtil.getLog(WikiIndexer.class); private final File srcPath; private final File workPath; private final File targetPath; + private final String userName = "jenkins"; + private final String password = "jenkins"; + private final String url = "jdbc:mysql://localhost/wiki"; + public static class WikiIndexerException extends Exception { private static final long serialVersionUID = 1L; + public WikiIndexerException(String name) { super(name); } + public WikiIndexerException(String string, Exception e) { super(string, e); } } - + public WikiIndexer(File srcPath, File targetPath, File workPath) { this.srcPath = srcPath; this.targetPath = targetPath; @@ -58,35 +68,52 @@ public class WikiIndexer { File srcPath = extractDirectory(args, 0); File targetPath = extractDirectory(args, 1); File workPath = extractDirectory(args, 2); - + WikiIndexer wikiIndexer = new WikiIndexer(srcPath, targetPath, workPath); wikiIndexer.run(); - + } catch (WikiIndexerException e) { log.error(e.getMessage()); } } - + private static File extractDirectory(String[] args, int ind) throws WikiIndexerException { if (args.length <= ind) { - throw new WikiIndexerException("Usage: WikiIndexer src_directory target_directory work_directory [--description={full|normal|minimum}]" + " missing " + (ind + 1)); + throw new WikiIndexerException( + "Usage: WikiIndexer src_directory target_directory work_directory [--description={full|normal|minimum}]" + " missing " + + (ind + 1)); } else { File fs = new File(args[ind]); fs.mkdir(); - if(!fs.exists() || !fs.isDirectory()) { + if (!fs.exists() || !fs.isDirectory()) { throw new WikiIndexerException("Specified directory doesn't exist : " + args[ind]); } return fs; } } - - public void run() { + + public void run() throws WikiIndexerException { + log.info("Obtain database connection"); + Connection conn; + try { + Class.forName("com.mysql.jdbc.Driver").newInstance(); + conn = DriverManager.getConnection(url, userName, password); + log.info("Database connection established"); + } catch (InstantiationException e1) { + throw new WikiIndexerException("Could not establish connection to " + url + " with " + userName, e1); + } catch (IllegalAccessException e1) { + throw new WikiIndexerException("Could not establish connection to " + url + " with " + userName, e1); + } catch (ClassNotFoundException e1) { + throw new WikiIndexerException("Could not establish connection to " + url + " with " + userName, e1); + } catch (SQLException e1) { + throw new WikiIndexerException("Could not establish connection to " + url + " with " + userName, e1); + } File[] listFiles = srcPath.listFiles(); - for(File f : listFiles) { + for (File f : listFiles) { try { if (f.isFile() && (f.getName().endsWith(".xml") || f.getName().endsWith(".xml.bz2"))) { log.info("About to process " + f.getName()); - File outFile = process(f); + File outFile = process(f, conn); if (outFile != null) { IndexCreator ic = new IndexCreator(workPath); @@ -100,30 +127,35 @@ public class WikiIndexer { } } } catch (WikiIndexerException e) { - log.error("Error processing "+f.getName(), e); + log.error("Error processing " + f.getName(), e); } catch (RuntimeException e) { - log.error("Error processing "+f.getName(), e); + log.error("Error processing " + f.getName(), e); } catch (IOException e) { - log.error("Error processing "+f.getName(), e); + log.error("Error processing " + f.getName(), e); } catch (SAXException e) { - log.error("Error processing "+f.getName(), e); + log.error("Error processing " + f.getName(), e); } catch (SQLException e) { - log.error("Error processing "+f.getName(), e); + log.error("Error processing " + f.getName(), e); } catch (InterruptedException e) { - log.error("Error processing "+f.getName(), e); + log.error("Error processing " + f.getName(), e); } } } - protected File process(File f) throws WikiIndexerException { + protected File process(File f, Connection dbConnection) throws WikiIndexerException { InputStream fi = null; BufferedWriter out = null; try { int in = f.getName().indexOf('.'); + String wikiLocale = f.getName().substring(in + 1, f.getName().indexOf('.', in)); + log.info("Locale for file " + wikiLocale); + + PreparedStatement statement = dbConnection.prepareStatement("SELECT gc_lat, gc_lon, gc_type FROM coord_"+wikiLocale+"wiki WHERE gc_from=?"); + File osmOut = new File(workPath, f.getName().substring(0, in) + ".osm"); fi = new BufferedInputStream(new FileInputStream(f)); InputStream progressStream = fi; - if(f.getName().endsWith(".bz2")){ + if (f.getName().endsWith(".bz2")) { if (fi.read() != 'B' || fi.read() != 'Z') { throw new RuntimeException("The source stream must start with the characters BZ if it is to be read as a BZip2 stream."); //$NON-NLS-1$ } else { @@ -133,10 +165,11 @@ public class WikiIndexer { ConsoleProgressImplementation progress = new ConsoleProgressImplementation(); out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(osmOut), "UTF-8")); SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser(); - WikiOsmHandler wikiOsmHandler = new WikiOsmHandler(saxParser, out, progress, progressStream); + WikiOsmHandler wikiOsmHandler = new WikiOsmHandler(saxParser, out, progress, progressStream, statement); saxParser.parse(fi, wikiOsmHandler); - - if(wikiOsmHandler.getCount() < 1){ + + statement.close(); + if (wikiOsmHandler.getCount() < 1) { return null; } return osmOut; @@ -148,84 +181,112 @@ public class WikiIndexer { throw new WikiIndexerException("Parse exception", e); } catch (XMLStreamException e) { throw new WikiIndexerException("Parse exception", e); + } catch (SQLException e) { + throw new WikiIndexerException("Database exception or locale configuration problem", e); } finally { Algoritms.closeStream(out); Algoritms.closeStream(fi); } } - - + public class WikiOsmHandler extends DefaultHandler { long id = 1; private final SAXParser saxParser; private boolean page = false; private StringBuilder ctext = null; - + private StringBuilder title = new StringBuilder(); private StringBuilder text = new StringBuilder(); - + private StringBuilder pageId = new StringBuilder(); + private float clat = 0; + private float clon = 0; + private String subcategory = null; + private boolean parseText = false; + private final ConsoleProgressImplementation progress; private final InputStream progIS; private XMLStreamWriter streamWriter; - - WikiOsmHandler(SAXParser saxParser, BufferedWriter outOsm, ConsoleProgressImplementation progress, InputStream progIS) throws IOException, XMLStreamException { + private final PreparedStatement dbStat; + + WikiOsmHandler(SAXParser saxParser, BufferedWriter outOsm, ConsoleProgressImplementation progress, InputStream progIS, + PreparedStatement dbStat) + throws IOException, XMLStreamException { this.saxParser = saxParser; this.progress = progress; this.progIS = progIS; + this.dbStat = dbStat; XMLOutputFactory xof = XMLOutputFactory.newInstance(); - streamWriter = xof.createXMLStreamWriter(outOsm); - streamWriter.writeStartDocument(); - streamWriter.writeCharacters("\n"); - streamWriter.writeStartElement("osm"); - streamWriter.writeAttribute("version", "0.6"); - streamWriter.writeAttribute("generator", Version.APP_MAP_CREATOR_VERSION); - - + streamWriter = xof.createXMLStreamWriter(outOsm); + streamWriter.writeStartDocument(); + streamWriter.writeCharacters("\n"); + streamWriter.writeStartElement("osm"); + streamWriter.writeAttribute("version", "0.6"); + streamWriter.writeAttribute("generator", Version.APP_MAP_CREATOR_VERSION); + progress.startTask("Parse wiki xml", progIS.available()); } - + public int getCount() { return (int) (id - 1); } - + @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { String name = saxParser.isNamespaceAware() ? localName : qName; if (!page) { page = name.equals("page"); } else { - if(name.equals("title")) { + if (name.equals("title")) { title.setLength(0); ctext = title; - } else if(name.equals("text")) { - text.setLength(0); - ctext = text; + } else if (name.equals("text")) { + if(parseText) { + text.setLength(0); + ctext = text; + } + } else if (name.equals("id")) { + pageId.setLength(0); + ctext = pageId; } } } - - + @Override public void characters(char[] ch, int start, int length) throws SAXException { if (page) { - if(ctext != null) { + if (ctext != null) { ctext.append(ch, start, length); } } } - + @Override public void endElement(String uri, String localName, String qName) throws SAXException { String name = saxParser.isNamespaceAware() ? localName : qName; try { if (page) { - if(name.equals("page")) { + if (name.equals("page")) { page = false; + parseText = false; progress.remaining(progIS.available()); - } else if(name.equals("title")) { + } else if (name.equals("title")) { ctext = null; - } else if(name.equals("text")) { - analyzeTextForGeoInfo(); + } else if (name.equals("id")) { + ctext = null; + long pid = Long.parseLong(pageId.toString()); + dbStat.setLong(1, pid); + ResultSet rs = dbStat.executeQuery(); + parseText = false; + if(rs.next()) { + parseText = true; + clat = rs.getFloat(1); + clon = rs.getFloat(2); + subcategory = (rs.getString(3) + "").toLowerCase(); + } + } else if (name.equals("text")) { + if(parseText) { + analyzeTextForGeoInfoNew(); + } ctext = null; } } @@ -233,16 +294,18 @@ public class WikiIndexer { throw new SAXException(e); } catch (XMLStreamException e) { throw new SAXException(e); + } catch (SQLException e) { + throw new SAXException(e); } } - - private String readProperty(String prop, int s, int e){ + + private String readProperty(String prop, int s, int e) { int res = -1; for (int i = s; i < e - prop.length(); i++) { - if(prop.charAt(0) == text.charAt(i)) { + if (prop.charAt(0) == text.charAt(i)) { boolean neq = false; for (int j = 0; j < prop.length(); j++) { - if(prop.charAt(j) != text.charAt(i + j)) { + if (prop.charAt(j) != text.charAt(i + j)) { neq = true; break; } @@ -253,7 +316,7 @@ public class WikiIndexer { } } } - if(res == -1){ + if (res == -1) { return null; } int sr = -1; @@ -267,12 +330,12 @@ public class WikiIndexer { sr = i + 1; } } - if(sr != -1) { + if (sr != -1) { String result = text.substring(sr, se); int commSt = result.indexOf(""); - if(commEnd == -1){ + if (commEnd == -1) { commEnd = result.length(); } else { commEnd += "-->".length(); @@ -283,15 +346,15 @@ public class WikiIndexer { } return null; } - + private float zeroParseFloat(String s) { return s == null || s.length() == 0 ? 0 : Float.parseFloat(s); } - + private int findOpenBrackets(int i) { int h = text.indexOf("{{", i); boolean check = true; - while(check){ + while (check) { int startComment = text.indexOf("