Update wiki indexer

This commit is contained in:
Victor Shcherb 2012-01-08 23:51:34 +01:00
parent a91934bee6
commit f5a2da8b62
2 changed files with 20 additions and 8 deletions

View file

@ -170,7 +170,7 @@ public class WikiIndexer {
ConsoleProgressImplementation progress = new ConsoleProgressImplementation(); ConsoleProgressImplementation progress = new ConsoleProgressImplementation();
out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(osmOut), "UTF-8")); out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(osmOut), "UTF-8"));
SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser(); SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
WikiOsmHandler wikiOsmHandler = new WikiOsmHandler(saxParser, out, progress, progressStream, statement); WikiOsmHandler wikiOsmHandler = new WikiOsmHandler(saxParser, out, progress, progressStream, statement, wikiLocale);
saxParser.parse(fi, wikiOsmHandler); saxParser.parse(fi, wikiOsmHandler);
statement.close(); statement.close();
@ -205,6 +205,7 @@ public class WikiIndexer {
private StringBuilder pageId = new StringBuilder(); private StringBuilder pageId = new StringBuilder();
private float clat = 0; private float clat = 0;
private float clon = 0; private float clon = 0;
private long cid;
private String subcategory = null; private String subcategory = null;
private boolean parseText = false; private boolean parseText = false;
@ -212,14 +213,17 @@ public class WikiIndexer {
private final InputStream progIS; private final InputStream progIS;
private XMLStreamWriter streamWriter; private XMLStreamWriter streamWriter;
private final PreparedStatement dbStat; private final PreparedStatement dbStat;
private final String locale;
WikiOsmHandler(SAXParser saxParser, BufferedWriter outOsm, ConsoleProgressImplementation progress, InputStream progIS, WikiOsmHandler(SAXParser saxParser, BufferedWriter outOsm, ConsoleProgressImplementation progress, InputStream progIS,
PreparedStatement dbStat) PreparedStatement dbStat, String wikiLocale)
throws IOException, XMLStreamException { throws IOException, XMLStreamException {
this.saxParser = saxParser; this.saxParser = saxParser;
this.progress = progress; this.progress = progress;
this.progIS = progIS; this.progIS = progIS;
this.dbStat = dbStat; this.dbStat = dbStat;
this.locale = wikiLocale;
XMLOutputFactory xof = XMLOutputFactory.newInstance(); XMLOutputFactory xof = XMLOutputFactory.newInstance();
streamWriter = xof.createXMLStreamWriter(outOsm); streamWriter = xof.createXMLStreamWriter(outOsm);
streamWriter.writeStartDocument(); streamWriter.writeStartDocument();
@ -278,8 +282,8 @@ public class WikiIndexer {
ctext = null; ctext = null;
} else if (name.equals("id")) { } else if (name.equals("id")) {
ctext = null; ctext = null;
long pid = Long.parseLong(pageId.toString()); cid = Long.parseLong(pageId.toString());
dbStat.setLong(1, pid); dbStat.setLong(1, cid);
ResultSet rs = dbStat.executeQuery(); ResultSet rs = dbStat.executeQuery();
parseText = false; parseText = false;
if(rs.next()) { if(rs.next()) {
@ -290,6 +294,7 @@ public class WikiIndexer {
} }
} else if (name.equals("text")) { } else if (name.equals("text")) {
if(parseText) { if(parseText) {
log.debug("Article accepted " + cid + " " + title.toString());
analyzeTextForGeoInfoNew(); analyzeTextForGeoInfoNew();
} }
ctext = null; ctext = null;
@ -467,7 +472,6 @@ public class WikiIndexer {
} }
private void analyzeTextForGeoInfoNew() throws XMLStreamException { private void analyzeTextForGeoInfoNew() throws XMLStreamException {
// fast precheck
StringBuilder description = new StringBuilder(); StringBuilder description = new StringBuilder();
int beg = 0; int beg = 0;
int h = findOpenBrackets(beg); int h = findOpenBrackets(beg);
@ -578,7 +582,8 @@ public class WikiIndexer {
private void writeNode(double lat, double lon, String subcategory, StringBuilder description) throws XMLStreamException { private void writeNode(double lat, double lon, String subcategory, StringBuilder description) throws XMLStreamException {
streamWriter.writeCharacters("\n"); streamWriter.writeCharacters("\n");
streamWriter.writeStartElement("node"); streamWriter.writeStartElement("node");
streamWriter.writeAttribute("id", "-" + id++); id++;
streamWriter.writeAttribute("id", "-" + cid);
streamWriter.writeAttribute("lat", lat + ""); streamWriter.writeAttribute("lat", lat + "");
streamWriter.writeAttribute("lon", lon + ""); streamWriter.writeAttribute("lon", lon + "");
@ -588,6 +593,13 @@ public class WikiIndexer {
streamWriter.writeAttribute("v", title.toString()); streamWriter.writeAttribute("v", title.toString());
streamWriter.writeEndElement(); streamWriter.writeEndElement();
streamWriter.writeCharacters("\n ");
streamWriter.writeStartElement("tag");
streamWriter.writeAttribute("k", "wikipedia");
streamWriter.writeAttribute("v", locale + ":"+title.toString());
streamWriter.writeEndElement();
streamWriter.writeCharacters("\n "); streamWriter.writeCharacters("\n ");
streamWriter.writeStartElement("tag"); streamWriter.writeStartElement("tag");
streamWriter.writeAttribute("k", "osmwiki"); streamWriter.writeAttribute("k", "osmwiki");

View file

@ -59,7 +59,7 @@ public class ConsoleProgressImplementation implements IProgress {
this.lastPercentPrint = getCurrentPercent(); this.lastPercentPrint = getCurrentPercent();
long now = System.currentTimeMillis(); long now = System.currentTimeMillis();
if(now - lastTimePrinted >= deltaTimeToPrint || deltaTime < 0){ if(now - lastTimePrinted >= deltaTimeToPrint || deltaTime < 0){
log.info(MessageFormat.format("Done {0} %.", getCurrentPercent())); //$NON-NLS-1$ log.debug(MessageFormat.format("Done {0} %.", getCurrentPercent())); //$NON-NLS-1$
lastTimePrinted = now; lastTimePrinted = now;
} }
@ -80,7 +80,7 @@ public class ConsoleProgressImplementation implements IProgress {
public void startTask(String taskName, int work) { public void startTask(String taskName, int work) {
if(!Algoritms.objectEquals(currentTask, taskName)){ if(!Algoritms.objectEquals(currentTask, taskName)){
this.currentTask = taskName; this.currentTask = taskName;
log.info("Memory before task exec: " + Runtime.getRuntime().totalMemory() + " free : " + Runtime.getRuntime().freeMemory()); //$NON-NLS-1$ //$NON-NLS-2$ log.debug("Memory before task exec: " + Runtime.getRuntime().totalMemory() + " free : " + Runtime.getRuntime().freeMemory()); //$NON-NLS-1$ //$NON-NLS-2$
if (previousTaskStarted == 0) { if (previousTaskStarted == 0) {
log.info(taskName + " started - " + work); //$NON-NLS-1$ log.info(taskName + " started - " + work); //$NON-NLS-1$
} else { } else {