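Update wiki indexer: pass the wiki locale to WikiOsmHandler, use the page id as the OSM node id, add a "wikipedia" tag, and lower progress/memory logging to debug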

This commit is contained in:
Victor Shcherb 2012-01-08 23:51:34 +01:00
parent a91934bee6
commit f5a2da8b62
2 changed files with 20 additions and 8 deletions

View file

@ -170,7 +170,7 @@ public class WikiIndexer {
ConsoleProgressImplementation progress = new ConsoleProgressImplementation();
out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(osmOut), "UTF-8"));
SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
WikiOsmHandler wikiOsmHandler = new WikiOsmHandler(saxParser, out, progress, progressStream, statement);
WikiOsmHandler wikiOsmHandler = new WikiOsmHandler(saxParser, out, progress, progressStream, statement, wikiLocale);
saxParser.parse(fi, wikiOsmHandler);
statement.close();
@ -205,6 +205,7 @@ public class WikiIndexer {
private StringBuilder pageId = new StringBuilder();
private float clat = 0;
private float clon = 0;
private long cid;
private String subcategory = null;
private boolean parseText = false;
@ -212,14 +213,17 @@ public class WikiIndexer {
private final InputStream progIS;
private XMLStreamWriter streamWriter;
private final PreparedStatement dbStat;
private final String locale;
WikiOsmHandler(SAXParser saxParser, BufferedWriter outOsm, ConsoleProgressImplementation progress, InputStream progIS,
PreparedStatement dbStat)
PreparedStatement dbStat, String wikiLocale)
throws IOException, XMLStreamException {
this.saxParser = saxParser;
this.progress = progress;
this.progIS = progIS;
this.dbStat = dbStat;
this.locale = wikiLocale;
XMLOutputFactory xof = XMLOutputFactory.newInstance();
streamWriter = xof.createXMLStreamWriter(outOsm);
streamWriter.writeStartDocument();
@ -278,8 +282,8 @@ public class WikiIndexer {
ctext = null;
} else if (name.equals("id")) {
ctext = null;
long pid = Long.parseLong(pageId.toString());
dbStat.setLong(1, pid);
cid = Long.parseLong(pageId.toString());
dbStat.setLong(1, cid);
ResultSet rs = dbStat.executeQuery();
parseText = false;
if(rs.next()) {
@ -290,6 +294,7 @@ public class WikiIndexer {
}
} else if (name.equals("text")) {
if(parseText) {
log.debug("Article accepted " + cid + " " + title.toString());
analyzeTextForGeoInfoNew();
}
ctext = null;
@ -467,7 +472,6 @@ public class WikiIndexer {
}
private void analyzeTextForGeoInfoNew() throws XMLStreamException {
// fast precheck
StringBuilder description = new StringBuilder();
int beg = 0;
int h = findOpenBrackets(beg);
@ -578,7 +582,8 @@ public class WikiIndexer {
private void writeNode(double lat, double lon, String subcategory, StringBuilder description) throws XMLStreamException {
streamWriter.writeCharacters("\n");
streamWriter.writeStartElement("node");
streamWriter.writeAttribute("id", "-" + id++);
id++;
streamWriter.writeAttribute("id", "-" + cid);
streamWriter.writeAttribute("lat", lat + "");
streamWriter.writeAttribute("lon", lon + "");
@ -587,6 +592,13 @@ public class WikiIndexer {
streamWriter.writeAttribute("k", "name");
streamWriter.writeAttribute("v", title.toString());
streamWriter.writeEndElement();
streamWriter.writeCharacters("\n ");
streamWriter.writeStartElement("tag");
streamWriter.writeAttribute("k", "wikipedia");
streamWriter.writeAttribute("v", locale + ":"+title.toString());
streamWriter.writeEndElement();
streamWriter.writeCharacters("\n ");
streamWriter.writeStartElement("tag");

View file

@ -59,7 +59,7 @@ public class ConsoleProgressImplementation implements IProgress {
this.lastPercentPrint = getCurrentPercent();
long now = System.currentTimeMillis();
if(now - lastTimePrinted >= deltaTimeToPrint || deltaTime < 0){
log.info(MessageFormat.format("Done {0} %.", getCurrentPercent())); //$NON-NLS-1$
log.debug(MessageFormat.format("Done {0} %.", getCurrentPercent())); //$NON-NLS-1$
lastTimePrinted = now;
}
@ -80,7 +80,7 @@ public class ConsoleProgressImplementation implements IProgress {
public void startTask(String taskName, int work) {
if(!Algoritms.objectEquals(currentTask, taskName)){
this.currentTask = taskName;
log.info("Memory before task exec: " + Runtime.getRuntime().totalMemory() + " free : " + Runtime.getRuntime().freeMemory()); //$NON-NLS-1$ //$NON-NLS-2$
log.debug("Memory before task exec: " + Runtime.getRuntime().totalMemory() + " free : " + Runtime.getRuntime().freeMemory()); //$NON-NLS-1$ //$NON-NLS-2$
if (previousTaskStarted == 0) {
log.info(taskName + " started - " + work); //$NON-NLS-1$
} else {