Update wiki indexer
This commit is contained in:
parent
a91934bee6
commit
f5a2da8b62
2 changed files with 20 additions and 8 deletions
|
@@ -170,7 +170,7 @@ public class WikiIndexer {
|
||||||
ConsoleProgressImplementation progress = new ConsoleProgressImplementation();
|
ConsoleProgressImplementation progress = new ConsoleProgressImplementation();
|
||||||
out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(osmOut), "UTF-8"));
|
out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(osmOut), "UTF-8"));
|
||||||
SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
|
SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
|
||||||
WikiOsmHandler wikiOsmHandler = new WikiOsmHandler(saxParser, out, progress, progressStream, statement);
|
WikiOsmHandler wikiOsmHandler = new WikiOsmHandler(saxParser, out, progress, progressStream, statement, wikiLocale);
|
||||||
saxParser.parse(fi, wikiOsmHandler);
|
saxParser.parse(fi, wikiOsmHandler);
|
||||||
|
|
||||||
statement.close();
|
statement.close();
|
||||||
|
@@ -205,6 +205,7 @@ public class WikiIndexer {
|
||||||
private StringBuilder pageId = new StringBuilder();
|
private StringBuilder pageId = new StringBuilder();
|
||||||
private float clat = 0;
|
private float clat = 0;
|
||||||
private float clon = 0;
|
private float clon = 0;
|
||||||
|
private long cid;
|
||||||
private String subcategory = null;
|
private String subcategory = null;
|
||||||
private boolean parseText = false;
|
private boolean parseText = false;
|
||||||
|
|
||||||
|
@@ -212,14 +213,17 @@ public class WikiIndexer {
|
||||||
private final InputStream progIS;
|
private final InputStream progIS;
|
||||||
private XMLStreamWriter streamWriter;
|
private XMLStreamWriter streamWriter;
|
||||||
private final PreparedStatement dbStat;
|
private final PreparedStatement dbStat;
|
||||||
|
private final String locale;
|
||||||
|
|
||||||
|
|
||||||
WikiOsmHandler(SAXParser saxParser, BufferedWriter outOsm, ConsoleProgressImplementation progress, InputStream progIS,
|
WikiOsmHandler(SAXParser saxParser, BufferedWriter outOsm, ConsoleProgressImplementation progress, InputStream progIS,
|
||||||
PreparedStatement dbStat)
|
PreparedStatement dbStat, String wikiLocale)
|
||||||
throws IOException, XMLStreamException {
|
throws IOException, XMLStreamException {
|
||||||
this.saxParser = saxParser;
|
this.saxParser = saxParser;
|
||||||
this.progress = progress;
|
this.progress = progress;
|
||||||
this.progIS = progIS;
|
this.progIS = progIS;
|
||||||
this.dbStat = dbStat;
|
this.dbStat = dbStat;
|
||||||
|
this.locale = wikiLocale;
|
||||||
XMLOutputFactory xof = XMLOutputFactory.newInstance();
|
XMLOutputFactory xof = XMLOutputFactory.newInstance();
|
||||||
streamWriter = xof.createXMLStreamWriter(outOsm);
|
streamWriter = xof.createXMLStreamWriter(outOsm);
|
||||||
streamWriter.writeStartDocument();
|
streamWriter.writeStartDocument();
|
||||||
|
@@ -278,8 +282,8 @@ public class WikiIndexer {
|
||||||
ctext = null;
|
ctext = null;
|
||||||
} else if (name.equals("id")) {
|
} else if (name.equals("id")) {
|
||||||
ctext = null;
|
ctext = null;
|
||||||
long pid = Long.parseLong(pageId.toString());
|
cid = Long.parseLong(pageId.toString());
|
||||||
dbStat.setLong(1, pid);
|
dbStat.setLong(1, cid);
|
||||||
ResultSet rs = dbStat.executeQuery();
|
ResultSet rs = dbStat.executeQuery();
|
||||||
parseText = false;
|
parseText = false;
|
||||||
if(rs.next()) {
|
if(rs.next()) {
|
||||||
|
@@ -290,6 +294,7 @@ public class WikiIndexer {
|
||||||
}
|
}
|
||||||
} else if (name.equals("text")) {
|
} else if (name.equals("text")) {
|
||||||
if(parseText) {
|
if(parseText) {
|
||||||
|
log.debug("Article accepted " + cid + " " + title.toString());
|
||||||
analyzeTextForGeoInfoNew();
|
analyzeTextForGeoInfoNew();
|
||||||
}
|
}
|
||||||
ctext = null;
|
ctext = null;
|
||||||
|
@@ -467,7 +472,6 @@ public class WikiIndexer {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void analyzeTextForGeoInfoNew() throws XMLStreamException {
|
private void analyzeTextForGeoInfoNew() throws XMLStreamException {
|
||||||
// fast precheck
|
|
||||||
StringBuilder description = new StringBuilder();
|
StringBuilder description = new StringBuilder();
|
||||||
int beg = 0;
|
int beg = 0;
|
||||||
int h = findOpenBrackets(beg);
|
int h = findOpenBrackets(beg);
|
||||||
|
@@ -578,7 +582,8 @@ public class WikiIndexer {
|
||||||
private void writeNode(double lat, double lon, String subcategory, StringBuilder description) throws XMLStreamException {
|
private void writeNode(double lat, double lon, String subcategory, StringBuilder description) throws XMLStreamException {
|
||||||
streamWriter.writeCharacters("\n");
|
streamWriter.writeCharacters("\n");
|
||||||
streamWriter.writeStartElement("node");
|
streamWriter.writeStartElement("node");
|
||||||
streamWriter.writeAttribute("id", "-" + id++);
|
id++;
|
||||||
|
streamWriter.writeAttribute("id", "-" + cid);
|
||||||
streamWriter.writeAttribute("lat", lat + "");
|
streamWriter.writeAttribute("lat", lat + "");
|
||||||
streamWriter.writeAttribute("lon", lon + "");
|
streamWriter.writeAttribute("lon", lon + "");
|
||||||
|
|
||||||
|
@@ -588,6 +593,13 @@ public class WikiIndexer {
|
||||||
streamWriter.writeAttribute("v", title.toString());
|
streamWriter.writeAttribute("v", title.toString());
|
||||||
streamWriter.writeEndElement();
|
streamWriter.writeEndElement();
|
||||||
|
|
||||||
|
streamWriter.writeCharacters("\n ");
|
||||||
|
streamWriter.writeStartElement("tag");
|
||||||
|
streamWriter.writeAttribute("k", "wikipedia");
|
||||||
|
streamWriter.writeAttribute("v", locale + ":"+title.toString());
|
||||||
|
streamWriter.writeEndElement();
|
||||||
|
|
||||||
|
|
||||||
streamWriter.writeCharacters("\n ");
|
streamWriter.writeCharacters("\n ");
|
||||||
streamWriter.writeStartElement("tag");
|
streamWriter.writeStartElement("tag");
|
||||||
streamWriter.writeAttribute("k", "osmwiki");
|
streamWriter.writeAttribute("k", "osmwiki");
|
||||||
|
|
|
@@ -59,7 +59,7 @@ public class ConsoleProgressImplementation implements IProgress {
|
||||||
this.lastPercentPrint = getCurrentPercent();
|
this.lastPercentPrint = getCurrentPercent();
|
||||||
long now = System.currentTimeMillis();
|
long now = System.currentTimeMillis();
|
||||||
if(now - lastTimePrinted >= deltaTimeToPrint || deltaTime < 0){
|
if(now - lastTimePrinted >= deltaTimeToPrint || deltaTime < 0){
|
||||||
log.info(MessageFormat.format("Done {0} %.", getCurrentPercent())); //$NON-NLS-1$
|
log.debug(MessageFormat.format("Done {0} %.", getCurrentPercent())); //$NON-NLS-1$
|
||||||
lastTimePrinted = now;
|
lastTimePrinted = now;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -80,7 +80,7 @@ public class ConsoleProgressImplementation implements IProgress {
|
||||||
public void startTask(String taskName, int work) {
|
public void startTask(String taskName, int work) {
|
||||||
if(!Algoritms.objectEquals(currentTask, taskName)){
|
if(!Algoritms.objectEquals(currentTask, taskName)){
|
||||||
this.currentTask = taskName;
|
this.currentTask = taskName;
|
||||||
log.info("Memory before task exec: " + Runtime.getRuntime().totalMemory() + " free : " + Runtime.getRuntime().freeMemory()); //$NON-NLS-1$ //$NON-NLS-2$
|
log.debug("Memory before task exec: " + Runtime.getRuntime().totalMemory() + " free : " + Runtime.getRuntime().freeMemory()); //$NON-NLS-1$ //$NON-NLS-2$
|
||||||
if (previousTaskStarted == 0) {
|
if (previousTaskStarted == 0) {
|
||||||
log.info(taskName + " started - " + work); //$NON-NLS-1$
|
log.info(taskName + " started - " + work); //$NON-NLS-1$
|
||||||
} else {
|
} else {
|
||||||
|
|
Loading…
Reference in a new issue