Add POI text box and rewrite WikiIndexer

This commit is contained in:
Victor Shcherb 2012-01-08 23:20:30 +01:00
parent c89e935431
commit 2914df8cb7
5 changed files with 217 additions and 121 deletions

View file

@ -11,5 +11,6 @@
<classpathentry kind="lib" path="lib/jleveldb-v01.jar"/>
<classpathentry kind="lib" path="lib/sqlite-jdbc-3.7.6.3-20110609.081603-3.jar"/>
<classpathentry kind="lib" path="lib/jsch-20120101.jar"/>
<classpathentry kind="lib" path="lib/mysql-connector-java-5.1.18-bin.jar"/>
<classpathentry kind="output" path="bin"/>
</classpath>

View file

@ -8,6 +8,10 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import javax.xml.parsers.ParserConfigurationException;
@ -23,24 +27,30 @@ import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import net.osmand.Algoritms;
import net.osmand.LogUtil;
import net.osmand.Version;
import net.osmand.data.preparation.IndexCreator;
import net.osmand.impl.ConsoleProgressImplementation;
// http://toolserver.org/~dispenser/dumps/coord_commonswiki.sql.gz
// select * from coord_commonswiki limit 10;
public class WikiIndexer {
private static final Log log = LogUtil.getLog(WikiIndexer.class);
private final File srcPath;
private final File workPath;
private final File targetPath;
private final String userName = "jenkins";
private final String password = "jenkins";
private final String url = "jdbc:mysql://localhost/wiki";
public static class WikiIndexerException extends Exception {
private static final long serialVersionUID = 1L;
public WikiIndexerException(String name) {
super(name);
}
public WikiIndexerException(String string, Exception e) {
super(string, e);
}
@ -69,24 +79,41 @@ public class WikiIndexer {
private static File extractDirectory(String[] args, int ind) throws WikiIndexerException {
if (args.length <= ind) {
throw new WikiIndexerException("Usage: WikiIndexer src_directory target_directory work_directory [--description={full|normal|minimum}]" + " missing " + (ind + 1));
throw new WikiIndexerException(
"Usage: WikiIndexer src_directory target_directory work_directory [--description={full|normal|minimum}]" + " missing "
+ (ind + 1));
} else {
File fs = new File(args[ind]);
fs.mkdir();
if(!fs.exists() || !fs.isDirectory()) {
if (!fs.exists() || !fs.isDirectory()) {
throw new WikiIndexerException("Specified directory doesn't exist : " + args[ind]);
}
return fs;
}
}
public void run() {
public void run() throws WikiIndexerException {
log.info("Obtain database connection");
Connection conn;
try {
Class.forName("com.mysql.jdbc.Driver").newInstance();
conn = DriverManager.getConnection(url, userName, password);
log.info("Database connection established");
} catch (InstantiationException e1) {
throw new WikiIndexerException("Could not establish connection to " + url + " with " + userName, e1);
} catch (IllegalAccessException e1) {
throw new WikiIndexerException("Could not establish connection to " + url + " with " + userName, e1);
} catch (ClassNotFoundException e1) {
throw new WikiIndexerException("Could not establish connection to " + url + " with " + userName, e1);
} catch (SQLException e1) {
throw new WikiIndexerException("Could not establish connection to " + url + " with " + userName, e1);
}
File[] listFiles = srcPath.listFiles();
for(File f : listFiles) {
for (File f : listFiles) {
try {
if (f.isFile() && (f.getName().endsWith(".xml") || f.getName().endsWith(".xml.bz2"))) {
log.info("About to process " + f.getName());
File outFile = process(f);
File outFile = process(f, conn);
if (outFile != null) {
IndexCreator ic = new IndexCreator(workPath);
@ -100,30 +127,35 @@ public class WikiIndexer {
}
}
} catch (WikiIndexerException e) {
log.error("Error processing "+f.getName(), e);
log.error("Error processing " + f.getName(), e);
} catch (RuntimeException e) {
log.error("Error processing "+f.getName(), e);
log.error("Error processing " + f.getName(), e);
} catch (IOException e) {
log.error("Error processing "+f.getName(), e);
log.error("Error processing " + f.getName(), e);
} catch (SAXException e) {
log.error("Error processing "+f.getName(), e);
log.error("Error processing " + f.getName(), e);
} catch (SQLException e) {
log.error("Error processing "+f.getName(), e);
log.error("Error processing " + f.getName(), e);
} catch (InterruptedException e) {
log.error("Error processing "+f.getName(), e);
log.error("Error processing " + f.getName(), e);
}
}
}
protected File process(File f) throws WikiIndexerException {
protected File process(File f, Connection dbConnection) throws WikiIndexerException {
InputStream fi = null;
BufferedWriter out = null;
try {
int in = f.getName().indexOf('.');
String wikiLocale = f.getName().substring(in + 1, f.getName().indexOf('.', in));
log.info("Locale for file " + wikiLocale);
PreparedStatement statement = dbConnection.prepareStatement("SELECT gc_lat, gc_lon, gc_type FROM coord_"+wikiLocale+"wiki WHERE gc_from=?");
File osmOut = new File(workPath, f.getName().substring(0, in) + ".osm");
fi = new BufferedInputStream(new FileInputStream(f));
InputStream progressStream = fi;
if(f.getName().endsWith(".bz2")){
if (f.getName().endsWith(".bz2")) {
if (fi.read() != 'B' || fi.read() != 'Z') {
throw new RuntimeException("The source stream must start with the characters BZ if it is to be read as a BZip2 stream."); //$NON-NLS-1$
} else {
@ -133,10 +165,11 @@ public class WikiIndexer {
ConsoleProgressImplementation progress = new ConsoleProgressImplementation();
out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(osmOut), "UTF-8"));
SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
WikiOsmHandler wikiOsmHandler = new WikiOsmHandler(saxParser, out, progress, progressStream);
WikiOsmHandler wikiOsmHandler = new WikiOsmHandler(saxParser, out, progress, progressStream, statement);
saxParser.parse(fi, wikiOsmHandler);
if(wikiOsmHandler.getCount() < 1){
statement.close();
if (wikiOsmHandler.getCount() < 1) {
return null;
}
return osmOut;
@ -148,13 +181,14 @@ public class WikiIndexer {
throw new WikiIndexerException("Parse exception", e);
} catch (XMLStreamException e) {
throw new WikiIndexerException("Parse exception", e);
} catch (SQLException e) {
throw new WikiIndexerException("Database exception or locale configuration problem", e);
} finally {
Algoritms.closeStream(out);
Algoritms.closeStream(fi);
}
}
public class WikiOsmHandler extends DefaultHandler {
long id = 1;
private final SAXParser saxParser;
@ -163,15 +197,24 @@ public class WikiIndexer {
private StringBuilder title = new StringBuilder();
private StringBuilder text = new StringBuilder();
private StringBuilder pageId = new StringBuilder();
private float clat = 0;
private float clon = 0;
private String subcategory = null;
private boolean parseText = false;
private final ConsoleProgressImplementation progress;
private final InputStream progIS;
private XMLStreamWriter streamWriter;
private final PreparedStatement dbStat;
WikiOsmHandler(SAXParser saxParser, BufferedWriter outOsm, ConsoleProgressImplementation progress, InputStream progIS) throws IOException, XMLStreamException {
WikiOsmHandler(SAXParser saxParser, BufferedWriter outOsm, ConsoleProgressImplementation progress, InputStream progIS,
PreparedStatement dbStat)
throws IOException, XMLStreamException {
this.saxParser = saxParser;
this.progress = progress;
this.progIS = progIS;
this.dbStat = dbStat;
XMLOutputFactory xof = XMLOutputFactory.newInstance();
streamWriter = xof.createXMLStreamWriter(outOsm);
streamWriter.writeStartDocument();
@ -180,7 +223,6 @@ public class WikiIndexer {
streamWriter.writeAttribute("version", "0.6");
streamWriter.writeAttribute("generator", Version.APP_MAP_CREATOR_VERSION);
progress.startTask("Parse wiki xml", progIS.available());
}
@ -194,21 +236,25 @@ public class WikiIndexer {
if (!page) {
page = name.equals("page");
} else {
if(name.equals("title")) {
if (name.equals("title")) {
title.setLength(0);
ctext = title;
} else if(name.equals("text")) {
} else if (name.equals("text")) {
if(parseText) {
text.setLength(0);
ctext = text;
}
} else if (name.equals("id")) {
pageId.setLength(0);
ctext = pageId;
}
}
}
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
if (page) {
if(ctext != null) {
if (ctext != null) {
ctext.append(ch, start, length);
}
}
@ -219,13 +265,28 @@ public class WikiIndexer {
String name = saxParser.isNamespaceAware() ? localName : qName;
try {
if (page) {
if(name.equals("page")) {
if (name.equals("page")) {
page = false;
parseText = false;
progress.remaining(progIS.available());
} else if(name.equals("title")) {
} else if (name.equals("title")) {
ctext = null;
} else if(name.equals("text")) {
analyzeTextForGeoInfo();
} else if (name.equals("id")) {
ctext = null;
long pid = Long.parseLong(pageId.toString());
dbStat.setLong(1, pid);
ResultSet rs = dbStat.executeQuery();
parseText = false;
if(rs.next()) {
parseText = true;
clat = rs.getFloat(1);
clon = rs.getFloat(2);
subcategory = (rs.getString(3) + "").toLowerCase();
}
} else if (name.equals("text")) {
if(parseText) {
analyzeTextForGeoInfoNew();
}
ctext = null;
}
}
@ -233,16 +294,18 @@ public class WikiIndexer {
throw new SAXException(e);
} catch (XMLStreamException e) {
throw new SAXException(e);
} catch (SQLException e) {
throw new SAXException(e);
}
}
private String readProperty(String prop, int s, int e){
private String readProperty(String prop, int s, int e) {
int res = -1;
for (int i = s; i < e - prop.length(); i++) {
if(prop.charAt(0) == text.charAt(i)) {
if (prop.charAt(0) == text.charAt(i)) {
boolean neq = false;
for (int j = 0; j < prop.length(); j++) {
if(prop.charAt(j) != text.charAt(i + j)) {
if (prop.charAt(j) != text.charAt(i + j)) {
neq = true;
break;
}
@ -253,7 +316,7 @@ public class WikiIndexer {
}
}
}
if(res == -1){
if (res == -1) {
return null;
}
int sr = -1;
@ -267,12 +330,12 @@ public class WikiIndexer {
sr = i + 1;
}
}
if(sr != -1) {
if (sr != -1) {
String result = text.substring(sr, se);
int commSt = result.indexOf("<!--");
if(commSt != -1) {
if (commSt != -1) {
int commEnd = result.indexOf("-->");
if(commEnd == -1){
if (commEnd == -1) {
commEnd = result.length();
} else {
commEnd += "-->".length();
@ -291,7 +354,7 @@ public class WikiIndexer {
private int findOpenBrackets(int i) {
int h = text.indexOf("{{", i);
boolean check = true;
while(check){
while (check) {
int startComment = text.indexOf("<!--", i);
check = false;
if (startComment != -1 && startComment < h) {
@ -303,41 +366,41 @@ public class WikiIndexer {
return h;
}
private int findClosedBrackets(int i){
if(i == -1){
private int findClosedBrackets(int i) {
if (i == -1) {
return -1;
}
int stack = 1;
int h = text.indexOf("{{", i+2);
int e = text.indexOf("}}", i+2);
while(stack != 0 && e != -1) {
if(h!= -1 && h<e){
int h = text.indexOf("{{", i + 2);
int e = text.indexOf("}}", i + 2);
while (stack != 0 && e != -1) {
if (h != -1 && h < e) {
i = h;
stack++;
} else {
i = e;
stack--;
}
if(stack != 0) {
h = text.indexOf("{{", i+2);
e = text.indexOf("}}", i+2);
if (stack != 0) {
h = text.indexOf("{{", i + 2);
e = text.indexOf("}}", i + 2);
}
}
if(stack == 0){
if (stack == 0) {
return e;
}
return -1;
}
@Deprecated
private void analyzeTextForGeoInfo() throws XMLStreamException {
// fast precheck
if(title.toString().endsWith("/doc") || title.toString().startsWith("Шаблон:") ||
title.toString().startsWith("Template:")) {
if (title.toString().endsWith("/doc") || title.toString().startsWith("Шаблон:") || title.toString().startsWith("Template:")) {
// Looks as template article no information in it
return;
}
int ls = text.indexOf("lat_dir");
if(ls != -1 && text.charAt(ls + 1 + "lat_dir".length()) != '|') {
if (ls != -1 && text.charAt(ls + 1 + "lat_dir".length()) != '|') {
float lat = 0;
float lon = 0;
String subcategory = "";
@ -365,7 +428,7 @@ public class WikiIndexer {
String lon_dir = readProperty("lon_dir", h, e);
String lat_dg = readProperty("lat_deg", h, e);
String lon_dg = readProperty("lon_deg", h, e);
if(lon_dg == null || lat_dg == null || lat_dg.length() == 0 || lon_dg.length() == 0){
if (lon_dg == null || lat_dg == null || lat_dg.length() == 0 || lon_dg.length() == 0) {
return;
}
float lat_deg = Float.parseFloat(lat_dg);
@ -374,8 +437,8 @@ public class WikiIndexer {
float lon_min = zeroParseFloat(readProperty("lon_min", h, e));
float lat_sec = zeroParseFloat(readProperty("lat_sec", h, e));
float lon_sec = zeroParseFloat(readProperty("lon_sec", h, e));
lat = (("S".equals(lat_dir))? -1 : 1) * (lat_deg + (lat_min + lat_sec/60)/60);
lon = (("W".equals(lon_dir))? -1 : 1) * (lon_deg + (lon_min + lon_sec/60)/60);
lat = (("S".equals(lat_dir)) ? -1 : 1) * (lat_deg + (lat_min + lat_sec / 60) / 60);
lon = (("W".equals(lon_dir)) ? -1 : 1) * (lon_deg + (lon_min + lon_sec / 60) / 60);
} catch (RuntimeException es) {
log.error("Article " + title, es);
return;
@ -389,34 +452,65 @@ public class WikiIndexer {
}
// Special case
// 4. Parse main subcategory name
processDescription(description, e + 3);
if(description.length() > 0) {
if (description.length() > 0) {
writeNode(lat, lon, subcategory, description);
}
}
}
private int checkAndParse(int i, String start, String end, StringBuilder d, boolean add){
if(text.charAt(i) != start.charAt(0)) {
private void analyzeTextForGeoInfoNew() throws XMLStreamException {
// fast precheck
StringBuilder description = new StringBuilder();
int beg = 0;
int h = findOpenBrackets(beg);
// 1. Find main header section {{ ... lat, lon }}
while (h != -1 && text.substring(beg, h).trim().length() == 0 ) {
beg = findClosedBrackets(h);
if(beg == -1){
return;
}
beg += 2;
h = findOpenBrackets(beg);
}
// 3. Parse main subcategory name
// for (int j = h + 2; j < e; j++) {
// if (Character.isWhitespace(text.charAt(j)) || text.charAt(j) == '|') {
// subcategory = text.substring(h + 2, j).trim();
// break;
// }
// }
// Special case
// 4. Parse main subcategory name
processDescription(description, beg);
if (description.length() > 0) {
writeNode(clat, clon, subcategory, description);
}
}
private int checkAndParse(int i, String start, String end, StringBuilder d, boolean add) {
if (text.charAt(i) != start.charAt(0)) {
return -1;
}
for (int j = 1 ; j < start.length(); j++) {
if(text.charAt(i + j) != start.charAt(j)){
for (int j = 1; j < start.length(); j++) {
if (text.charAt(i + j) != start.charAt(j)) {
return -1;
}
}
int st = i+start.length();
int st = i + start.length();
int en = text.length();
boolean colon = false;
for (int j = i + start.length(); j < text.length(); j++) {
if (text.charAt(j) == '|') {
st = j + 1;
if(colon){
if (colon) {
// Special case to prevent adding
// [[File:av.png|thumb|220|220]]
add = false;
@ -439,35 +533,34 @@ public class WikiIndexer {
}
}
if(add){
if (add) {
d.append(text, st, en);
}
return en + end.length();
}
private void processDescription(StringBuilder description, int start) {
for (int j = start ; j < text.length();) {
if (text.charAt(j) == '=' && text.charAt(j+1) == '=') {
for (int j = start; j < text.length();) {
if (text.charAt(j) == '=' && text.charAt(j + 1) == '=') {
break;
} else if (text.charAt(j) == '\n' && j - start > 2048) {
break;
} else {
int r = -1;
if(r == -1) {
if (r == -1) {
r = checkAndParse(j, "<ref", "</ref>", description, false);
}
if (r == -1) {
r = checkAndParse(j, "[[", "]]", description, true);
}
if(r == -1) {
if (r == -1) {
r = checkAndParse(j, "{{", "}}", description, true);
}
if(r == -1) {
r = checkAndParse(j, "''", "''", description,true);
if (r == -1) {
r = checkAndParse(j, "''", "''", description, true);
}
if(r == -1) {
if (r == -1) {
description.append(text.charAt(j));
j++;
} else {
@ -481,8 +574,8 @@ public class WikiIndexer {
streamWriter.writeCharacters("\n");
streamWriter.writeStartElement("node");
streamWriter.writeAttribute("id", "-" + id++);
streamWriter.writeAttribute("lat", lat+"");
streamWriter.writeAttribute("lon", lon+"");
streamWriter.writeAttribute("lat", lat + "");
streamWriter.writeAttribute("lon", lon + "");
streamWriter.writeCharacters("\n ");
streamWriter.writeStartElement("tag");

View file

@ -2,7 +2,6 @@ package net.osmand.plus.activities;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.MessageFormat;
import java.util.Locale;
import java.util.StringTokenizer;
@ -197,7 +196,7 @@ public class NavigatePointActivity extends Activity implements SearchActivityChi
}
step++;
}
} else if (ch == '.') {
} else if (ch == '.' || ch == ':' ) {
// do nothing here
} else {
if (step == 1) {

View file

@ -287,9 +287,7 @@ public class POIMapLayer extends OsmandMapLayer implements ContextMenuLayer.ICon
@Override
public OnClickListener getActionListener(List<String> actionsList, Object o) {
final Amenity a = (Amenity) o;
actionsList.add(this.view.getResources().getString(R.string.poi_context_menu_modify));
actionsList.add(this.view.getResources().getString(R.string.poi_context_menu_delete));
int ind = 2;
int ind = 0;
final int phoneIndex = a.getPhone() != null ? ind++ : -1;
final int siteIndex = a.getSite() != null ? ind++ : -1;
final int descriptionIndex = a.getDescription() != null ? ind++ : -1;
@ -302,14 +300,19 @@ public class POIMapLayer extends OsmandMapLayer implements ContextMenuLayer.ICon
if(a.getDescription() != null){
actionsList.add(this.view.getResources().getString(R.string.poi_context_menu_showdescription));
}
final int modifyInd = ind++;
actionsList.add(this.view.getResources().getString(R.string.poi_context_menu_modify));
final int deleteInd = ind++;
actionsList.add(this.view.getResources().getString(R.string.poi_context_menu_delete));
final EditingPOIActivity edit = activity.getPoiActions();
return new DialogInterface.OnClickListener(){
@Override
public void onClick(DialogInterface dialog, int which) {
if (which == 0) {
if (which == modifyInd) {
edit.showEditDialog(a);
} else if(which == 1) {
} else if(which == deleteInd) {
edit.showDeleteDialog(a);
} else if (which == phoneIndex) {
try {