Update indexes site

This commit is contained in:
Victor Shcherb 2012-01-08 13:05:05 +01:00
parent 1137a34f9a
commit f7d288849f
2 changed files with 131 additions and 123 deletions

View file

@ -331,7 +331,8 @@ public class WikiIndexer {
private void analyzeTextForGeoInfo() throws XMLStreamException {
// fast precheck
if(title.toString().endsWith("/doc") || title.toString().startsWith("Шаблон:")) {
if(title.toString().endsWith("/doc") || title.toString().startsWith("Шаблон:") ||
title.toString().startsWith("Template:")) {
// Looks like a template article; there is no information in it
return;
}

View file

@ -1,138 +1,145 @@
<?php
function updateGoogleCodeIndexes($update=false) {
$localFileName='indexes.xml';
// check every 30 minutes
if(!$update && file_exists($localFileName) && time() - filemtime($localFileName) < 60 * 30) {
return;
}
function updateGoogleCodeIndexes($update=false) {
$localFileName='indexes.xml';
// check every 30 minutes
if(!$update && file_exists($localFileName) && time() - filemtime($localFileName) < 60 * 30) {
return;
}
$dom = new DomDocument();
$dom = new DomDocument();
$output = new DOMDocument();
$output->formatOutput = true;
$outputIndexes = $output->createElement( "osmand_regions" );
$outputIndexes->setAttribute('mapversion','1');
$output->appendChild( $outputIndexes );
$output = new DOMDocument();
$output->formatOutput = true;
$outputIndexes = $output->createElement( "osmand_regions" );
$outputIndexes->setAttribute('mapversion','1');
$output->appendChild( $outputIndexes );
$st = 0;
$num = 200;
$count = 0;
$mapNodes = array();
/// 1. download indexes from googlecode
while($st != -1){
$dom->loadHTMLFile("http://code.google.com/p/osmand/downloads/list?num=".$num."&start=".$st."&colspec=Filename+Summary+Uploaded+Size");
$st = 0;
$num = 200;
$count = 0;
$mapNodes = array();
/// 1. download indexes from googlecode
while($st != -1){
$dom->loadHTMLFile("http://code.google.com/p/osmand/downloads/list?num=".$num."&start=".$st."&colspec=Filename+Summary+Uploaded+Size");
$count ++;
$xpath = new DOMXpath($dom);
$xpathI = new DOMXpath($dom);
$res = $xpath->query('//td[contains(@class,"col_0")]');
if($res && $res->length > 0) {
foreach($res as $node) {
$indexName = trim($node->nodeValue);
$s = $xpathI->query('td[contains(@class,"col_1")]/a[1]', $node->parentNode);
if(!$s || $s->length == 0) {
continue;
}
$description = $s->item(0)->nodeValue;
$i = strpos($description,"{");
if(!$i) {
continue;
}
$i1 = strpos($description,":", $i);
$i2 = stripos($description,"mb", $i1);
if(!$i2) {
$i2 = strpos($description,"}", $i1);
}
$date = trim(substr($description, $i + 1, $i1 - $i -1));
$size = trim(substr($description, $i1 + 1, $i2 - $i1 -1));
$description = trim(substr($description, 0, $i));
if(strpos($indexName,"voice.zip") || strpos($indexName,"_1.poi.zip") ||
strpos($indexName,"_1.poi.odb") || strpos($indexName,"_1.obf")) {
$ipart = strpos($indexName,"zip-");
$part = 1;
$base = $indexName;
if($ipart) {
$part = (int)substr($indexName, $ipart+4);
$base = substr($indexName, 0, $ipart+3);
if(isset($mapNodes[$base])) {
$out = $mapNodes[$base];
} else {
$out = $output->createElement( "multiregion" );
$out -> setAttribute("parts", $part);
$mapNodes[$base] = $out;
$out -> setAttribute("date", $date);
$out -> setAttribute("size", $size);
$out -> setAttribute("name", $base);
$out -> setAttribute("description", $description);
$outputIndexes->appendChild($out);
}
if( (int) $out -> getAttribute("parts") < $part){
$out -> setAttribute("parts", $part);
}
} else {
$out = $output->createElement( "region" );
$out -> setAttribute("date", $date);
$out -> setAttribute("size", $size);
$out -> setAttribute("name", $indexName);
$out -> setAttribute("description", $description);
$outputIndexes->appendChild($out);
$mapNodes[$indexName] = $out;
}
if($res && $res->length > 0) {
foreach($res as $node) {
$indexName = trim($node->nodeValue);
$s = $xpathI->query('td[contains(@class,"col_1")]/a[1]', $node->parentNode);
if(!$s || $s->length == 0) {
continue;
}
$description = $s->item(0)->nodeValue;
$i = strpos($description,"{");
if(!$i) {
continue;
}
$i1 = strpos($description,":", $i);
$i2 = stripos($description,"mb", $i1);
if(!$i2) {
$i2 = strpos($description,"}", $i1);
}
$date = trim(substr($description, $i + 1, $i1 - $i -1));
$size = trim(substr($description, $i1 + 1, $i2 - $i1 -1));
$description = trim(substr($description, 0, $i));
}
}
$st += $num;
if(strpos($indexName,"voice.zip") || strpos($indexName,"_1.poi.zip") ||
strpos($indexName,"_1.poi.odb") || strpos($indexName,"_1.obf")) {
$ipart = strpos($indexName,"zip-");
$part = 1;
$base = $indexName;
if($ipart) {
$part = (int)substr($indexName, $ipart+4);
$base = substr($indexName, 0, $ipart+3);
if(isset($mapNodes[$base])) {
$out = $mapNodes[$base];
} else {
$out = $output->createElement( "multiregion" );
$out -> setAttribute("parts", $part);
$mapNodes[$base] = $out;
$out -> setAttribute("date", $date);
$out -> setAttribute("size", $size);
$out -> setAttribute("name", $base);
$out -> setAttribute("description", $description);
$outputIndexes->appendChild($out);
}
if( (int) $out -> getAttribute("parts") < $part){
$out -> setAttribute("parts", $part);
}
} else {
$out = $output->createElement( "region" );
$out -> setAttribute("date", $date);
$out -> setAttribute("size", $size);
$out -> setAttribute("name", $indexName);
$out -> setAttribute("description", $description);
$outputIndexes->appendChild($out);
$mapNodes[$indexName] = $out;
}
}
}
$st += $num;
} else {
$st = -1;
$st = -1;
}
}
/// 2. append local indexes
$local = new DomDocument();
// Open a known directory, and proceed to read its contents
$dir='indexes/';
if (is_dir($dir)) {
if ($dh = opendir($dir)) {
$zip = new ZipArchive();
while (($file = readdir($dh)) !== false) {
$filename = $dir . $file ; //"./test112.zip";
//print("processing file:" . $filename . "\n");
if ($zip->open($filename,ZIPARCHIVE::CHECKCONS)!==TRUE) {
//echo exit("cannot open <$filename>\n");
//print($filename . " cannot open as zip\n");
continue;
}
$indexName=$file;
if (isset($mapNodes[$indexName])) {
//print($indexName . " is listed already, skipping\n");
continue;
}
$description = $zip->getCommentIndex(0);
$stat = $zip->statIndex( 0 );
$date= date('d.m.Y',$stat['mtime']);
$size= number_format((filesize($filename) / (1024.0*1024.0)), 1, '.', '');
$zip->close();
$out = $output->createElement( "region" );
$out -> setAttribute("date", $date);
$out -> setAttribute("size", $size);
$out -> setAttribute("name", $indexName);
$out -> setAttribute("description", $description);
$outputIndexes->appendChild($out);
//$mapNodes[$indexName] = $out;
}
closedir($dh);
}
} else {
print($dir . " not a directory!\n");
}
$output->save($localFileName);
/// 2. append local indexes
$local = new DomDocument();
// Open a known directory, and proceed to read its contents
$dir='indexes/';
if (is_dir($dir)) {
if ($dh = opendir($dir)) {
$zip = new ZipArchive();
while (($file = readdir($dh)) !== false) {
$filename = $dir . $file ; //"./test112.zip";
//print("processing file:" . $filename . "\n");
if ($zip->open($filename,ZIPARCHIVE::CHECKCONS)!==TRUE) {
// echo exit("cannot open <$filename>\n");
// print($filename . " cannot open as zip\n");
continue;
}
$indexName=$file;
$description = $zip->getCommentIndex(0);
$stat = $zip->statIndex( 0 );
$date= date('d.m.Y',$stat['mtime']);
$size= number_format((filesize($filename) / (1024.0*1024.0)), 1, '.', '');
$zip->close();
if (isset($mapNodes[$indexName])) {
$exdate = DateTime::createFromFormat('d.m.Y', $mapNodes[$indexName]->getAttribute("date"));
if($stat['mtime'] < $exdate->getTimestamp()) {
continue;
}
$out = $mapNodes[$indexName];
} else {
$out = $output->createElement( "region" );
$outputIndexes->appendChild($out);
}
$out -> setAttribute("date", $date);
$out -> setAttribute("local", "true");
$out -> setAttribute("size", $size);
$out -> setAttribute("name", $indexName);
$out -> setAttribute("description", $description);
//$mapNodes[$indexName] = $out;
}
closedir($dh);
}
} else {
print($dir . " not a directory!\n");
}
$output->save($localFileName);
}
?>