Update indexes site

This commit is contained in:
Victor Shcherb 2012-01-08 13:05:05 +01:00
parent 1137a34f9a
commit f7d288849f
2 changed files with 131 additions and 123 deletions

View file

@ -331,7 +331,8 @@ public class WikiIndexer {
private void analyzeTextForGeoInfo() throws XMLStreamException { private void analyzeTextForGeoInfo() throws XMLStreamException {
// fast precheck // fast precheck
if(title.toString().endsWith("/doc") || title.toString().startsWith("Шаблон:")) { if(title.toString().endsWith("/doc") || title.toString().startsWith("Шаблон:") ||
title.toString().startsWith("Template:")) {
// Looks as template article no information in it // Looks as template article no information in it
return; return;
} }

View file

@ -1,138 +1,145 @@
<?php <?php
function updateGoogleCodeIndexes($update=false) { function updateGoogleCodeIndexes($update=false) {
$localFileName='indexes.xml'; $localFileName='indexes.xml';
// check each 30 minutes // check each 30 minutes
if(!$update && file_exists($localFileName) && time() - filemtime($localFileName) < 60 * 30) { if(!$update && file_exists($localFileName) && time() - filemtime($localFileName) < 60 * 30) {
return; return;
} }
$dom = new DomDocument(); $dom = new DomDocument();
$output = new DOMDocument(); $output = new DOMDocument();
$output->formatOutput = true; $output->formatOutput = true;
$outputIndexes = $output->createElement( "osmand_regions" ); $outputIndexes = $output->createElement( "osmand_regions" );
$outputIndexes->setAttribute('mapversion','1'); $outputIndexes->setAttribute('mapversion','1');
$output->appendChild( $outputIndexes ); $output->appendChild( $outputIndexes );
$st = 0; $st = 0;
$num = 200; $num = 200;
$count = 0; $count = 0;
$mapNodes = array(); $mapNodes = array();
/// 1. dlownload indexes from googlecode /// 1. dlownload indexes from googlecode
while($st != -1){ while($st != -1){
$dom->loadHTMLFile("http://code.google.com/p/osmand/downloads/list?num=".$num."&start=".$st."&colspec=Filename+Summary+Uploaded+Size"); $dom->loadHTMLFile("http://code.google.com/p/osmand/downloads/list?num=".$num."&start=".$st."&colspec=Filename+Summary+Uploaded+Size");
$count ++; $count ++;
$xpath = new DOMXpath($dom); $xpath = new DOMXpath($dom);
$xpathI = new DOMXpath($dom); $xpathI = new DOMXpath($dom);
$res = $xpath->query('//td[contains(@class,"col_0")]'); $res = $xpath->query('//td[contains(@class,"col_0")]');
if($res && $res->length > 0) { if($res && $res->length > 0) {
foreach($res as $node) { foreach($res as $node) {
$indexName = trim($node->nodeValue); $indexName = trim($node->nodeValue);
$s = $xpathI->query('td[contains(@class,"col_1")]/a[1]', $node->parentNode); $s = $xpathI->query('td[contains(@class,"col_1")]/a[1]', $node->parentNode);
if(!$s || $s->length == 0) { if(!$s || $s->length == 0) {
continue; continue;
} }
$description = $s->item(0)->nodeValue; $description = $s->item(0)->nodeValue;
$i = strpos($description,"{"); $i = strpos($description,"{");
if(!$i) { if(!$i) {
continue; continue;
} }
$i1 = strpos($description,":", $i); $i1 = strpos($description,":", $i);
$i2 = stripos($description,"mb", $i1); $i2 = stripos($description,"mb", $i1);
if(!$i2) { if(!$i2) {
$i2 = strpos($description,"}", $i1); $i2 = strpos($description,"}", $i1);
} }
$date = trim(substr($description, $i + 1, $i1 - $i -1)); $date = trim(substr($description, $i + 1, $i1 - $i -1));
$size = trim(substr($description, $i1 + 1, $i2 - $i1 -1)); $size = trim(substr($description, $i1 + 1, $i2 - $i1 -1));
$description = trim(substr($description, 0, $i)); $description = trim(substr($description, 0, $i));
if(strpos($indexName,"voice.zip") || strpos($indexName,"_1.poi.zip") || if(strpos($indexName,"voice.zip") || strpos($indexName,"_1.poi.zip") ||
strpos($indexName,"_1.poi.odb") || strpos($indexName,"_1.obf")) { strpos($indexName,"_1.poi.odb") || strpos($indexName,"_1.obf")) {
$ipart = strpos($indexName,"zip-"); $ipart = strpos($indexName,"zip-");
$part = 1; $part = 1;
$base = $indexName; $base = $indexName;
if($ipart) { if($ipart) {
$part = (int)substr($indexName, $ipart+4); $part = (int)substr($indexName, $ipart+4);
$base = substr($indexName, 0, $ipart+3); $base = substr($indexName, 0, $ipart+3);
if(isset($mapNodes[$base])) { if(isset($mapNodes[$base])) {
$out = $mapNodes[$base]; $out = $mapNodes[$base];
} else { } else {
$out = $output->createElement( "multiregion" ); $out = $output->createElement( "multiregion" );
$out -> setAttribute("parts", $part); $out -> setAttribute("parts", $part);
$mapNodes[$base] = $out; $mapNodes[$base] = $out;
$out -> setAttribute("date", $date); $out -> setAttribute("date", $date);
$out -> setAttribute("size", $size); $out -> setAttribute("size", $size);
$out -> setAttribute("name", $base); $out -> setAttribute("name", $base);
$out -> setAttribute("description", $description); $out -> setAttribute("description", $description);
$outputIndexes->appendChild($out); $outputIndexes->appendChild($out);
} }
if( (int) $out -> getAttribute("parts") < $part){ if( (int) $out -> getAttribute("parts") < $part){
$out -> setAttribute("parts", $part); $out -> setAttribute("parts", $part);
} }
} else { } else {
$out = $output->createElement( "region" ); $out = $output->createElement( "region" );
$out -> setAttribute("date", $date); $out -> setAttribute("date", $date);
$out -> setAttribute("size", $size); $out -> setAttribute("size", $size);
$out -> setAttribute("name", $indexName); $out -> setAttribute("name", $indexName);
$out -> setAttribute("description", $description); $out -> setAttribute("description", $description);
$outputIndexes->appendChild($out); $outputIndexes->appendChild($out);
$mapNodes[$indexName] = $out; $mapNodes[$indexName] = $out;
} }
} }
} }
$st += $num; $st += $num;
} else { } else {
$st = -1; $st = -1;
} }
}
/// 2. append local indexes
$local = new DomDocument();
// Open a known directory, and proceed to read its contents
$dir='indexes/';
if (is_dir($dir)) {
if ($dh = opendir($dir)) {
$zip = new ZipArchive();
while (($file = readdir($dh)) !== false) {
$filename = $dir . $file ; //"./test112.zip";
//print("processing file:" . $filename . "\n");
if ($zip->open($filename,ZIPARCHIVE::CHECKCONS)!==TRUE) {
//echo exit("cannot open <$filename>\n");
//print($filename . " cannot open as zip\n");
continue;
}
$indexName=$file;
if (isset($mapNodes[$indexName])) {
//print($indexName . " is listed already, skipping\n");
continue;
}
$description = $zip->getCommentIndex(0);
$stat = $zip->statIndex( 0 );
$date= date('d.m.Y',$stat['mtime']);
$size= number_format((filesize($filename) / (1024.0*1024.0)), 1, '.', '');
$zip->close();
$out = $output->createElement( "region" );
$out -> setAttribute("date", $date);
$out -> setAttribute("size", $size);
$out -> setAttribute("name", $indexName);
$out -> setAttribute("description", $description);
$outputIndexes->appendChild($out);
//$mapNodes[$indexName] = $out;
} }
closedir($dh); /// 2. append local indexes
} $local = new DomDocument();
} else { // Open a known directory, and proceed to read its contents
print($dir . " not a directory!\n"); $dir='indexes/';
} if (is_dir($dir)) {
if ($dh = opendir($dir)) {
$zip = new ZipArchive();
while (($file = readdir($dh)) !== false) {
$filename = $dir . $file ; //"./test112.zip";
//print("processing file:" . $filename . "\n");
if ($zip->open($filename,ZIPARCHIVE::CHECKCONS)!==TRUE) {
// echo exit("cannot open <$filename>\n");
// print($filename . " cannot open as zip\n");
continue;
}
$indexName=$file;
$output->save($localFileName); $description = $zip->getCommentIndex(0);
$stat = $zip->statIndex( 0 );
$date= date('d.m.Y',$stat['mtime']);
$size= number_format((filesize($filename) / (1024.0*1024.0)), 1, '.', '');
$zip->close();
if (isset($mapNodes[$indexName])) {
$exdate = DateTime::createFromFormat('d.m.Y', $mapNodes[$indexName]->getAttribute("date"));
if($stat['mtime'] < $exdate->getTimestamp()) {
continue;
}
$out = $mapNodes[$indexName];
} else {
$out = $output->createElement( "region" );
$outputIndexes->appendChild($out);
}
$out -> setAttribute("date", $date);
$out -> setAttribute("local", "true");
$out -> setAttribute("size", $size);
$out -> setAttribute("name", $indexName);
$out -> setAttribute("description", $description);
//$mapNodes[$indexName] = $out;
}
closedir($dh);
}
} else {
print($dir . " not a directory!\n");
}
$output->save($localFileName);
} }
?> ?>