phplibrary/core/search/index.php
2016-07-14 16:29:26 +03:00

86 lines
No EOL
2.7 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
require_once 'core/search/htmlhelper.php';
require_once 'core/search/stemmer.php';
require_once 'core/path.php';
/**
* Индексирование файлов
*/
class Index
{
const ARRAY_FILE = 0;
const ARRAY_TEXT = 1;
public $index = array ();
public $text = array ();
protected $count = 0;
function getTitle ($content) {
$title = "'<title[^>]*?>(.*?)</title>'si";
preg_match($title, $content, $matches);
if(isset($matches[1])) {
return $matches[1];
}
return "";
}
// Выбираем основу слова
function clean ($word)
{
return Stemmer::russian(strtolower($word));
}
function process ($base, $files)
{
$path = new Path($base);
// Список документов
foreach ($path->getContentRec($files) as $file) {
$content = file_get_contents ($file);
$text = stripText($content);
// $title = self::getTitle ($content);
$title = pathinfo($file, PATHINFO_BASENAME);
// echo $file, "\n";
// Список слов в документе
$list = tokenize($text);
foreach ($list as $word) {
$preword = self::clean($word);
if (isset($this->index[$preword])) {
$index = $this->index[$preword];
if ( ! in_array ($this->count, $index)) $this->index[$preword] [] = $this->count;
} else {
// Не записываем слова длинна которых меньше 2
if (strlen($preword) > 1) {
$this->index[$preword] = array ($this->count);
}
}
}
$this->text [] = array ($title, $path->relPath ($file), $text);
$this->count ++;
}
ksort($this->index);
}
/**
* Сохранение результата поиска
*/
function saveData ($file)
{
$file = fopen($file, "w");
// Количество слов и текстов
fwrite ($file, pack("SS", count($this->index), count($this->text)));
foreach ($this->index as $word => $value) {
$length = strlen($word);
array_unshift ($value, "SSa*S*", $length, count($value), $word);
fwrite($file, call_user_func_array ('pack', $value));
}
foreach ($this->text as $text) {
fwrite($file, pack("SSSa*a*a*",
strlen($text[0]), strlen($text[1]), strlen($text[2])
, $text[0], $text[1], $text[2]));
}
}
}
?>