]*?>(.*?)'si";
        preg_match($title, $content, $matches);
        if(isset($matches[1])) {
            return $matches[1];
        }
        return "";
    }

    /**
     * Reduces a word to the form used as a key of the index.
     *
     * The word is lower-cased with strtolower() and then passed to
     * Stemmer::russian(); whatever that call returns is the result.
     *
     * NOTE(review): strtolower() works on bytes, so multi-byte (UTF-8)
     * Cyrillic letters may not be lower-cased -- confirm the encoding the
     * stemmer expects before switching to mb_strtolower().
     *
     * @param string $word A single token of the document text.
     * @return mixed The value returned by Stemmer::russian().
     */
    function clean ($word) {
        return Stemmer::russian(strtolower($word));
    }

    /**
     * Indexes every document found under $base.
     *
     * For each file returned by Path::getContentRec() the text is extracted
     * with stripText(), split with tokenize(), and every normalized word is
     * added to $this->index as "word => list of document numbers". The
     * document itself is appended to $this->text as
     * array(title, relative path, text), where the title is the file's base
     * name. $this->count is the number of the current document and is
     * incremented after each one. The index is sorted by key at the end.
     *
     * @param mixed $base  Passed to the Path constructor.
     * @param mixed $files Passed to Path::getContentRec().
     * @return void
     */
    function process ($base, $files) {
        $path = new Path($base);

        // List of documents
        foreach ($path->getContentRec($files) as $file) {
            $content = file_get_contents($file);
            if ($content === false) {
                // The file could not be read (a warning was already raised);
                // do not index a bogus empty document for it.
                continue;
            }

            $text = stripText($content);
            // The title is the file's base name, not the title parsed from the content.
            $title = pathinfo($file, PATHINFO_BASENAME);

            // List of words in the document
            foreach (tokenize($text) as $word) {
                // clean() is an instance method; calling it as self::clean()
                // is an error on PHP 8.
                $preword = $this->clean($word);

                if (isset($this->index[$preword])) {
                    // Document numbers are appended in increasing order, so
                    // the current document can only be the last entry; this
                    // avoids rescanning the whole list for every occurrence.
                    if (end($this->index[$preword]) !== $this->count) {
                        $this->index[$preword][] = $this->count;
                    }
                } elseif (strlen($preword) > 1) {
                    // Words shorter than 2 bytes are not recorded
                    $this->index[$preword] = array($this->count);
                }
            }

            $this->text[] = array($title, $path->relPath($file), $text);
            $this->count++;
        }

        ksort($this->index);
    }

    /**
     * Writes the index and the document texts to a binary file.
     *
     * Layout (every "S" is pack()'s unsigned 16-bit, machine byte order):
     *   header:   S word count, S text count
     *   per word: S word length, S number of documents, the word bytes,
     *             then one S per document number
     *   per text: S title length, S path length, S text length,
     *             then the title, path and text bytes
     *
     * NOTE(review): a 16-bit field cannot hold counts or lengths above 65535;
     * a document text longer than that would get a wrapped length. The format
     * is left unchanged because its reader is not visible here.
     *
     * @param string $file Path of the file to create or overwrite.
     * @return void
     * @throws \RuntimeException If the file cannot be opened for writing.
     */
    function saveData ($file) {
        // "b": the payload is binary and must not be newline-translated.
        $handle = fopen($file, "wb");
        if ($handle === false) {
            throw new \RuntimeException("Cannot open file for writing: " . $file);
        }

        // Number of words and texts
        fwrite($handle, pack("SS", count($this->index), count($this->text)));

        foreach ($this->index as $word => $value) {
            // Integer-like words come back from the array as int keys.
            $word = (string) $word;
            $args = array_merge(
                array("SSa*S*", strlen($word), count($value), $word),
                $value
            );
            fwrite($handle, call_user_func_array('pack', $args));
        }

        foreach ($this->text as $text) {
            fwrite($handle, pack(
                "SSSa*a*a*",
                strlen($text[0]),
                strlen($text[1]),
                strlen($text[2]),
                $text[0],
                $text[1],
                $text[2]
            ));
        }

        fclose($handle);
    }
}
?>