85 lines
No EOL
2.5 KiB
PHP
85 lines
No EOL
2.5 KiB
PHP
<?php
|
|
|
|
/**
 * Extracts the plain text from an HTML document.
 *
 * Removes <script> elements and every remaining tag, collapses whitespace that
 * follows a line break, and decodes a fixed list of named/numeric HTML entities.
 *
 * @param string $document HTML markup to process.
 *
 * @return string|null The text with markup removed; this is whatever
 *                     preg_replace() yields, i.e. null if PCRE reports an error.
 */
function stripText($document)
{
    // preg_replace() applies the patterns in array order, each one to the
    // output of the previous one, so the order of the entries matters.
    $search = array(
        "'<script[^>]*?>.*?</script>'si" => "",        // strip out javascript
        "'<[\/\!]*?[^<>]*?>'si"          => "",        // strip out html tags
        "'([\r\n])[\s]+'"                => "\\1",     // strip out white space
        "'&(quot|#34|#034|#x22);'i"      => "\"",      // replace html entities
        "'&(lt|#60|#060|#x3c);'i"        => "<",       // added hexadecimal values
        "'&(gt|#62|#062|#x3e);'i"        => ">",
        "'&(nbsp|#160|#xa0);'i"          => " ",
        // NOTE(review): chr() emits single bytes; 161-176 are the ISO-8859-1
        // code points of these symbols - confirm this matches the encoding of
        // the documents being processed.
        "'&(iexcl|#161);'i"              => chr(161),
        "'&(cent|#162);'i"               => chr(162),
        "'&(pound|#163);'i"              => chr(163),
        "'&(copy|#169);'i"               => chr(169),
        "'&(reg|#174);'i"                => chr(174),
        "'&(deg|#176);'i"                => chr(176),
        // Decode the ampersand last: doing it earlier would turn an escaped
        // entity such as "&amp;lt;" into "&lt;" and then decode it a second time.
        "'&(amp|#38|#038|#x26);'i"       => "&"
    );

    return preg_replace(array_keys($search), array_values($search), $document);
}
|
|
|
|
/**
 * Splits text into an array of words.
 *
 * Every run of characters outside PCRE's \w class acts as one separator.
 * Empty pieces (produced when the text is empty, or starts or ends with a
 * separator) are discarded, so the result holds only actual tokens.
 *
 * NOTE(review): the pattern has no /u modifier, so non-ASCII letters are
 * presumably treated as separators unless the PCRE locale tables say
 * otherwise - confirm against the encoding of the input.
 *
 * @param string $document Text to split.
 *
 * @return array|false List of word tokens, or false if preg_split() fails.
 */
function tokenize ($document)
{
    return preg_split("/[\W]+/", $document, -1, PREG_SPLIT_NO_EMPTY);
}
|
|
|
|
|
|
/**
 * Searches backwards for one of the given characters, starting at $offset and
 * moving toward the beginning of the string.
 *
 * @param string $haystack String to scan.
 * @param array  $needle   Array of characters to look for.
 * @param int    $offset   Offset from the start of the string at which the
 *                         backward scan begins. Falsy values are treated as 0;
 *                         values past the end are clamped to the last character.
 *
 * @return int|false Position of the first match encountered, or false when
 *                   $offset is negative or no character matches.
 */
function indexRight ($haystack, $needle, $offset = 0)
{
    // Treat null / false / "" the same as 0, and work with an integer index.
    $offset = $offset ? (int)$offset : 0;

    // Never read beyond the last character: an out-of-range string offset
    // raises a diagnostic on every iteration. For an empty haystack this
    // yields -1, so the loop below is skipped entirely.
    $last = strlen($haystack) - 1;
    if ($offset > $last) {
        $offset = $last;
    }

    while ($offset >= 0) {
        // Loose comparison is kept on purpose so existing callers that pass
        // non-string needle values keep working.
        if (in_array($haystack[$offset], $needle)) {
            return $offset;
        }
        $offset--;
    }

    return false;
}
|
|
|
|
/**
 * Searches forward from $offset for the first character accepted by $needle.
 *
 * @param string         $haystack String to scan.
 * @param array|callable $needle   Either an array of characters to look for, or
 *                                 a callback that receives a single character
 *                                 and returns a truthy value when it matches.
 * @param int            $offset   Offset from the start of the string at which
 *                                 the scan begins.
 *
 * @return int|false Position of the first match, or false when $offset is
 *                   negative or nothing matches.
 */
function indexLeft ($haystack, $needle, $offset = 0)
{
    if ($offset < 0) {
        return false;
    }

    // None of these change while scanning, so evaluate them once instead of
    // once per character.
    $length      = strlen($haystack);
    $useCallback = is_callable($needle);
    $useList     = is_array($needle);

    for ($pos = (int)$offset; $pos < $length; $pos++) {
        $char = $haystack[$pos];

        // A needle that is both callable and an array is tried as a callback
        // first and, if that rejects the character, as a plain list.
        if (($useCallback && call_user_func($needle, $char))
            || ($useList && in_array($char, $needle))) {
            return $pos;
        }
    }

    return false;
}
|
|
|
|
/**
 * Negation of ctype_alpha(): reports whether the given text is NOT made up
 * purely of alphabetic characters.
 *
 * @param string $ch Text (typically a single character) to test.
 *
 * @return bool True when ctype_alpha() rejects $ch, false otherwise.
 */
function not_ctype_alpha ($ch)
{
    $isAlphabetic = ctype_alpha($ch);

    return $isAlphabetic === false;
}
|
|
|
|
|
|
?>
|