diff --git a/Parserus.php b/Parserus.php index d630400..5dba4d8 100644 --- a/Parserus.php +++ b/Parserus.php @@ -358,7 +358,7 @@ public function detectSmilies() * @param mixed $list Массив bb-кодов, null и т.д. * @return Parserus $this */ - public function setWhiteList($list) + public function setWhiteList($list = null) { $this->whiteList = is_array($list) ? $list : null; return $this; @@ -370,7 +370,7 @@ public function setWhiteList($list) * @param mixed $list Массив bb-кодов, null и т.д. * @return Parserus $this */ - public function setBlackList($list) + public function setBlackList($list = null) { $this->blackList = ! empty($list) && is_array($list) ? $list : null; return $this; @@ -997,18 +997,10 @@ public function getCode($id = 0) * Метод ищет в текстовых узлах ссылки и создает на их месте узлы с bb-кодами url * Для уменьшения нагрузки использовать при сохранении, а не при выводе * - * @param int $id Указатель на текущий тег * @return Parserus $this */ - public function detectUrls($id = 0) + public function detectUrls() { - if (! isset($this->bbcodes['url']) - || isset($this->data[$id]['text']) - || isset($this->data[$id]['text only']) - ) { - return $this; - } - $pattern = '%\b(?<=\s|^) (?>(?:ht|f)tps?://|www\.|ftp\.) (?:[\p{L}\p{N}]+(?:[\p{L}\p{N}\-]*[\p{L}\p{N}])?\.)+ @@ -1020,41 +1012,76 @@ public function detectUrls($id = 0) (?:\#[\p{L}\p{N}-]+)? )?%xu'; - $children = $this->data[$id]['children']; - $this->data[$id]['children'] = []; + return $this->detect('url', $pattern, true); + } - foreach ($children as $cid) { - if (! isset($this->data[$cid]['text'])) { - $this->data[$id]['children'][] = $cid; - $this->detectUrls($cid); - } else if (! isset($this->bbcodes['url']['parents'][$this->bbcodes[$this->data[$id]['tag']]['type']])) { - $this->data[$id]['children'][] = $cid; - } else { - $text = $this->data[$cid]['text']; - if (! 
preg_match_all($pattern, $text, $matches, PREG_OFFSET_CAPTURE)) { - $this->data[$id]['children'][] = $cid; - continue; - } + /** + * Searches the text nodes for matches of $pattern and replaces each match with a node of the $tag bb-code + * + * @param string $tag Name of the bb-code to create + * @param string $pattern Regular expression to search for + * @param bool $textOnly Flag passed to addTagNode(); true when the content of the created tag is text + * @return Parserus $this + */ + protected function detect($tag, $pattern, $textOnly) + { + if (! isset($this->bbcodes[$tag])) { + return $this; + } + + $error = null; // stays null when $tag is allowed; 1 = $tag is in the black list, 2 = $tag is missing from the white list + if (null !== $this->blackList && in_array($tag, $this->blackList, true)) { + $error = 1; + } else if (null !== $this->whiteList && ! in_array($tag, $this->whiteList, true)) { + $error = 2; + } - $pos = 0; + for ($id = $this->dataId; $id > 0; --$id) { + // not a text node + if (! isset($this->data[$id]['text'])) { + continue; + } - foreach ($matches[0] as $match) { - $this->addTextNode(substr($text, $pos, $match[1] - $pos), $id); + $pid = $this->data[$id]['parent']; - $new = $this->addTagNode('url', $id, [], true); - $this->addTextNode($match[0], $new); + // the parent may contain only text, or its type is not an allowed parent for $tag + if (isset($this->data[$pid]['text only']) || + ! isset($this->bbcodes[$tag]['parents'][$this->bbcodes[$this->data[$pid]['tag']]['type']]) + ) { + continue; + } - $pos = $match[1] + strlen($match[0]); - } + if (! 
preg_match_all($pattern, $this->data[$id]['text'], $matches, PREG_OFFSET_CAPTURE)) { + continue; + } else if ($error) { + $this->errors[] = [$error, $tag]; + return $this; + } - $this->addTextNode($this->endStr($text, $pos), $id); - unset($this->data[$cid]); + $idx = array_search($id, $this->data[$pid]['children']); + $arrEnd = array_slice($this->data[$pid]['children'], $idx + 1); + $this->data[$pid]['children'] = array_slice($this->data[$pid]['children'], 0, $idx); + + $pos = 0; + + foreach ($matches[0] as $match) { + $this->addTextNode(substr($this->data[$id]['text'], $pos, $match[1] - $pos), $pid); + + $new = $this->addTagNode($tag, $pid, [], $textOnly); + $this->addTextNode($match[0], $new); + + $pos = $match[1] + strlen($match[0]); } + + $this->addTextNode($this->endStr($this->data[$id]['text'], $pos), $pid); + unset($this->data[$id]); + + $this->data[$pid]['children'] = array_merge($this->data[$pid]['children'], $arrEnd); } + return $this; } - /** * Метод удаляет пустые теги из дерева * diff --git a/README.md b/README.md index 75b8705..eb5ab5b 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # Parserus +[![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) + BBCode parser. ## Requirements diff --git a/examples/detectUrls.php b/examples/detectUrls.php index 08f6b04..a4c3a76 100644 --- a/examples/detectUrls.php +++ b/examples/detectUrls.php @@ -20,8 +20,8 @@ #... }, ], -])->parse('Hello www.exemple.com World!') +])->parse('Hello www.example.com World!') ->detectUrls() ->getCode(); -#output: Hello [url]www.exemple.com[/url] World! +#output: Hello [url]www.example.com[/url] World! 
diff --git a/examples/detectUrls2.php b/examples/detectUrls2.php new file mode 100644 index 0000000..2ed193e --- /dev/null +++ b/examples/detectUrls2.php @@ -0,0 +1,63 @@ +setBBCodes([ + ['tag' => 'url', + 'type' => 'url', + 'parents' => ['inline', 'block'], + 'attrs' => [ + 'Def' => [ + 'format' => '%^[^\x00-\x1f]+$%', + ], + 'no attr' => [ + 'body format' => '%^[^\x00-\x1f]+$%D', + ], + ], + 'handler' => function($body, $attrs, $parser) { +#... + }, + ], + ['tag' => 'h', + 'type' => 'h', + 'handler' => function($body, $attrs, $parser) { +#... + }, + ], +])->parse('www.example.com/link1[h]Hello www.example.com/link2 World![/h]www.example.com/link3') + ->detectUrls() + ->getCode(); + +#output: [url]www.example.com/link1[/url][h]Hello www.example.com/link2 World![/h][url]www.example.com/link3[/url] + +echo "\n\n"; + +echo $parser->setBlackList(['url']) + ->setWhiteList() + ->parse('www.example.com/link1[h]Hello www.example.com/link2 World![/h]www.example.com/link3') + ->detectUrls() + ->getCode(); + +#output: www.example.com/link1[h]Hello www.example.com/link2 World![/h]www.example.com/link3 + +var_dump($parser->getErrors()); + +#output: array (size=1) +#output: 0 => string 'Тег [url] находится в черном списке' (length=60) + +echo "\n\n"; + +echo $parser->setBlackList() + ->setWhiteList(['h']) + ->parse('www.example.com/link1[h]Hello www.example.com/link2 World![/h]www.example.com/link3') + ->detectUrls() + ->getCode(); + +#output: www.example.com/link1[h]Hello www.example.com/link2 World![/h]www.example.com/link3 + +var_dump($parser->getErrors()); + +#output: array (size=1) +#output: 0 => string 'Тег [url] отсутствует в белом списке' (length=62)