Skip to content

Commit

Permalink
The detectUrls() method is changed
Browse files Browse the repository at this point in the history
1. The recursion is removed.
2. A check against the BlackList and WhiteList is added.
  • Loading branch information
MioVisman committed Nov 26, 2016
1 parent 36c2161 commit d039178
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 37 deletions.
97 changes: 62 additions & 35 deletions Parserus.php
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ public function detectSmilies()
* @param mixed $list Массив bb-кодов, null и т.д.
* @return Parserus $this
*/
public function setWhiteList($list)
public function setWhiteList($list = null)
{
$this->whiteList = is_array($list) ? $list : null;
return $this;
Expand All @@ -370,7 +370,7 @@ public function setWhiteList($list)
* @param mixed $list Массив bb-кодов, null и т.д.
* @return Parserus $this
*/
public function setBlackList($list)
public function setBlackList($list = null)
{
$this->blackList = ! empty($list) && is_array($list) ? $list : null;
return $this;
Expand Down Expand Up @@ -997,18 +997,10 @@ public function getCode($id = 0)
* Метод ищет в текстовых узлах ссылки и создает на их месте узлы с bb-кодами url
* Для уменьшения нагрузки использовать при сохранении, а не при выводе
*
* @param int $id Указатель на текущий тег
* @return Parserus $this
*/
public function detectUrls($id = 0)
public function detectUrls()
{
if (! isset($this->bbcodes['url'])
|| isset($this->data[$id]['text'])
|| isset($this->data[$id]['text only'])
) {
return $this;
}

$pattern = '%\b(?<=\s|^)
(?>(?:ht|f)tps?://|www\.|ftp\.)
(?:[\p{L}\p{N}]+(?:[\p{L}\p{N}\-]*[\p{L}\p{N}])?\.)+
Expand All @@ -1020,41 +1012,76 @@ public function detectUrls($id = 0)
(?:\#[\p{L}\p{N}-]+)?
)?%xu';

$children = $this->data[$id]['children'];
$this->data[$id]['children'] = [];
return $this->detect('url', $pattern, true);
}

foreach ($children as $cid) {
if (! isset($this->data[$cid]['text'])) {
$this->data[$id]['children'][] = $cid;
$this->detectUrls($cid);
} else if (! isset($this->bbcodes['url']['parents'][$this->bbcodes[$this->data[$id]['tag']]['type']])) {
$this->data[$id]['children'][] = $cid;
} else {
$text = $this->data[$cid]['text'];
if (! preg_match_all($pattern, $text, $matches, PREG_OFFSET_CAPTURE)) {
$this->data[$id]['children'][] = $cid;
continue;
}
/**
* Searches text nodes for matches of $pattern and creates bb-code nodes named $tag in their place
*
* NOTE(review): this listing appears to come from a rendered diff, so statements
* removed by the commit seem to be interleaved with the added ones (for example
* the uses of $text and $cid, which are never assigned in this method, and the
* hard-coded 'url' tag) - confirm against the actual file before relying on it.
*
* @param string $tag Name of the bb-code to create
* @param string $pattern Regular expression to search for
* @param bool $textOnly Flag. true if the content of the created tag is text-only
* @return Parserus $this
*/
protected function detect($tag, $pattern, $textOnly)
{
// nothing to do when the bb-code is not registered
if (! isset($this->bbcodes[$tag])) {
return $this;
}

// error code 1: $tag is in the black list;
// error code 2: a white list is set and $tag is not in it
$error = null;
if (null !== $this->blackList && in_array($tag, $this->blackList)) {
$error = 1;
} else if (null !== $this->whiteList && ! in_array($tag, $this->whiteList)) {
$error = 2;
}

$pos = 0;
// walk the node ids from $this->dataId down to 1 (id 0 is never visited)
for ($id = $this->dataId; $id > 0; --$id) {
// not a text node
if (! isset($this->data[$id]['text'])) {
continue;
}

foreach ($matches[0] as $match) {
$this->addTextNode(substr($text, $pos, $match[1] - $pos), $id);
$pid = $this->data[$id]['parent'];

$new = $this->addTagNode('url', $id, [], true);
$this->addTextNode($match[0], $new);
// the parent may contain only text, or its type is not suitable
if (isset($this->data[$pid]['text only']) ||
! isset($this->bbcodes[$tag]['parents'][$this->bbcodes[$this->data[$pid]['tag']]['type']])
) {
continue;
}

$pos = $match[1] + strlen($match[0]);
}
// the list error is recorded only once a match has actually been found;
// processing then stops without changing the tree
if (! preg_match_all($pattern, $this->data[$id]['text'], $matches, PREG_OFFSET_CAPTURE)) {
continue;
} else if ($error) {
$this->errors[] = [$error, $tag];
return $this;
}

$this->addTextNode($this->endStr($text, $pos), $id);
unset($this->data[$cid]);
// split the parent's children list around the text node being replaced:
// $arrEnd keeps the siblings after it, the parent keeps those before it
$idx = array_search($id, $this->data[$pid]['children']);
$arrEnd = array_slice($this->data[$pid]['children'], $idx + 1);
$this->data[$pid]['children'] = array_slice($this->data[$pid]['children'], 0, $idx);

$pos = 0;

// $match[0] is the matched text, $match[1] its offset (PREG_OFFSET_CAPTURE)
foreach ($matches[0] as $match) {
// text between the previous match and this one
$this->addTextNode(substr($this->data[$id]['text'], $pos, $match[1] - $pos), $pid);

// new $tag node under the parent, holding the matched text
$new = $this->addTagNode($tag, $pid, [], $textOnly);
$this->addTextNode($match[0], $new);

$pos = $match[1] + strlen($match[0]);
}

// remaining text after the last match; the original text node is then dropped
$this->addTextNode($this->endStr($this->data[$id]['text'], $pos), $pid);
unset($this->data[$id]);

// put back the siblings that followed the original text node
// (presumably addTextNode()/addTagNode() append to the parent's children - TODO confirm)
$this->data[$pid]['children'] = array_merge($this->data[$pid]['children'], $arrEnd);
}

return $this;
}


/**
* Метод удаляет пустые теги из дерева
*
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Parserus

[![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)

BBCode parser.

## Requirements
Expand Down
4 changes: 2 additions & 2 deletions examples/detectUrls.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
#...
},
],
])->parse('Hello www.exemple.com World!')
])->parse('Hello www.example.com World!')
->detectUrls()
->getCode();

#output: Hello [url]www.exemple.com[/url] World!
#output: Hello [url]www.example.com[/url] World!
63 changes: 63 additions & 0 deletions examples/detectUrls2.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
<?php

# Example: how detectUrls() behaves with and without a black list / white list.
#
# The "#output:" comments below record the output expected from each step.

# Load the library relative to this file rather than the current working
# directory, and stop immediately if it cannot be loaded (a plain `include`
# of a relative path only warns, and the script then dies on `new Parserus()`).
require __DIR__ . '/../Parserus.php';

$parser = new Parserus();

# The same input is parsed in all three scenarios: a bare link before, inside
# and after an [h] tag.
$text = 'www.example.com/link1[h]Hello www.example.com/link2 World![/h]www.example.com/link3';

# Scenario 1: no lists. [url] declares 'inline' and 'block' as allowed parent
# types, while [h] has type 'h', so the link inside [h] is expected to stay
# plain text.
echo $parser->setBBCodes([
    [
        'tag' => 'url',
        'type' => 'url',
        'parents' => ['inline', 'block'],
        'attrs' => [
            'Def' => [
                'format' => '%^[^\x00-\x1f]+$%',
            ],
            'no attr' => [
                'body format' => '%^[^\x00-\x1f]+$%D',
            ],
        ],
        'handler' => function($body, $attrs, $parser) {
            #...
        },
    ],
    [
        'tag' => 'h',
        'type' => 'h',
        'handler' => function($body, $attrs, $parser) {
            #...
        },
    ],
])->parse($text)
    ->detectUrls()
    ->getCode();

#output: [url]www.example.com/link1[/url][h]Hello www.example.com/link2 World![/h][url]www.example.com/link3[/url]

echo "\n\n";

# Scenario 2: [url] is black-listed (white list reset), so no links are
# converted and an error is reported instead.
echo $parser->setBlackList(['url'])
    ->setWhiteList()
    ->parse($text)
    ->detectUrls()
    ->getCode();

#output: www.example.com/link1[h]Hello www.example.com/link2 World![/h]www.example.com/link3

var_dump($parser->getErrors());

# The message below is in Russian: "Tag [url] is in the black list".
#output: array (size=1)
#output: 0 => string 'Тег [url] находится в черном списке' (length=60)

echo "\n\n";

# Scenario 3: black list reset, white list allows only [h], so [url] is
# rejected again, this time for being absent from the white list.
echo $parser->setBlackList()
    ->setWhiteList(['h'])
    ->parse($text)
    ->detectUrls()
    ->getCode();

#output: www.example.com/link1[h]Hello www.example.com/link2 World![/h]www.example.com/link3

var_dump($parser->getErrors());

# The message below is in Russian: "Tag [url] is missing from the white list".
#output: array (size=1)
#output: 0 => string 'Тег [url] отсутствует в белом списке' (length=62)

0 comments on commit d039178

Please sign in to comment.