Line 320 | Line 320 |
---|
* Generates a text with approx. the specified length which contains the specified words and their context * * @param string $text The full text from which context shall be extracted
|
* Generates a text with approx. the specified length which contains the specified words and their context * * @param string $text The full text from which context shall be extracted
|
* @param string $words An array of words which should be contained in the result, has to be a valid part of a PCRE pattern (escape with preg_quote!)
| * @param array $words An array of words which should be contained in the result, has to be a valid part of a PCRE pattern (escape with preg_quote!)
|
* @param int $length The desired length of the resulting text, however the result might be shorter or longer than this value * * @return string Context of the specified words separated by "..." */
|
* @param int $length The desired length of the resulting text, however the result might be shorter or longer than this value * * @return string Context of the specified words separated by "..." */
|
function get_context($text, $words, $length = 400)
| function get_context(string $text, array $words, int $length = 400): string
|
{
|
{
|
// first replace all whitespaces with single spaces $text = preg_replace('/ +/', ' ', strtr($text, "\t\n\r\x0C ", ' '));
| if ($length <= 0) { return '...'; }
|
// we need to turn the entities back into their original form, to not cut the message in between them
|
// we need to turn the entities back into their original form, to not cut the message in between them
|
$entities = array('<', '>', '[', ']', '.', ':', ':'); $characters = array('<', '>', '[', ']', '.', ':', ':'); $text = str_replace($entities, $characters, $text);
| $text = html_entity_decode($text);
// Replace all spaces/invisible characters with single spaces $text = preg_replace("/\s+/u", ' ', $text);
|
|
|
$word_indizes = array(); if (count($words)) { $match = ''; // find the starting indizes of all words
| $text_length = utf8_strlen($text);
// Get first occurrence of each word $word_indexes = [];
|
foreach ($words as $word) {
|
foreach ($words as $word) {
|
if ($word) { if (preg_match('#(?:[^\w]|^)(' . $word . ')(?:[^\w]|$)#i', $text, $match)) { if (empty($match[1])) { continue; }
| $pos = utf8_stripos($text, $word);
|
|
|
$pos = utf8_strpos($text, $match[1]);
| |
if ($pos !== false) {
|
if ($pos !== false) {
|
$word_indizes[] = $pos;
| $word_indexes[$pos] = $word;
|
} }
|
} }
|
} } unset($match);
| |
|
|
if (count($word_indizes))
| if (!empty($word_indexes))
|
{
|
{
|
$word_indizes = array_unique($word_indizes); sort($word_indizes);
| ksort($word_indexes);
|
|
|
$wordnum = count($word_indizes); // number of characters on the right and left side of each word $sequence_length = (int) ($length / (2 * $wordnum)) - 2; $final_text = ''; $word = $j = 0; $final_text_index = -1;
| // Size of the fragment of text per word $num_indexes = count($word_indexes); $characters_per_word = (int) ($length / $num_indexes) + 2; // +2 leaves one character of margin on each side to avoid cutting words
|
|
|
// cycle through every character in the original text for ($i = $word_indizes[$word], $n = utf8_strlen($text); $i < $n; $i++)
| // Get text fragment indexes $fragments = []; foreach ($word_indexes as $index => $word)
|
{
|
{
|
// if the current position is the start of one of the words then append $sequence_length characters to the final text if (isset($word_indizes[$word]) && ($i == $word_indizes[$word]))
| $word_length = utf8_strlen($word); $start = max(0, min($text_length - 1 - $characters_per_word, (int) ($index + ($word_length / 2) - ($characters_per_word / 2)))); $end = $start + $characters_per_word;
// Check if we can merge this fragment into the previous fragment if (!empty($fragments))
|
{
|
{
|
if ($final_text_index < $i - $sequence_length - 1)
| [$prev_start, $prev_end] = end($fragments);
if ($prev_end + $characters_per_word >= $index + $word_length)
|
{
|
{
|
$final_text .= '... ' . preg_replace('#^([^ ]*)#', '', utf8_substr($text, $i - $sequence_length, $sequence_length));
| array_pop($fragments); $start = $prev_start; $end = $prev_end + $characters_per_word; } }
$fragments[] = [$start, $end]; }
|
} else {
|
} else {
|
// if the final text is already nearer to the current word than $sequence_length we only append the text // from its current index on and distribute the unused length to all other sequenes $sequence_length += (int) (($final_text_index - $i + $sequence_length + 1) / (2 * $wordnum)); $final_text .= utf8_substr($text, $final_text_index + 1, $i - $final_text_index - 1); } $final_text_index = $i - 1;
// add the following characters to the final text (see below) $word++; $j = 1;
| // There are no matches, so we just create a fragment with the first $length characters $fragments[] = [0, $length]; $end = $length;
|
}
|
}
|
if ($j > 0)
| $output = []; foreach ($fragments as [$start, $end])
|
{
|
{
|
// add the character to the final text and increment the sequence counter $final_text .= utf8_substr($text, $i, 1); $final_text_index++; $j++;
| $fragment = utf8_substr($text, $start, $end - $start + 1);
|
|
|
// if this is a whitespace then check whether we are done with this sequence if (utf8_substr($text, $i, 1) == ' ')
| $fragment_start = 0; $fragment_end = $end - $start + 1;
// Find the first valid alphanumeric character in the fragment to avoid cutting words if ($start > 0)
|
{
|
{
|
// only check whether we have to exit the context generation completely if we haven't already reached the end anyway if ($i + 4 < $n) { if (($j > $sequence_length && $word >= $wordnum) || utf8_strlen($final_text) > $length) { $final_text .= ' ...'; break;
| preg_match('/[^a-zA-Z0-9][a-zA-Z0-9]/u', $fragment, $matches, PREG_OFFSET_CAPTURE); $fragment_start = (int) $matches[0][1] + 1; // first valid alphanumeric character
|
}
|
}
|
} else
| // Find the last valid alphanumeric character in the fragment to avoid cutting words if ($end < $text_length - 1)
|
{
|
{
|
// make sure the text really reaches the end $j -= 4;
| preg_match_all('/[a-zA-Z0-9][^a-zA-Z0-9]/u', $fragment, $matches, PREG_OFFSET_CAPTURE); $fragment_end = end($matches[0])[1]; // last valid alphanumeric character
|
}
|
}
|
// stop context generation and wait for the next word if ($j > $sequence_length) { $j = 0; } } } } return str_replace($characters, $entities, $final_text); }
| $output[] = utf8_substr($fragment, $fragment_start, $fragment_end - $fragment_start + 1);
|
}
|
}
|
if (!count($words) || !count($word_indizes)) { return str_replace($characters, $entities, ((utf8_strlen($text) >= $length + 3) ? utf8_substr($text, 0, $length) . '...' : $text)); }
| return ($fragments[0][0] !== 0 ? '... ' : '') . htmlentities(implode(' ... ', $output)) . ($end < $text_length - 1 ? ' ...' : '');
|
}
/**
| }
/**
|
Line 532 | Line 511 |
---|
if (preg_match('#^<[rt][ >]#', $text)) {
|
if (preg_match('#^<[rt][ >]#', $text)) {
|
$text = $phpbb_container->get('text_formatter.utils')->clean_formatting($text);
| $text = utf8_htmlspecialchars($phpbb_container->get('text_formatter.utils')->clean_formatting($text));
|
} else {
| } else {
|
Line 803 | Line 782 |
---|
$orig_url = $url; $orig_relative = $relative_url; $append = '';
|
$orig_url = $url; $orig_relative = $relative_url; $append = '';
|
$url = htmlspecialchars_decode($url); $relative_url = htmlspecialchars_decode($relative_url);
| $url = html_entity_decode($url, ENT_COMPAT); $relative_url = html_entity_decode($relative_url, ENT_COMPAT);
|
// make sure no HTML entities were matched $chars = array('<', '>', '"');
| // make sure no HTML entities were matched $chars = array('<', '>', '"');
|
Line 911 | Line 890 |
---|
break; }
|
break; }
|
$url = htmlspecialchars($url); $text = htmlspecialchars($text); $append = htmlspecialchars($append);
| $url = htmlspecialchars($url, ENT_COMPAT); $text = htmlspecialchars($text, ENT_COMPAT); $append = htmlspecialchars($append, ENT_COMPAT);
|
$html = "$whitespace<!-- $tag --><a$class href=\"$url\">$text</a><!-- $tag -->$append";
| $html = "$whitespace<!-- $tag --><a$class href=\"$url\">$text</a><!-- $tag -->$append";
|
Line 921 | Line 900 |
---|
}
/**
|
}
/**
|
* make_clickable function * * Replace magic urls of form http://xxx.xxx., www.xxx. and xxx@xxx.xxx.
| * Replaces magic urls of form http://xxx.xxx., www.xxx. and xxx@xxx.xxx.
|
* Cuts down displayed size of link if over 50 chars, turns absolute links * into relative versions when the server/script path matches the link
|
* Cuts down displayed size of link if over 50 chars, turns absolute links * into relative versions when the server/script path matches the link
|
| * * @param string $text Message text to parse URL/email entries * @param bool|string $server_url The server URL. If false, the board URL will be used * @param string $class CSS class selector to add to the parsed URL entries * * @return string A text with parsed URL/email entries
|
*/
|
*/
|
function make_clickable($text, $server_url = false, $class = 'postlink')
| function make_clickable($text, $server_url = false, string $class = 'postlink')
|
{ if ($server_url === false) {
| { if ($server_url === false) {
|
Line 948 | Line 931 |
---|
$magic_url_match_args = array(); }
|
$magic_url_match_args = array(); }
|
| // Check if the match for this $server_url and $class already exists $element_exists = false; if (isset($magic_url_match_args[$server_url])) { array_walk_recursive($magic_url_match_args[$server_url], function($value) use (&$element_exists, $static_class) { if ($value == $static_class) { $element_exists = true; return; } } ); }
// Only add new $server_url and $class matches if they do not exist yet if (!$element_exists) {
|
// relative urls for this board
|
// relative urls for this board
|
$magic_url_match_args[$server_url][] = array(
| $magic_url_match_args[$server_url][] = [
|
'#(^|[\n\t (>.])(' . preg_quote($server_url, '#') . ')/(' . get_preg_expression('relative_url_inline') . ')#iu', MAGIC_URL_LOCAL, $local_class,
|
'#(^|[\n\t (>.])(' . preg_quote($server_url, '#') . ')/(' . get_preg_expression('relative_url_inline') . ')#iu', MAGIC_URL_LOCAL, $local_class,
|
);
| $static_class, ];
|
// matches a xxxx://aaaaa.bbb.cccc. ...
|
// matches a xxxx://aaaaa.bbb.cccc. ...
|
$magic_url_match_args[$server_url][] = array(
| $magic_url_match_args[$server_url][] = [
|
'#(^|[\n\t (>.])(' . get_preg_expression('url_inline') . ')#iu', MAGIC_URL_FULL, $class,
|
'#(^|[\n\t (>.])(' . get_preg_expression('url_inline') . ')#iu', MAGIC_URL_FULL, $class,
|
);
| $static_class, ];
|
// matches a "www.xxxx.yyyy[/zzzz]" kinda lazy URL thing
|
// matches a "www.xxxx.yyyy[/zzzz]" kinda lazy URL thing
|
$magic_url_match_args[$server_url][] = array(
| $magic_url_match_args[$server_url][] = [
|
'#(^|[\n\t (>])(' . get_preg_expression('www_url_inline') . ')#iu', MAGIC_URL_WWW, $class,
|
'#(^|[\n\t (>])(' . get_preg_expression('www_url_inline') . ')#iu', MAGIC_URL_WWW, $class,
|
);
| $static_class, ]; }
|
|
|
| if (!isset($magic_url_match_args[$server_url]['email'])) {
|
// matches an email@domain type address at the start of a line, or after a space or after what might be a BBCode.
|
// matches an email@domain type address at the start of a line, or after a space or after what might be a BBCode.
|
$magic_url_match_args[$server_url][] = array(
| $magic_url_match_args[$server_url]['email'] = [
|
'/(^|[\n\t (>])(' . get_preg_expression('email') . ')/iu', MAGIC_URL_EMAIL, '',
|
'/(^|[\n\t (>])(' . get_preg_expression('email') . ')/iu', MAGIC_URL_EMAIL, '',
|
);
| ]; }
|
}
foreach ($magic_url_match_args[$server_url] as $magic_args) { if (preg_match($magic_args[0], $text, $matches)) {
|
}
foreach ($magic_url_match_args[$server_url] as $magic_args) { if (preg_match($magic_args[0], $text, $matches)) {
|
| // Only apply $class from the corresponding function call argument (except emails, which never have a class) if ($magic_args[1] != MAGIC_URL_EMAIL && $magic_args[3] != $static_class) { continue; }
|
$text = preg_replace_callback($magic_args[0], function($matches) use ($magic_args) { $relative_url = isset($matches[3]) ? $matches[3] : '';
| $text = preg_replace_callback($magic_args[0], function($matches) use ($magic_args) { $relative_url = isset($matches[3]) ? $matches[3] : '';
|
Line 1053 | Line 1067 |
---|
} else {
|
} else {
|
$root_path = (defined('PHPBB_USE_BOARD_URL_PATH') && PHPBB_USE_BOARD_URL_PATH) ? generate_board_url() . '/' : $phpbb_path_helper->get_web_root_path();
| $root_path = $phpbb_path_helper->get_web_root_path();
|
/** * Event to override the root_path for smilies
| /** * Event to override the root_path for smilies
|
Line 1166 | Line 1180 |
---|
$filename = $phpbb_root_path . $config['upload_path'] . '/' . utf8_basename($attachment['physical_filename']);
$upload_icon = '';
|
$filename = $phpbb_root_path . $config['upload_path'] . '/' . utf8_basename($attachment['physical_filename']);
$upload_icon = '';
|
| $download_link = ''; $display_cat = false;
|
if (isset($extensions[$attachment['extension']])) {
| if (isset($extensions[$attachment['extension']])) {
|
Line 1325 | Line 1341 |
---|
); extract($phpbb_dispatcher->trigger_event('core.parse_attachments_modify_template_data', compact($vars))); $update_count_ary = $update_count;
|
); extract($phpbb_dispatcher->trigger_event('core.parse_attachments_modify_template_data', compact($vars))); $update_count_ary = $update_count;
|
unset($update_count);
| unset($update_count, $display_cat, $download_link);
|
$template->assign_block_vars('_file', $block_array);
| $template->assign_block_vars('_file', $block_array);
|
Line 1419 | Line 1435 |
---|
$string = substr($string, 4); }
|
$string = substr($string, 4); }
|
$_chars = utf8_str_split(htmlspecialchars_decode($string));
| $_chars = utf8_str_split(html_entity_decode($string, ENT_COMPAT));
|
$chars = array_map('utf8_htmlspecialchars', $_chars);
// Now check the length ;)
| $chars = array_map('utf8_htmlspecialchars', $_chars);
// Now check the length ;)
|
Line 1434 | Line 1450 |
---|
if (utf8_strlen($string) > $max_store_length) { // let's split again, we do not want half-baked strings where entities are split
|
if (utf8_strlen($string) > $max_store_length) { // let's split again, we do not want half-baked strings where entities are split
|
$_chars = utf8_str_split(htmlspecialchars_decode($string));
| $_chars = utf8_str_split(html_entity_decode($string, ENT_COMPAT));
|
$chars = array_map('utf8_htmlspecialchars', $_chars);
do
| $chars = array_map('utf8_htmlspecialchars', $_chars);
do
|