phpBB

Code Changes

File: phpbb/search/fulltext_native.php

  Unmodified   Added   Modified   Removed
Line 18 (old) | Line 18 (new)
*/
class fulltext_native extends \phpbb\search\base
{

*/
class fulltext_native extends \phpbb\search\base
{

 
	const UTF8_HANGUL_FIRST = "\xEA\xB0\x80";
const UTF8_HANGUL_LAST = "\xED\x9E\xA3";
const UTF8_CJK_FIRST = "\xE4\xB8\x80";
const UTF8_CJK_LAST = "\xE9\xBE\xBB";
const UTF8_CJK_B_FIRST = "\xF0\xA0\x80\x80";
const UTF8_CJK_B_LAST = "\xF0\xAA\x9B\x96";


	/**
* Associative array holding index stats
* @var array

	/**
* Associative array holding index stats
* @var array

Line 99 (old) | Line 106 (new)
	protected $user;

/**

	protected $user;

/**

	* Initialises the fulltext_native search backend with min/max word length and makes sure the UTF-8 normalizer is loaded

	* Initialises the fulltext_native search backend with min/max word length

	*
* @param boolean|string &$error is passed by reference and should either be set to false on success or an error message on failure
* @param \phpbb\event\dispatcher_interface $phpbb_dispatcher Event dispatcher object

	*
* @param boolean|string &$error is passed by reference and should either be set to false on success or an error message on failure
* @param \phpbb\event\dispatcher_interface $phpbb_dispatcher Event dispatcher object

Line 118 (old) | Line 125 (new)
		/**
* Load the UTF tools
*/

		/**
* Load the UTF tools
*/

		if (!class_exists('utf_normalizer'))
{
include($this->phpbb_root_path . 'includes/utf/utf_normalizer.' . $this->php_ext);
}

 
		if (!function_exists('utf8_decode_ncr'))
{
include($this->phpbb_root_path . 'includes/utf/utf_tools.' . $this->php_ext);

		if (!function_exists('utf8_decode_ncr'))
{
include($this->phpbb_root_path . 'includes/utf/utf_tools.' . $this->php_ext);

Line 348 (old) | Line 351 (new)
		$this->must_contain_ids = array();
$this->must_not_contain_ids = array();
$this->must_exclude_one_ids = array();

		$this->must_contain_ids = array();
$this->must_not_contain_ids = array();
$this->must_exclude_one_ids = array();


$mode = '';
$ignore_no_id = true;

 

foreach ($query as $word)
{


foreach ($query as $word)
{

Line 594 (old) | Line 594 (new)
		$id_ary = array();

$sql_where = array();

		$id_ary = array();

$sql_where = array();

		$group_by = false;

 
		$m_num = 0;
$w_num = 0;


		$m_num = 0;
$w_num = 0;


Line 879 (old) | Line 878 (new)

break;



break;


				case 'sqlite':

 
				case 'sqlite3':
$sql_array_count['SELECT'] = ($type == 'posts') ? 'DISTINCT p.post_id' : 'DISTINCT p.topic_id';
$sql = 'SELECT COUNT(' . (($type == 'posts') ? 'post_id' : 'topic_id') . ') as total_results

				case 'sqlite3':
$sql_array_count['SELECT'] = ($type == 'posts') ? 'DISTINCT p.post_id' : 'DISTINCT p.topic_id';
$sql = 'SELECT COUNT(' . (($type == 'posts') ? 'post_id' : 'topic_id') . ') as total_results

Line 1186 (old) | Line 1184 (new)
					}
else
{

					}
else
{

						if ($this->db->get_sql_layer() == 'sqlite' || $this->db->get_sql_layer() == 'sqlite3')

						if ($this->db->get_sql_layer() == 'sqlite3')

						{
$sql = 'SELECT COUNT(topic_id) as total_results
FROM (SELECT DISTINCT t.topic_id';

						{
$sql = 'SELECT COUNT(topic_id) as total_results
FROM (SELECT DISTINCT t.topic_id';

Line 1203 (old) | Line 1201 (new)
								$post_visibility
$sql_fora
AND t.topic_id = p.topic_id

								$post_visibility
$sql_fora
AND t.topic_id = p.topic_id

								$sql_time" . (($this->db->get_sql_layer() == 'sqlite' || $this->db->get_sql_layer() == 'sqlite3') ? ')' : '');

								$sql_time" . ($this->db->get_sql_layer() == 'sqlite3' ? ')' : '');

					}
$result = $this->db->sql_query($sql);


					}
$result = $this->db->sql_query($sql);


Line 1262 (old) | Line 1260 (new)
		if (!$total_results && $is_mysql)
{
// Count rows for the executed queries. Replace $select within $sql with SQL_CALC_FOUND_ROWS, and run it.

		if (!$total_results && $is_mysql)
{
// Count rows for the executed queries. Replace $select within $sql with SQL_CALC_FOUND_ROWS, and run it.

			$sql_calc = str_replace('SELECT ' . $select, 'SELECT DISTINCT SQL_CALC_FOUND_ROWS p.post_id', $sql);

			$sql_calc = str_replace('SELECT ' . $select, 'SELECT SQL_CALC_FOUND_ROWS ' . $select, $sql);


$result = $this->db->sql_query($sql_calc);
$this->db->sql_freeresult($result);


$result = $this->db->sql_query($sql_calc);
$this->db->sql_freeresult($result);

Line 1325 (old) | Line 1323 (new)
		$match[] = '#\[\/?[a-z0-9\*\+\-]+(?:=.*?)?(?::[a-z])?(\:?[0-9a-z]{5,})\]#';

$min = $this->word_length['min'];

		$match[] = '#\[\/?[a-z0-9\*\+\-]+(?:=.*?)?(?::[a-z])?(\:?[0-9a-z]{5,})\]#';

$min = $this->word_length['min'];

		$max = $this->word_length['max'];

 

$isset_min = $min - 1;



$isset_min = $min - 1;


Line 1361 (old) | Line 1358 (new)
				* Note: this could be optimized. If the codepoint is lower than Hangul's range
* we know that it will also be lower than CJK ranges
*/

				* Note: this could be optimized. If the codepoint is lower than Hangul's range
* we know that it will also be lower than CJK ranges
*/

				if ((strncmp($word, UTF8_HANGUL_FIRST, 3) < 0 || strncmp($word, UTF8_HANGUL_LAST, 3) > 0)
&& (strncmp($word, UTF8_CJK_FIRST, 3) < 0 || strncmp($word, UTF8_CJK_LAST, 3) > 0)
&& (strncmp($word, UTF8_CJK_B_FIRST, 4) < 0 || strncmp($word, UTF8_CJK_B_LAST, 4) > 0))

				if ((strncmp($word, self::UTF8_HANGUL_FIRST, 3) < 0 || strncmp($word, self::UTF8_HANGUL_LAST, 3) > 0)
&& (strncmp($word, self::UTF8_CJK_FIRST, 3) < 0 || strncmp($word, self::UTF8_CJK_LAST, 3) > 0)
&& (strncmp($word, self::UTF8_CJK_B_FIRST, 4) < 0 || strncmp($word, self::UTF8_CJK_B_LAST, 4) > 0))

				{
$word = strtok(' ');
continue;

				{
$word = strtok(' ');
continue;

Line 1608 (old) | Line 1605 (new)
		// carry on ... it's okay ... I know when I'm not wanted boo hoo
if (!$this->config['fulltext_native_load_upd'])
{

		// carry on ... it's okay ... I know when I'm not wanted boo hoo
if (!$this->config['fulltext_native_load_upd'])
{

			set_config('search_last_gc', time(), true);

			$this->config->set('search_last_gc', time(), false);

			return;
}


			return;
}


Line 1643 (old) | Line 1640 (new)

// by setting search_last_gc to the new time here we make sure that if a user reloads because the
// following query takes too long, he won't run into it again


// by setting search_last_gc to the new time here we make sure that if a user reloads because the
// following query takes too long, he won't run into it again

				set_config('search_last_gc', time(), true);

				$this->config->set('search_last_gc', time(), false);


// Delete the matches
$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '


// Delete the matches
$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '

Line 1659 (old) | Line 1656 (new)
			$this->destroy_cache(array_unique($destroy_cache_words));
}


			$this->destroy_cache(array_unique($destroy_cache_words));
}


		set_config('search_last_gc', time(), true);

		$this->config->set('search_last_gc', time(), false);

	}

/**

	}

/**

Line 1669 (old) | Line 1666 (new)
	{
switch ($this->db->get_sql_layer())
{

	{
switch ($this->db->get_sql_layer())
{

			case 'sqlite':

 
			case 'sqlite3':
$this->db->sql_query('DELETE FROM ' . SEARCH_WORDLIST_TABLE);
$this->db->sql_query('DELETE FROM ' . SEARCH_WORDMATCH_TABLE);

			case 'sqlite3':
$this->db->sql_query('DELETE FROM ' . SEARCH_WORDLIST_TABLE);
$this->db->sql_query('DELETE FROM ' . SEARCH_WORDMATCH_TABLE);

Line 1730 (old) | Line 1726 (new)
	* @param	string	$allowed_chars	String of special chars to allow
* @param string $encoding Text encoding
* @return string Cleaned up text, only alphanumeric chars are left

	* @param	string	$allowed_chars	String of special chars to allow
* @param string $encoding Text encoding
* @return string Cleaned up text, only alphanumeric chars are left

	*
* @todo \normalizer::cleanup being able to be used?

 
	*/
protected function cleanup($text, $allowed_chars = null, $encoding = 'utf-8')
{
static $conv = array(), $conv_loaded = array();

	*/
protected function cleanup($text, $allowed_chars = null, $encoding = 'utf-8')
{
static $conv = array(), $conv_loaded = array();

		$words = $allow = array();

		$allow = array();


// Convert the text to UTF-8
$encoding = strtolower($encoding);


// Convert the text to UTF-8
$encoding = strtolower($encoding);

Line 1758 (old) | Line 1752 (new)
		$text = htmlspecialchars_decode(utf8_decode_ncr($text), ENT_QUOTES);

/**

		$text = htmlspecialchars_decode(utf8_decode_ncr($text), ENT_QUOTES);

/**

		* Load the UTF-8 normalizer
*
* If we use it more widely, an instance of that class should be held in a
* a global variable instead

		* Normalize to NFC




		*/

		*/

		\utf_normalizer::nfc($text);

		$text = \Normalizer::normalize($text);


/**
* The first thing we do is:


/**
* The first thing we do is:

Line 1856 (old) | Line 1847 (new)
			$utf_char = substr($text, $pos, $utf_len);
$pos += $utf_len;


			$utf_char = substr($text, $pos, $utf_len);
$pos += $utf_len;


			if (($utf_char >= UTF8_HANGUL_FIRST && $utf_char <= UTF8_HANGUL_LAST)
|| ($utf_char >= UTF8_CJK_FIRST && $utf_char <= UTF8_CJK_LAST)
|| ($utf_char >= UTF8_CJK_B_FIRST && $utf_char <= UTF8_CJK_B_LAST))

			if (($utf_char >= self::UTF8_HANGUL_FIRST && $utf_char <= self::UTF8_HANGUL_LAST)
|| ($utf_char >= self::UTF8_CJK_FIRST && $utf_char <= self::UTF8_CJK_LAST)
|| ($utf_char >= self::UTF8_CJK_B_FIRST && $utf_char <= self::UTF8_CJK_B_LAST))

			{
/**
* All characters within these ranges are valid

			{
/**
* All characters within these ranges are valid

Line 1975 (old) | Line 1966 (new)
		</dl>
<dl>
<dt><label for="fulltext_native_min_chars">' . $this->user->lang['MIN_SEARCH_CHARS'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['MIN_SEARCH_CHARS_EXPLAIN'] . '</span></dt>

		</dl>
<dl>
<dt><label for="fulltext_native_min_chars">' . $this->user->lang['MIN_SEARCH_CHARS'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['MIN_SEARCH_CHARS_EXPLAIN'] . '</span></dt>

			<dd><input id="fulltext_native_min_chars" type="number" size="3" maxlength="3" min="0" max="255" name="config[fulltext_native_min_chars]" value="' . (int) $this->config['fulltext_native_min_chars'] . '" /></dd>

			<dd><input id="fulltext_native_min_chars" type="number" min="0" max="255" name="config[fulltext_native_min_chars]" value="' . (int) $this->config['fulltext_native_min_chars'] . '" /></dd>

		</dl>
<dl>
<dt><label for="fulltext_native_max_chars">' . $this->user->lang['MAX_SEARCH_CHARS'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['MAX_SEARCH_CHARS_EXPLAIN'] . '</span></dt>

		</dl>
<dl>
<dt><label for="fulltext_native_max_chars">' . $this->user->lang['MAX_SEARCH_CHARS'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['MAX_SEARCH_CHARS_EXPLAIN'] . '</span></dt>

			<dd><input id="fulltext_native_max_chars" type="number" size="3" maxlength="3" min="0" max="255" name="config[fulltext_native_max_chars]" value="' . (int) $this->config['fulltext_native_max_chars'] . '" /></dd>

			<dd><input id="fulltext_native_max_chars" type="number" min="0" max="255" name="config[fulltext_native_max_chars]" value="' . (int) $this->config['fulltext_native_max_chars'] . '" /></dd>

		</dl>
<dl>
<dt><label for="fulltext_native_common_thres">' . $this->user->lang['COMMON_WORD_THRESHOLD'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['COMMON_WORD_THRESHOLD_EXPLAIN'] . '</span></dt>

		</dl>
<dl>
<dt><label for="fulltext_native_common_thres">' . $this->user->lang['COMMON_WORD_THRESHOLD'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['COMMON_WORD_THRESHOLD_EXPLAIN'] . '</span></dt>

			<dd><input id="fulltext_native_common_thres" type="text" size="3" maxlength="3" name="config[fulltext_native_common_thres]" value="' . (double) $this->config['fulltext_native_common_thres'] . '" /> %</dd>

			<dd><input id="fulltext_native_common_thres" type="text" name="config[fulltext_native_common_thres]" value="' . (double) $this->config['fulltext_native_common_thres'] . '" /> %</dd>

		</dl>
';


		</dl>
';