phpBB

Code Changes

File: phpbb/search/fulltext_native.php

  Unmodified   Added   Modified   Removed
Line 18Line 18
*/
class fulltext_native extends \phpbb\search\base
{

*/
class fulltext_native extends \phpbb\search\base
{

 
	const UTF8_HANGUL_FIRST = "\xEA\xB0\x80";
const UTF8_HANGUL_LAST = "\xED\x9E\xA3";
const UTF8_CJK_FIRST = "\xE4\xB8\x80";
const UTF8_CJK_LAST = "\xE9\xBE\xBB";
const UTF8_CJK_B_FIRST = "\xF0\xA0\x80\x80";
const UTF8_CJK_B_LAST = "\xF0\xAA\x9B\x96";


	/**
* Associative array holding index stats
* @var array

	/**
* Associative array holding index stats
* @var array

Line 57Line 64
	protected $must_not_contain_ids = array();

/**

	protected $must_not_contain_ids = array();

/**

	 * Post ids of posts containing atleast one word that needs to be excluded

	 * Post ids of posts containing at least one word that needs to be excluded

	 * @var array
*/
protected $must_exclude_one_ids = array();

	 * @var array
*/
protected $must_exclude_one_ids = array();

Line 99Line 106
	protected $user;

/**

	protected $user;

/**

	* Initialises the fulltext_native search backend with min/max word length and makes sure the UTF-8 normalizer is loaded

	* Initialises the fulltext_native search backend with min/max word length

	*
* @param boolean|string &$error is passed by reference and should either be set to false on success or an error message on failure
* @param \phpbb\event\dispatcher_interface $phpbb_dispatcher Event dispatcher object

	*
* @param boolean|string &$error is passed by reference and should either be set to false on success or an error message on failure
* @param \phpbb\event\dispatcher_interface $phpbb_dispatcher Event dispatcher object

Line 118Line 125
		/**
* Load the UTF tools
*/

		/**
* Load the UTF tools
*/

		if (!class_exists('utf_normalizer'))
{
include($this->phpbb_root_path . 'includes/utf/utf_normalizer.' . $this->php_ext);
}

 
		if (!function_exists('utf8_decode_ncr'))
{
include($this->phpbb_root_path . 'includes/utf/utf_tools.' . $this->php_ext);

		if (!function_exists('utf8_decode_ncr'))
{
include($this->phpbb_root_path . 'includes/utf/utf_tools.' . $this->php_ext);

Line 348Line 351
		$this->must_contain_ids = array();
$this->must_not_contain_ids = array();
$this->must_exclude_one_ids = array();

		$this->must_contain_ids = array();
$this->must_not_contain_ids = array();
$this->must_exclude_one_ids = array();


$mode = '';
$ignore_no_id = true;

 

foreach ($query as $word)
{


foreach ($query as $word)
{

Line 530Line 530
		sort($must_exclude_one_ids);

// generate a search_key from all the options to identify the results

		sort($must_exclude_one_ids);

// generate a search_key from all the options to identify the results

		$search_key = md5(implode('#', array(

		$search_key_array = array(

			serialize($must_contain_ids),
serialize($must_not_contain_ids),
serialize($must_exclude_one_ids),

			serialize($must_contain_ids),
serialize($must_not_contain_ids),
serialize($must_exclude_one_ids),

Line 544Line 544
			$post_visibility,
implode(',', $author_ary),
$author_name,

			$post_visibility,
implode(',', $author_ary),
$author_name,

		)));







































		);

/**
* Allow changing the search_key for cached results
*
* @event core.search_native_by_keyword_modify_search_key
* @var array search_key_array Array with search parameters to generate the search_key
* @var array must_contain_ids Array with post ids of posts containing words that are to be included
* @var array must_not_contain_ids Array with post ids of posts containing words that should not be included
* @var array must_exclude_one_ids Array with post ids of posts containing at least one word that needs to be excluded
* @var string type Searching type ('posts', 'topics')
* @var string fields Searching fields ('titleonly', 'msgonly', 'firstpost', 'all')
* @var string terms Searching terms ('all', 'any')
* @var int sort_days Time, in days, of the oldest possible post to list
* @var string sort_key The sort type used from the possible sort types
* @var int topic_id Limit the search to this topic_id only
* @var array ex_fid_ary Which forums not to search on
* @var string post_visibility Post visibility data
* @var array author_ary Array of user_id containing the users to filter the results to
* @since 3.1.7-RC1
*/
$vars = array(
'search_key_array',
'must_contain_ids',
'must_not_contain_ids',
'must_exclude_one_ids',
'type',
'fields',
'terms',
'sort_days',
'sort_key',
'topic_id',
'ex_fid_ary',
'post_visibility',
'author_ary',
);
extract($this->phpbb_dispatcher->trigger_event('core.search_native_by_keyword_modify_search_key', compact($vars)));

$search_key = md5(implode('#', $search_key_array));


// try reading the results from cache
$total_results = 0;


// try reading the results from cache
$total_results = 0;

Line 556Line 594
		$id_ary = array();

$sql_where = array();

		$id_ary = array();

$sql_where = array();

		$group_by = false;

 
		$m_num = 0;
$w_num = 0;


		$m_num = 0;
$w_num = 0;


Line 733Line 770
		* @var	array	must_not_contain_ids	Ids that cannot be taken into account for the results
* @var array must_exclude_one_ids Ids that cannot be on the results
* @var array must_contain_ids Ids that must be on the results

		* @var	array	must_not_contain_ids	Ids that cannot be taken into account for the results
* @var array must_exclude_one_ids Ids that cannot be on the results
* @var array must_contain_ids Ids that must be on the results

		* @var	int		result_count			The previous result count for the format of the query

		* @var	int		total_results			The previous result count for the format of the query

		*										Set to 0 to force a re-count

		*										Set to 0 to force a re-count

		* @var	bool	join_topic				Weather or not TOPICS_TABLE should be CROSS JOIN'ED


		* @var	array	sql_array				The data on how to search in the DB at this point
* @var bool left_join_topics Whether or not TOPICS_TABLE should be CROSS JOIN'ED

		* @var	array	author_ary				Array of user_id containing the users to filter the results to
* @var string author_name An extra username to search on (!empty(author_ary) must be true, to be relevant)
* @var array ex_fid_ary Which forums not to search on

		* @var	array	author_ary				Array of user_id containing the users to filter the results to
* @var string author_name An extra username to search on (!empty(author_ary) must be true, to be relevant)
* @var array ex_fid_ary Which forums not to search on

Line 748Line 786
		* @var	string	sql_where				An array of the current WHERE clause conditions
* @var string sql_match Which columns to do the search on
* @var string sql_match_where Extra conditions to use to properly filter the matching process

		* @var	string	sql_where				An array of the current WHERE clause conditions
* @var string sql_match Which columns to do the search on
* @var string sql_match_where Extra conditions to use to properly filter the matching process

		* @var	string	group_by				Whether or not the SQL query requires a GROUP BY for the elements in the SELECT clause

		* @var	bool	group_by				Whether or not the SQL query requires a GROUP BY for the elements in the SELECT clause

		* @var	string	sort_by_sql				The possible predefined sort types
* @var string sort_key The sort type used from the possible sort types
* @var string sort_dir "a" for ASC or "d" for DESC for the sort order used

		* @var	string	sort_by_sql				The possible predefined sort types
* @var string sort_key The sort type used from the possible sort types
* @var string sort_dir "a" for ASC or "d" for DESC for the sort order used

Line 761Line 799
			'must_not_contain_ids',
'must_exclude_one_ids',
'must_contain_ids',

			'must_not_contain_ids',
'must_exclude_one_ids',
'must_contain_ids',

			'result_count',
'join_topic',


			'total_results',
'sql_array',
'left_join_topics',

			'author_ary',
'author_name',
'ex_fid_ary',

			'author_ary',
'author_name',
'ex_fid_ary',

Line 839Line 878

break;



break;


				case 'sqlite':

 
				case 'sqlite3':
$sql_array_count['SELECT'] = ($type == 'posts') ? 'DISTINCT p.post_id' : 'DISTINCT p.topic_id';
$sql = 'SELECT COUNT(' . (($type == 'posts') ? 'post_id' : 'topic_id') . ') as total_results

				case 'sqlite3':
$sql_array_count['SELECT'] = ($type == 'posts') ? 'DISTINCT p.post_id' : 'DISTINCT p.topic_id';
$sql = 'SELECT COUNT(' . (($type == 'posts') ? 'post_id' : 'topic_id') . ') as total_results

Line 978Line 1016
		}

// generate a search_key from all the options to identify the results

		}

// generate a search_key from all the options to identify the results

		$search_key = md5(implode('#', array(

		$search_key_array = array(

			'',
$type,
($firstpost_only) ? 'firstpost' : '',

			'',
$type,
($firstpost_only) ? 'firstpost' : '',

Line 991Line 1029
			$post_visibility,
implode(',', $author_ary),
$author_name,

			$post_visibility,
implode(',', $author_ary),
$author_name,

		)));

































		);

/**
* Allow changing the search_key for cached results
*
* @event core.search_native_by_author_modify_search_key
* @var array search_key_array Array with search parameters to generate the search_key
* @var string type Searching type ('posts', 'topics')
* @var boolean firstpost_only Flag indicating if only topic starting posts are considered
* @var int sort_days Time, in days, of the oldest possible post to list
* @var string sort_key The sort type used from the possible sort types
* @var int topic_id Limit the search to this topic_id only
* @var array ex_fid_ary Which forums not to search on
* @var string post_visibility Post visibility data
* @var array author_ary Array of user_id containing the users to filter the results to
* @var string author_name The username to search on
* @since 3.1.7-RC1
*/
$vars = array(
'search_key_array',
'type',
'firstpost_only',
'sort_days',
'sort_key',
'topic_id',
'ex_fid_ary',
'post_visibility',
'author_ary',
'author_name',
);
extract($this->phpbb_dispatcher->trigger_event('core.search_native_by_author_modify_search_key', compact($vars)));

$search_key = md5(implode('#', $search_key_array));


// try reading the results from cache
$total_results = 0;


// try reading the results from cache
$total_results = 0;

Line 1048Line 1118
		* @event core.search_native_author_count_query_before
* @var int total_results The previous result count for the format of the query.
* Set to 0 to force a re-count

		* @event core.search_native_author_count_query_before
* @var int total_results The previous result count for the format of the query.
* Set to 0 to force a re-count

 
		* @var	string	type				The type of search being made

		* @var	string	select				SQL SELECT clause for what to get
* @var string sql_sort_table CROSS JOIN'ed table to allow doing the sort chosen
* @var string sql_sort_join Condition to define how to join the CROSS JOIN'ed table specified in sql_sort_table

		* @var	string	select				SQL SELECT clause for what to get
* @var string sql_sort_table CROSS JOIN'ed table to allow doing the sort chosen
* @var string sql_sort_join Condition to define how to join the CROSS JOIN'ed table specified in sql_sort_table

Line 1060Line 1131
		* @var	string	sort_days			Time, in days, that the oldest post showing can have
* @var string sql_time The SQL to search on the time specified by sort_days
* @var bool firstpost_only Whether or not to search only on the first post of the topics

		* @var	string	sort_days			Time, in days, that the oldest post showing can have
* @var string sql_time The SQL to search on the time specified by sort_days
* @var bool firstpost_only Whether or not to search only on the first post of the topics

 
		* @var	string	sql_firstpost		The SQL used in the WHERE clause to filter by firstpost.

		* @var	array	ex_fid_ary			Forum ids that must not be searched on
* @var array sql_fora SQL query for ex_fid_ary
* @var int start How many posts to skip in the search results (used for pagination)

		* @var	array	ex_fid_ary			Forum ids that must not be searched on
* @var array sql_fora SQL query for ex_fid_ary
* @var int start How many posts to skip in the search results (used for pagination)

Line 1067Line 1139
		*/
$vars = array(
'total_results',

		*/
$vars = array(
'total_results',

 
			'type',

			'select',
'sql_sort_table',
'sql_sort_join',

			'select',
'sql_sort_table',
'sql_sort_join',

Line 1079Line 1152
			'sort_days',
'sql_time',
'firstpost_only',

			'sort_days',
'sql_time',
'firstpost_only',

 
			'sql_firstpost',

			'ex_fid_ary',
'sql_fora',
'start',

			'ex_fid_ary',
'sql_fora',
'start',

Line 1110Line 1184
					}
else
{

					}
else
{

						if ($this->db->get_sql_layer() == 'sqlite' || $this->db->get_sql_layer() == 'sqlite3')

						if ($this->db->get_sql_layer() == 'sqlite3')

						{
$sql = 'SELECT COUNT(topic_id) as total_results
FROM (SELECT DISTINCT t.topic_id';

						{
$sql = 'SELECT COUNT(topic_id) as total_results
FROM (SELECT DISTINCT t.topic_id';

Line 1127Line 1201
								$post_visibility
$sql_fora
AND t.topic_id = p.topic_id

								$post_visibility
$sql_fora
AND t.topic_id = p.topic_id

								$sql_time" . (($this->db->get_sql_layer() == 'sqlite' || $this->db->get_sql_layer() == 'sqlite3') ? ')' : '');

								$sql_time" . ($this->db->get_sql_layer() == 'sqlite3' ? ')' : '');

					}
$result = $this->db->sql_query($sql);


					}
$result = $this->db->sql_query($sql);


Line 1186Line 1260
		if (!$total_results && $is_mysql)
{
// Count rows for the executed queries. Replace $select within $sql with SQL_CALC_FOUND_ROWS, and run it.

		if (!$total_results && $is_mysql)
{
// Count rows for the executed queries. Replace $select within $sql with SQL_CALC_FOUND_ROWS, and run it.

			$sql_calc = str_replace('SELECT ' . $select, 'SELECT DISTINCT SQL_CALC_FOUND_ROWS p.post_id', $sql);

			$sql_calc = str_replace('SELECT ' . $select, 'SELECT SQL_CALC_FOUND_ROWS ' . $select, $sql);


$result = $this->db->sql_query($sql_calc);
$this->db->sql_freeresult($result);


$result = $this->db->sql_query($sql_calc);
$this->db->sql_freeresult($result);

Line 1249Line 1323
		$match[] = '#\[\/?[a-z0-9\*\+\-]+(?:=.*?)?(?::[a-z])?(\:?[0-9a-z]{5,})\]#';

$min = $this->word_length['min'];

		$match[] = '#\[\/?[a-z0-9\*\+\-]+(?:=.*?)?(?::[a-z])?(\:?[0-9a-z]{5,})\]#';

$min = $this->word_length['min'];

		$max = $this->word_length['max'];

 

$isset_min = $min - 1;



$isset_min = $min - 1;


Line 1285Line 1358
				* Note: this could be optimized. If the codepoint is lower than Hangul's range
* we know that it will also be lower than CJK ranges
*/

				* Note: this could be optimized. If the codepoint is lower than Hangul's range
* we know that it will also be lower than CJK ranges
*/

				if ((strncmp($word, UTF8_HANGUL_FIRST, 3) < 0 || strncmp($word, UTF8_HANGUL_LAST, 3) > 0)
&& (strncmp($word, UTF8_CJK_FIRST, 3) < 0 || strncmp($word, UTF8_CJK_LAST, 3) > 0)
&& (strncmp($word, UTF8_CJK_B_FIRST, 4) < 0 || strncmp($word, UTF8_CJK_B_LAST, 4) > 0))

				if ((strncmp($word, self::UTF8_HANGUL_FIRST, 3) < 0 || strncmp($word, self::UTF8_HANGUL_LAST, 3) > 0)
&& (strncmp($word, self::UTF8_CJK_FIRST, 3) < 0 || strncmp($word, self::UTF8_CJK_LAST, 3) > 0)
&& (strncmp($word, self::UTF8_CJK_B_FIRST, 4) < 0 || strncmp($word, self::UTF8_CJK_B_LAST, 4) > 0))

				{
$word = strtok(' ');
continue;

				{
$word = strtok(' ');
continue;

Line 1532Line 1605
		// carry on ... it's okay ... I know when I'm not wanted boo hoo
if (!$this->config['fulltext_native_load_upd'])
{

		// carry on ... it's okay ... I know when I'm not wanted boo hoo
if (!$this->config['fulltext_native_load_upd'])
{

			set_config('search_last_gc', time(), true);

			$this->config->set('search_last_gc', time(), false);

			return;
}


			return;
}


Line 1567Line 1640

// by setting search_last_gc to the new time here we make sure that if a user reloads because the
// following query takes too long, he won't run into it again


// by setting search_last_gc to the new time here we make sure that if a user reloads because the
// following query takes too long, he won't run into it again

				set_config('search_last_gc', time(), true);

				$this->config->set('search_last_gc', time(), false);


// Delete the matches
$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '


// Delete the matches
$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '

Line 1583Line 1656
			$this->destroy_cache(array_unique($destroy_cache_words));
}


			$this->destroy_cache(array_unique($destroy_cache_words));
}


		set_config('search_last_gc', time(), true);

		$this->config->set('search_last_gc', time(), false);

	}

/**

	}

/**

Line 1593Line 1666
	{
switch ($this->db->get_sql_layer())
{

	{
switch ($this->db->get_sql_layer())
{

			case 'sqlite':

 
			case 'sqlite3':
$this->db->sql_query('DELETE FROM ' . SEARCH_WORDLIST_TABLE);
$this->db->sql_query('DELETE FROM ' . SEARCH_WORDMATCH_TABLE);

			case 'sqlite3':
$this->db->sql_query('DELETE FROM ' . SEARCH_WORDLIST_TABLE);
$this->db->sql_query('DELETE FROM ' . SEARCH_WORDMATCH_TABLE);

Line 1654Line 1726
	* @param	string	$allowed_chars	String of special chars to allow
* @param string $encoding Text encoding
* @return string Cleaned up text, only alphanumeric chars are left

	* @param	string	$allowed_chars	String of special chars to allow
* @param string $encoding Text encoding
* @return string Cleaned up text, only alphanumeric chars are left

	*
* @todo \normalizer::cleanup being able to be used?

 
	*/
protected function cleanup($text, $allowed_chars = null, $encoding = 'utf-8')
{
static $conv = array(), $conv_loaded = array();

	*/
protected function cleanup($text, $allowed_chars = null, $encoding = 'utf-8')
{
static $conv = array(), $conv_loaded = array();

		$words = $allow = array();

		$allow = array();


// Convert the text to UTF-8
$encoding = strtolower($encoding);


// Convert the text to UTF-8
$encoding = strtolower($encoding);

Line 1682Line 1752
		$text = htmlspecialchars_decode(utf8_decode_ncr($text), ENT_QUOTES);

/**

		$text = htmlspecialchars_decode(utf8_decode_ncr($text), ENT_QUOTES);

/**

		* Load the UTF-8 normalizer
*
* If we use it more widely, an instance of that class should be held in a
* a global variable instead

		* Normalize to NFC




		*/

		*/

		\utf_normalizer::nfc($text);

		$text = \Normalizer::normalize($text);


/**
* The first thing we do is:


/**
* The first thing we do is:

Line 1780Line 1847
			$utf_char = substr($text, $pos, $utf_len);
$pos += $utf_len;


			$utf_char = substr($text, $pos, $utf_len);
$pos += $utf_len;


			if (($utf_char >= UTF8_HANGUL_FIRST && $utf_char <= UTF8_HANGUL_LAST)
|| ($utf_char >= UTF8_CJK_FIRST && $utf_char <= UTF8_CJK_LAST)
|| ($utf_char >= UTF8_CJK_B_FIRST && $utf_char <= UTF8_CJK_B_LAST))

			if (($utf_char >= self::UTF8_HANGUL_FIRST && $utf_char <= self::UTF8_HANGUL_LAST)
|| ($utf_char >= self::UTF8_CJK_FIRST && $utf_char <= self::UTF8_CJK_LAST)
|| ($utf_char >= self::UTF8_CJK_B_FIRST && $utf_char <= self::UTF8_CJK_B_LAST))

			{
/**
* All characters within these ranges are valid

			{
/**
* All characters within these ranges are valid

Line 1899Line 1966
		</dl>
<dl>
<dt><label for="fulltext_native_min_chars">' . $this->user->lang['MIN_SEARCH_CHARS'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['MIN_SEARCH_CHARS_EXPLAIN'] . '</span></dt>

		</dl>
<dl>
<dt><label for="fulltext_native_min_chars">' . $this->user->lang['MIN_SEARCH_CHARS'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['MIN_SEARCH_CHARS_EXPLAIN'] . '</span></dt>

			<dd><input id="fulltext_native_min_chars" type="number" size="3" maxlength="3" min="0" max="255" name="config[fulltext_native_min_chars]" value="' . (int) $this->config['fulltext_native_min_chars'] . '" /></dd>

			<dd><input id="fulltext_native_min_chars" type="number" min="0" max="255" name="config[fulltext_native_min_chars]" value="' . (int) $this->config['fulltext_native_min_chars'] . '" /></dd>

		</dl>
<dl>
<dt><label for="fulltext_native_max_chars">' . $this->user->lang['MAX_SEARCH_CHARS'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['MAX_SEARCH_CHARS_EXPLAIN'] . '</span></dt>

		</dl>
<dl>
<dt><label for="fulltext_native_max_chars">' . $this->user->lang['MAX_SEARCH_CHARS'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['MAX_SEARCH_CHARS_EXPLAIN'] . '</span></dt>

			<dd><input id="fulltext_native_max_chars" type="number" size="3" maxlength="3" min="0" max="255" name="config[fulltext_native_max_chars]" value="' . (int) $this->config['fulltext_native_max_chars'] . '" /></dd>

			<dd><input id="fulltext_native_max_chars" type="number" min="0" max="255" name="config[fulltext_native_max_chars]" value="' . (int) $this->config['fulltext_native_max_chars'] . '" /></dd>

		</dl>
<dl>
<dt><label for="fulltext_native_common_thres">' . $this->user->lang['COMMON_WORD_THRESHOLD'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['COMMON_WORD_THRESHOLD_EXPLAIN'] . '</span></dt>

		</dl>
<dl>
<dt><label for="fulltext_native_common_thres">' . $this->user->lang['COMMON_WORD_THRESHOLD'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['COMMON_WORD_THRESHOLD_EXPLAIN'] . '</span></dt>

			<dd><input id="fulltext_native_common_thres" type="text" size="3" maxlength="3" name="config[fulltext_native_common_thres]" value="' . (double) $this->config['fulltext_native_common_thres'] . '" /> %</dd>

			<dd><input id="fulltext_native_common_thres" type="text" name="config[fulltext_native_common_thres]" value="' . (double) $this->config['fulltext_native_common_thres'] . '" /> %</dd>

		</dl>
';


		</dl>
';