N-gram technique. Version: 0.2.2 Author: Joel Lee Author URI: http://blog.bcse.info License: GNU General Public License 2.0 */

/**
 * Build the n-gram table
 *
 * Empties the posts and/or comments n-gram table and refills it from the
 * current rows of the posts / comments tables.
 *
 * @param string $target could be posts, comments or both
 * @param integer $N n-gram size handed to bfs_StringToNgram()
 */
function bfs_BuildNgramTable($target='both', $N=2) {
    global $wpdb;

    if ( $target == 'posts' || $target == 'both' ) {
        //Truncate n-gram table
        $wpdb->query("TRUNCATE {$wpdb->prefix}posts_ngram");

        //Get posts; normalize an empty/NULL result so the loop below never
        //iterates over a non-array
        $posts = $wpdb->get_results("SELECT ID, post_title, post_content FROM $wpdb->posts");
        if ( empty($posts) ) {
            $posts = array();
        }

        //Build initial n-gram data
        //NOTE(review): the n-gram strings are embedded in the SQL as returned;
        //confirm bfs_StringToNgram() yields SQL-safe text
        foreach ( $posts as $post ) {
            $post_title_ngram = bfs_StringToNgram($post->post_title, $N, TRUE, ' ');
            $post_content_ngram = bfs_StringToNgram($post->post_content, $N, TRUE, ' ');
            $wpdb->query("INSERT DELAYED INTO {$wpdb->prefix}posts_ngram (ID, post_title_ngram, post_content_ngram) VALUES ('{$post->ID}', '{$post_title_ngram}', '{$post_content_ngram}')");
        }
    }

    if ( $target == 'comments' || $target == 'both' ) {
        //Truncate n-gram table
        $wpdb->query("TRUNCATE {$wpdb->prefix}comments_ngram");

        //Get comments (pingbacks and trackbacks are left out of the index)
        $comments = $wpdb->get_results("SELECT comment_post_ID, comment_content FROM $wpdb->comments WHERE comment_type NOT IN ('pingback', 'trackback')");
        if ( empty($comments) ) {
            $comments = array();
        }

        //Group comment texts by the post they belong to; start from an empty
        //array so a site without comments does not loop over an undefined variable
        $concated_comments = array();
        foreach ( $comments as $comment ) {
            $concated_comments[$comment->comment_post_ID][] = $comment->comment_content;
        }

        //Build initial n-gram data: one row per post, made from all of its comments
        foreach ( $concated_comments as $comment_post_ID => $comment_content_ngram ) {
            $concated_comment_content_ngram = bfs_StringToNgram(implode(' ', $comment_content_ngram), $N, TRUE, ' ');
            $wpdb->query("INSERT DELAYED INTO {$wpdb->prefix}comments_ngram (ID, comment_content_ngram) VALUES ('{$comment_post_ID}', '{$concated_comment_content_ngram}')");
        }
    }
}

/**
 * Update or insert a n-gram record
 *
 * Does nothing when no post with the given ID exists.
 *
 * @param integer $id post primary key (post_ID)
 * @param integer $N
 */
function bfs_UpdatePostNgramRow($id, $N=2) {
    global $wpdb;

    //Force a numeric key so the value is safe to embed in the SQL below
    $id = (int) $id;

    $post = $wpdb->get_row("SELECT post_title, post_content FROM $wpdb->posts WHERE ID = '{$id}'");
    if ( empty($post) ) {
        //Unknown post: nothing to index
        return;
    }

    $post_title_ngram = bfs_StringToNgram($post->post_title, $N, TRUE, ' ');
    $post_content_ngram = bfs_StringToNgram($post->post_content, $N, TRUE, ' ');

    //NOTE(review): n-gram strings are embedded as returned; confirm
    //bfs_StringToNgram() yields SQL-safe text
    $wpdb->query("INSERT DELAYED INTO {$wpdb->prefix}posts_ngram (ID, post_title_ngram, post_content_ngram) VALUES ('{$id}', '{$post_title_ngram}', '{$post_content_ngram}') ON DUPLICATE KEY UPDATE post_title_ngram = '{$post_title_ngram}', post_content_ngram = '{$post_content_ngram}'");
}

/**
 * Update or insert a n-gram record
 *
 * Looks up the post the comment belongs to and rebuilds that post's row in
 * the comments n-gram table from all of its comments. Pingbacks and
 * trackbacks are skipped, matching bfs_BuildNgramTable(). Does nothing when
 * the comment cannot be found.
 *
 * @param integer $id comment primary key (comment_ID)
 * @param integer $N
 */
function bfs_UpdateCommentNgramRow($id, $N=2) {
    global $wpdb;

    //Force a numeric key so the value is safe to embed in the SQL below
    $id = (int) $id;

    $post_id = $wpdb->get_var("SELECT comment_post_ID FROM $wpdb->comments WHERE comment_ID = '{$id}' LIMIT 1");
    if ( empty($post_id) ) {
        //Unknown comment: there is no post row to refresh
        return;
    }
    $post_id = (int) $post_id;

    $comments = $wpdb->get_col("SELECT comment_content FROM $wpdb->comments WHERE comment_post_ID = '{$post_id}' AND comment_type NOT IN ('pingback', 'trackback')");
    if ( !is_array($comments) ) {
        $comments = array();
    }

    $comment_content_ngram = bfs_StringToNgram(implode(' ', $comments), $N, TRUE, ' ');

    //NOTE(review): n-gram string is embedded as returned; confirm
    //bfs_StringToNgram() yields SQL-safe text
    $wpdb->query("INSERT DELAYED INTO {$wpdb->prefix}comments_ngram (ID, comment_content_ngram) VALUES ('{$post_id}', '{$comment_content_ngram}') ON DUPLICATE KEY UPDATE comment_content_ngram = '{$comment_content_ngram}'");
}

/**
 * Delete a n-gram record
 *
 * Removes the row for the given post from both the posts and the comments
 * n-gram tables.
 *
 * @param integer $id post primary key (post_ID)
 */
function bfs_DeleteNgramRow($id) {
    global $wpdb;

    //The ID is embedded unquoted, so it must be reduced to an integer first
    $id = (int) $id;

    $wpdb->query("DELETE FROM {$wpdb->prefix}posts_ngram WHERE ID = $id LIMIT 1");
    $wpdb->query("DELETE FROM {$wpdb->prefix}comments_ngram WHERE ID = $id LIMIT 1");
}

/**
 * Generate search query
 *
 * Builds a boolean-mode full-text query over the n-gram tables. Title
 * matches are weighted 100, content matches 50 and (when the
 * bcse_bfs_include_comment option is on) comment matches 10.
 *
 * @param string $criteria what user input in search box
 * @param integer $N
 * @return string SQL full-text search query
 */
function bfs_GetSearchQuery($criteria, $N=2) {
    global $wp_query, $wpdb, $wp_version;

    $include_comment = get_option('bcse_bfs_include_comment');

    //Construct limit; both operands are forced to integers because they are
    //concatenated straight into the LIMIT clause
    $current_page = ( !empty($wp_query->query_vars['paged']) ) ? (int) $wp_query->query_vars['paged'] : 1;
    if ( $current_page < 1 ) {
        $current_page = 1;
    }
    $limit_diff = (int) get_option('posts_per_page');
    $limit = ( $current_page - 1 ) * $limit_diff;

    //Build ngram query
    //NOTE(review): the result is embedded inside AGAINST('...') as returned;
    //confirm bfs_ParseOperators() yields SQL-safe text
    $ngram_query = bfs_ParseOperators($criteria, $N);

    $calc_found_rows = ( !empty($wp_version) && version_compare($wp_version, '2.1', '>') ) ? 'SQL_CALC_FOUND_ROWS' : '' ;

    $query = "SELECT\n" .
        " {$calc_found_rows} {$wpdb->posts}.*,\n" .
        " (MATCH({$wpdb->prefix}posts_ngram.post_title_ngram)\n" .
        " AGAINST('{$ngram_query}' IN BOOLEAN MODE)*100) +\n" .
        " (MATCH({$wpdb->prefix}posts_ngram.post_content_ngram)\n" .
        " AGAINST('{$ngram_query}' IN BOOLEAN MODE)*50)";
    if ( $include_comment ) {
        $query .= " +\n" .
            " (MATCH({$wpdb->prefix}comments_ngram.comment_content_ngram)\n" .
            " AGAINST('{$ngram_query}' IN BOOLEAN MODE)*10)";
    }

    $query .= " AS score\n" .
        "FROM\n" .
        " {$wpdb->posts} INNER JOIN {$wpdb->prefix}posts_ngram USING (ID)";
    if ( $include_comment ) {
        $query .= " LEFT JOIN {$wpdb->prefix}comments_ngram USING (ID)";
    }

    $query .= "\nWHERE\n" .
        " (MATCH({$wpdb->prefix}posts_ngram.post_title_ngram, {$wpdb->prefix}posts_ngram.post_content_ngram)\n" .
        " AGAINST('{$ngram_query}' IN BOOLEAN MODE)";
    if ( $include_comment ) {
        $query .= "\n OR MATCH({$wpdb->prefix}comments_ngram.comment_content_ngram)\n" .
            " AGAINST('{$ngram_query}' IN BOOLEAN MODE)";
    }

    //post_date_gmt is stored in GMT, so compare it with the current GMT time
    //rather than the server's local time
    //NOTE(review): 'private' posts are included in the result set for every
    //visitor; confirm this is intended
    $query .= ")\n" .
        " AND post_date_gmt <= '" . gmdate('Y-m-d H:i:s') . "'\n" .
        " AND post_password = ''\n" .
        " AND (post_status = 'publish' OR post_status = 'private')\n" .
        "ORDER BY\n" .
        " score DESC, post_date_gmt DESC\n" .
        "LIMIT\n" .
        " " . $limit . ", " . $limit_diff;

    return $query;
}

/**
 * Swap in the n-gram search query when a search term was submitted
 *
 * @param string $query query handed in by the caller
 * @param integer $N
 * @return string result of bfs_GetSearchQuery() when $_GET['s'] is not empty,
 *                otherwise $query unchanged
 */
function bfs_BuildSearchQuery($query, $N=2) {
    if ( !empty($_GET['s']) ) {
        $query = bfs_GetSearchQuery($_GET['s'], $N);
    }

    return $query;
}

/**
 * Add Bigram Full-Text Search options page to Options sub-menu
 */
function bfs_AdminMenu() {
    //NOTE(review): the third argument (5) looks like a legacy numeric user
    //level; consider a capability string - confirm against the WordPress
    //versions this plugin targets
    add_options_page('Bigram Full-Text Search Options', 'Bigram Full-Text Search', 5, basename(__FILE__), 'bfs_OptionsPage');
}

/**
 * Format a byte count with the largest fitting unit (bytes, KB or MB)
 *
 * @param integer $byte size in bytes; NULL/empty is reported as "0 bytes"
 * @param integer $precision number of decimals kept for KB/MB values
 * @return string e.g. "512 bytes", "1.5 KB", "2.25 MB"
 */
function bfs_ByteAutoUnit($byte, $precision=2) {
    //A missing size (e.g. no table status row) would otherwise print " bytes"
    if ( empty($byte) ) {
        return '0 bytes';
    }

    if ( $byte >= 1048576 ) {
        return round($byte/1048576, $precision) . ' MB';
    } elseif ( $byte >= 1024 ) {
        return round($byte/1024, $precision) . ' KB';
    } else {
        return $byte . ' bytes';
    }
}

/** * Bigram Full-Text Search options page */ function bfs_OptionsPage() { global $wpdb; //Initialize update flag $update = FALSE; //Get requests if ( !empty($_POST['rebuild_ngram']) ) { bfs_BuildNgramTable($_POST['rebuild_ngram']); } elseif ( !empty($_POST['uninstall']) && $_POST['uninstall'] == 'Uninstall' ) { //Drop n-gram table $wpdb->query("DROP TABLE IF EXISTS '{$wpdb->prefix}posts_ngram'"); $wpdb->query("DROP TABLE IF EXISTS '{$wpdb->prefix}comments_ngram'"); //Delete all options delete_option('bcse_bfs_version'); delete_option('bcse_bfs_include_comment'); delete_option('bcse_bfs_strip_html'); //Deactive plugin $current_active_plugins = get_option('active_plugins'); $plugin_filename = $_GET['page']; foreach ( $current_active_plugins as $key => $plugin ) { if ( strstr($plugin, $plugin_filename) ) { $plugin_id = $key; break; } } array_splice($current_active_plugins, $plugin_id, 1); update_option('active_plugins', $current_active_plugins); //Done! 
:p wp_redirect('plugins.php?deactivate=true'); } elseif ( !empty($_POST['submit']) ) { // "Search in comments" $include_comment = ( !empty($_POST['include_comment']) && $_POST['include_comment'] == 'true' ) ? TRUE : FALSE ; update_option('bcse_bfs_include_comment', $include_comment); // "Strip HTML" $strip_html = ( !empty($_POST['strip_html']) && $_POST['strip_html'] == 'true' ) ? TRUE : FALSE ; update_option('bcse_bfs_strip_html', $strip_html); // "Stop words list" $stop_words = !empty($_POST['stop_words']) ? $wpdb->escape($_POST['stop_words']) : array() ; update_option('bcse_bfs_stop_words', $stop_words); $update = TRUE; } //Get posts n-gram table status $posts_ngram_table_status = $wpdb->get_row("SHOW TABLE STATUS LIKE '{$wpdb->prefix}posts_ngram'"); $posts_ngram_data_length = bfs_ByteAutoUnit($posts_ngram_table_status->Data_length); $posts_ngram_index_length = bfs_ByteAutoUnit($posts_ngram_table_status->Index_length); //Get comments n-gram table status $comments_ngram_table_status = $wpdb->get_row("SHOW TABLE STATUS LIKE '{$wpdb->prefix}comments_ngram'"); $comments_ngram_data_length = bfs_ByteAutoUnit($comments_ngram_table_status->Data_length); $comments_ngram_index_length = bfs_ByteAutoUnit($comments_ngram_table_status->Index_length); //Get options $include_comment = get_option('bcse_bfs_include_comment'); $strip_html = get_option('bcse_bfs_strip_html'); $stop_words = get_option('bcse_bfs_stop_words'); if ( $update ) { echo '
Options saved.