* /lib/indexing.php
* @package Relevanssi
* @author Mikko Saari
* @license https://wordpress.org/about/gpl/ GNU General Public License
* @see https://www.relevanssi.com/
// Hook relevanssi_index_get_post_type() to the filter of the same name, with
// priority 1 and two accepted arguments (the type and the post object).
add_filter( 'relevanssi_index_get_post_type', 'relevanssi_index_get_post_type', 1, 2 );
/**
 * Returns the total number of posts to index.
 *
 * Thin wrapper around relevanssi_indexing_post_counter() that asks for every
 * indexable post (post type restrictions and valid statuses applied), not
 * only the ones missing from the index.
 *
 * @return int The number of posts to index.
 */
function relevanssi_count_total_posts() {
	return relevanssi_indexing_post_counter( false );
}
/**
 * Returns the number of posts missing from the index.
 *
 * Thin wrapper around relevanssi_indexing_post_counter() that asks only for
 * the indexable posts (post type restrictions and valid statuses applied)
 * that are not in the index yet.
 *
 * @return int The number of posts still to index.
 */
function relevanssi_count_missing_posts() {
	return relevanssi_indexing_post_counter( true );
}
/**
 * Counts the posts to index.
 *
 * Builds the regular indexing query, turns it into a COUNT query and runs it.
 * Post type restrictions and the valid statuses are taken into account.
 *
 * @global object $wpdb The WordPress database interface.
 *
 * @param boolean $extend If true, count only missing posts. If false, count all
 * posts. Default false.
 *
 * @return int The number of posts to index.
 */
function relevanssi_indexing_post_counter( $extend = false ) {
	global $wpdb;

	$restriction  = relevanssi_post_type_restriction();
	$valid_status = relevanssi_valid_status_array();
	$limit        = '';

	// Reuse the indexing query so the count always matches what gets indexed.
	$query = relevanssi_generate_indexing_query( $valid_status, $extend, $restriction, $limit );
	$query = str_replace( 'SELECT post.ID', 'SELECT COUNT(post.ID)', $query );

	/**
	 * Allows actions to happen before the indexing query is run.
	 *
	 * The indexing query fetches a list of posts to index (either all posts or only
	 * those missing from the index, depending on the case).
	 */
	do_action( 'relevanssi_pre_indexing_query' );

	$count = $wpdb->get_var( $query ); // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared

	// get_var() hands back a numeric string (or null on failure); return a real
	// integer as documented.
	return empty( $count ) ? 0 : (int) $count;
}
/**
 * Generates the indexing query.
 *
 * Generates the query that fetches the list of posts to index. The parameters are
 * assumed to be safely escaped. In regular use, the values are generated by
 * Relevanssi functions which provide reliable source data.
 *
 * @global object $wpdb                 The WordPress database interface.
 * @global array  $relevanssi_variables The Relevanssi global variables array, used
 * for table names.
 *
 * @param string  $valid_status Comma-separated list of valid post statuses.
 * @param boolean $extend       If true, only care about posts missing from the
 * index. If false, take all posts. Default false.
 * @param string  $restriction  Query restrictions, MySQL code that restricts the
 * posts fetched in the desired way. Default ''.
 * @param string  $limit        MySQL code to set the LIMIT and OFFSET values.
 * Default ''.
 *
 * @return string MySQL query to fetch the posts.
 */
function relevanssi_generate_indexing_query( $valid_status, $extend = false, $restriction = '', $limit = '' ) {
	global $wpdb, $relevanssi_variables;
	$relevanssi_table = $relevanssi_variables['relevanssi_table'];

	// Unless image indexing is enabled, leave image attachments out.
	if ( 'off' === get_option( 'relevanssi_index_image_files', 'off' ) ) {
		$restriction .= "
		AND post.ID NOT IN (
		SELECT ID FROM $wpdb->posts WHERE post_type = 'attachment'
		AND post_mime_type LIKE 'image%' )
		";
	}

	/**
	 * Filters the WHERE restriction for indexing queries.
	 *
	 * This filter hook can be used to exclude posts from indexing as early as is
	 * possible.
	 *
	 * @since 4.0.9 / 2.1.5
	 *
	 * @param array $restriction An array with two values: 'mysql' for the MySQL
	 * query restriction to modify, 'reason' for the reason of restriction.
	 */
	$restriction = apply_filters(
		'relevanssi_indexing_restriction',
		array(
			'mysql'  => $restriction,
			'reason' => '',
		)
	);

	/**
	 * Backwards compatibility for the change in filter parameters in Premium
	 * 2.8.0 in March 2020. Remove this eventually.
	 *
	 * Legacy callbacks return the bare SQL string. A string cannot take named
	 * offsets, so it has to be wrapped in a new array instead of being modified
	 * in place.
	 */
	if ( is_string( $restriction ) ) {
		$restriction = array(
			'mysql'  => $restriction,
			'reason' => 'relevanssi_indexing_restriction filter',
		);
	}

	if ( ! $extend ) {
		// All indexable posts: valid status, or 'inherit' with a valid parent
		// (or no parent at all), and not hidden with _relevanssi_hide_post.
		$q = "SELECT post.ID
		FROM $wpdb->posts post
		LEFT JOIN $wpdb->posts parent ON (post.post_parent=parent.ID)
		WHERE
			(post.post_status IN ($valid_status)
			OR
			(post.post_status='inherit'
				AND(
					(parent.ID is not null AND (parent.post_status IN ($valid_status)))
					OR (post.post_parent=0)
				)
			))
			AND post.ID NOT IN (SELECT post_id FROM $wpdb->postmeta WHERE meta_key = '_relevanssi_hide_post' AND meta_value = 'on')
		{$restriction['mysql']} ORDER BY post.ID DESC $limit";
	} else {
		// Only posts that have no rows in the index. When internal links are
		// indexed, a post that only has link rows still counts as missing.
		$processed_post_filter = 'r.doc is null';
		if ( 'noindex' !== get_option( 'relevanssi_internal_links', 'noindex' ) ) {
			$processed_post_filter = "(r.doc is null OR r.doc NOT IN (SELECT DISTINCT(doc) FROM $relevanssi_table WHERE link = 0))";
		}

		$q = "SELECT post.ID
		FROM $wpdb->posts post
		LEFT JOIN $wpdb->posts parent ON (post.post_parent=parent.ID)
		LEFT JOIN $relevanssi_table r ON (post.ID=r.doc)
		WHERE
		$processed_post_filter
		AND
			(post.post_status IN ($valid_status)
			OR
			(post.post_status='inherit'
				AND(
					(parent.ID is not null AND (parent.post_status IN ($valid_status)))
					OR (post.post_parent=0)
				)
			)
		)
		AND post.ID NOT IN (SELECT post_id FROM $wpdb->postmeta WHERE meta_key = '_relevanssi_hide_post' AND meta_value = 'on')
		{$restriction['mysql']} ORDER BY post.ID DESC $limit";
	}

	/**
	 * Filters the Relevanssi indexing query.
	 *
	 * @param string $q The indexing MySQL query.
	 */
	return apply_filters( 'relevanssi_indexing_query', $q );
}
/**
 * Generates a post type restriction.
 *
 * Builds the MySQL snippet that limits the indexing query to the post types
 * stored in the 'relevanssi_index_post_types' option. Only post types that are
 * actually registered are accepted.
 *
 * @return string MySQL code for the post type restriction.
 */
function relevanssi_post_type_restriction() {
	$configured = get_option( 'relevanssi_index_post_types' );
	$quoted     = array();

	if ( is_array( $configured ) ) {
		foreach ( $configured as $candidate ) {
			// 'bogus' is a placeholder entry that only exists so that the setting
			// gets saved even when no post types are chosen; never index it.
			if ( 'bogus' !== $candidate && post_type_exists( $candidate ) ) {
				$quoted[] = "'$candidate'";
			}
		}
	}

	if ( ! $quoted ) {
		// Nothing usable selected: match a post type that cannot exist so that
		// no posts are indexed.
		$quoted = array( "'no_post_types_chosen_so_index_no_posts'" );
	}

	return ' AND post.post_type IN (' . implode( ', ', $quoted ) . ') ';
}
/**
 * Generates a list of valid post statuses.
 *
 * Generates a list of valid post statuses to use in indexing. By default,
 * Relevanssi accepts 'publish', 'draft', 'private', 'pending', and 'future'. If
 * you need to use a custom post status, you can use the
 * 'relevanssi_valid_status' filter hook to add your own post status to the list
 * of valid statuses.
 *
 * If the filter returns something that is not a non-empty array, the default
 * statuses are used instead, in both return modes.
 *
 * @param boolean $return_array If true, return array; default false, return
 * string.
 *
 * @return string|array A comma-separated list of escaped and quoted valid post
 * statuses ready for MySQL, or the array of statuses without escaping,
 * depending on the $return_array parameter.
 */
function relevanssi_valid_status_array( $return_array = false ) {
	$default_statuses = array( 'publish', 'draft', 'private', 'pending', 'future' );

	/**
	 * Filters the valid status array.
	 *
	 * Allows you to modify the array that contains all valid post statuses for
	 * Relevanssi post indexing.
	 *
	 * @param array Array of post statuses.
	 */
	$valid_status_array = apply_filters( 'relevanssi_valid_status', $default_statuses );

	// If the filter makes the setting a non-array (or empties it), fall back to
	// reasonable default values. This has to happen before the array return as
	// well, because callers pass the result straight to in_array().
	if ( ! is_array( $valid_status_array ) || 0 === count( $valid_status_array ) ) {
		$valid_status_array = $default_statuses;
	}

	if ( $return_array ) {
		return $valid_status_array;
	}

	$valid_status = array();
	foreach ( $valid_status_array as $status ) {
		$valid_status[] = "'" . esc_sql( $status ) . "'";
	}

	return implode( ',', $valid_status );
}
* Builds the index.
* @global object $wpdb The WordPress database interface.
* @global array $relevanssi_variables The Relevanssi global variables array,
* used for table names.
* @param boolean|int $extend_offset If numeric, offsets the indexing by that
* amount. If true, doesn't truncate the index before indexing. If false,
* truncates index before indexing. Default false.
* @param boolean $verbose Not used anymore, kept for backwards
* compatibility.
* @param int $post_limit How many posts to index. Default null, no
* limit.
* @param boolean $is_ajax If true, indexing is done in AJAX context.
* Default false.
* @return array In AJAX context, returns array with two values:
* 'indexing_complete' tells whether indexing is completed or not, and 'indexed'
* returns the number of posts indexed. Outside AJAX context, these values are
* returned as an array in format of array(completed, posts indexed).
function relevanssi_build_index( $extend_offset = false, $verbose = null, $post_limit = null, $is_ajax = false ) {
// Fetches the list of posts to index with the indexing query, runs
// relevanssi_index_doc() on each of them and reports whether the indexing is
// complete and how many posts were indexed.
global $wpdb, $relevanssi_variables;
$relevanssi_table = $relevanssi_variables['relevanssi_table'];
// Thanks to Julien Mession. This speeds up indexing a lot.
wp_suspend_cache_addition( true );
if ( false === $extend_offset ) {
// Truncate the index first.
// Taxonomy term, user profile and post type archive indexing.
if ( function_exists( 'relevanssi_premium_indexing' ) ) {
update_option( 'relevanssi_index', '' );
// Converts $extend_offset and $post_limit into the 'limit', 'extend' and 'size'
// values; 'size' is read further down to decide whether indexing is complete.
$indexing_query_args = relevanssi_indexing_query_args( $extend_offset, $post_limit );
// The values generated by these functions are safe to use for MySQL.
$restriction = relevanssi_post_type_restriction();
$valid_status = relevanssi_valid_status_array();
$query = relevanssi_generate_indexing_query(
/* This action documented earlier in lib/indexing.php. */
do_action( 'relevanssi_pre_indexing_query' );
$content = $wpdb->get_results( $query ); // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared
// NOTE(review): $progress is assigned only when WP_CLI is set AND
// relevanssi_generate_progress_bar() exists, but it is read below whenever
// WP_CLI is set. Presumably the helper always exists under WP-CLI; confirm.
if ( defined( 'WP_CLI' ) && WP_CLI && function_exists( 'relevanssi_generate_progress_bar' ) ) {
// @codeCoverageIgnoreStart
$progress = relevanssi_generate_progress_bar( 'Indexing posts', count( $content ) );
// @codeCoverageIgnoreEnd
$custom_fields = relevanssi_get_custom_fields();
$n = 0;
// Nothing needs to be removed first when building the index.
$remove_first = false;
$bypass_global_post = true; // $bypassglobalpost set to true, because at this
// point global $post should be null, but in some cases it is not.
foreach ( $content as $post ) {
$result = relevanssi_index_doc( $post->ID, $remove_first, $custom_fields, $bypass_global_post );
// relevanssi_index_doc() can also return -1, 'hide' or 'donotindex'; only a
// positive number counts as an indexed post.
if ( is_numeric( $result ) && $result > 0 ) {
// $n calculates the number of posts indexed.
if ( defined( 'WP_CLI' ) && WP_CLI && $progress ) {
// @codeCoverageIgnoreStart
// @codeCoverageIgnoreEnd
if ( defined( 'WP_CLI' ) && WP_CLI && $progress ) {
// @codeCoverageIgnoreStart
// @codeCoverageIgnoreEnd
// To prevent empty indices.
$wpdb->query( "ANALYZE TABLE $relevanssi_table" ); // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared,WordPress.DB.PreparedSQL.InterpolatedNotPrepared
$complete = false;
$size = $indexing_query_args['size'];
// Indexing is complete when there was no batch size at all, or when the query
// returned fewer posts than the batch size.
if ( ( 0 === $size ) || ( count( $content ) < $size ) ) {
$complete = true;
update_option( 'relevanssi_indexed', 'done' );
// Update the document count variable.
// Re-enable cache additions that were suspended at the top of the function.
wp_suspend_cache_addition( false );
// AJAX callers get an associative array, everybody else a positional one.
if ( $is_ajax ) {
$response = array(
'indexing_complete' => $complete,
'indexed' => $n,
return $response;
return array( $complete, $n );
* Indexes one document.
* Different cases:
* Build index:
* - global $post is null, $index_post is a post object.
* Update post:
* - global $post has the original $post, $index_post is the ID of revision.
* Quick edit:
* - global $post is an array, $index_post is the ID of current revision.
* @global object $wpdb The WordPress database interface.
* @global array $relevanssi_variables The Relevanssi global variables array, used
* for table names.
* @global object $post The global post object.
* @param object|int $index_post The post to index, either post object or
* post ID.
* @param boolean $remove_first If true, remove the post from the index
* before indexing. Default false.
* @param array $custom_fields The custom fields that are indexed for the
* post. Default an empty string.
* @param boolean $bypass_global_post If true, do not use the global $post object.
* Default false.
* @param boolean $debug If true, echo out debugging information.
* Default false.
* @return string|int Number of insert queries run, or -1 if the indexing fails,
* or 'hide' in case the post is hidden or 'donotindex' if a filter blocks this.
function relevanssi_index_doc( $index_post, $remove_first = false, $custom_fields = '', $bypass_global_post = false, $debug = false ) {
// Indexes a single post: resolves the post object, checks whether it may be
// indexed, optionally removes the old index rows, collects the tokens from
// the different sources into $insert_data and writes them with one INSERT.
// The global $post is temporarily overridden and restored before returning.
global $wpdb, $post, $relevanssi_variables;
$relevanssi_table = $relevanssi_variables['relevanssi_table'];
// Remember the state of the global $post so that it can be restored later.
$post_was_null = true;
$previous_post = null;
// Check if this is a Jetpack Contact Form entry.
if ( isset( $_REQUEST['contact-form-id'] ) ) { // phpcs:ignore WordPress.Security.NonceVerification
return -1;
if ( isset( $post ) ) {
$post_was_null = false;
$previous_post = $post;
// Use $index_post when there is no global $post or when told to bypass it.
if ( empty( $post ) || $bypass_global_post ) {
$post = is_object( $index_post ) ? $index_post : get_post( $index_post ); // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
// Finally fetch the post again by ID. Complicated, yes, but unless we do this,
// we might end up indexing the post before the updates come in.
$post = isset( $post->ID ) ? get_post( $post->ID ) : null; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
if ( null === $post ) {
// At this point we should have something in $post; if not, quit.
if ( $previous_post || $post_was_null ) {
$post = $previous_post; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
return -1;
// Post exclusion feature from Relevanssi Premium.
if ( function_exists( 'relevanssi_hide_post' ) ) {
if ( relevanssi_hide_post( $post->ID ) ) {
if ( $debug ) {
relevanssi_debug_echo( 'relevanssi_hide_post() returned true.' );
if ( $previous_post || $post_was_null ) {
$post = $previous_post; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
__( 'Relevanssi index exclude', 'relevanssi' )
return 'hide';
// The post is indexed only if its post type is among the indexed post types
// and the 'relevanssi_do_not_index' filter does not object.
$index_this_post = false;
$post->indexing_content = true;
$index_types = get_option( 'relevanssi_index_post_types', array() );
if ( is_array( $index_types ) && in_array( $post->post_type, $index_types, true ) ) {
$index_this_post = true;
* Filters whether a post is indexed or not.
* Allows you to filter whether a post is indexed or not.
* @param boolean|string If not false, the post is not indexed. The value
* can be a boolean, or a string containing an explanation for the
* exclusion. Default false.
* @param int The post ID.
$do_not_index = apply_filters( 'relevanssi_do_not_index', false, $post->ID );
if ( $do_not_index ) {
// Filter says no.
if ( true === $do_not_index ) {
$do_not_index = __( 'Blocked by a filter function', 'relevanssi' );
if ( $debug ) {
relevanssi_debug_echo( 'relevanssi_do_not_index says exclude, because: ' . $do_not_index );
// Store the reason in post meta; it is deleted again once the post is indexed.
update_post_meta( $post->ID, '_relevanssi_noindex_reason', $do_not_index );
$index_this_post = false;
if ( $remove_first ) {
// We are updating a post, so remove the old stuff first.
relevanssi_remove_doc( $post->ID, true );
if ( function_exists( 'relevanssi_remove_item' ) ) {
relevanssi_remove_item( $post->ID, 'post' );
if ( $debug ) {
relevanssi_debug_echo( 'Removed the post from the index.' );
// This needs to be here, after the call to relevanssi_remove_doc(), because
// otherwise a post that's in the index but shouldn't be there won't get removed.
if ( ! $index_this_post ) {
if ( $previous_post || $post_was_null ) {
$post = $previous_post; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
return 'donotindex';
// $n sums up the values returned by the relevanssi_index_*() helpers below.
$n = 0;
* Allows filtering the indexed post before indexing.
* @param object $post The post object.
* @param object $post The post object again (in other uses for this filter, the
* second parameter actually makes sense).
$post = apply_filters( 'relevanssi_post_to_index', $post, $post ); // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
$min_word_length = get_option( 'relevanssi_min_word_length', 3 );
// Every helper below adds its tokens to $insert_data, which is converted to
// VALUES rows at the end.
$insert_data = array();
if ( 'none' !== get_option( 'relevanssi_index_comments' ) ) {
$n += relevanssi_index_comments( $insert_data, $post->ID, $min_word_length, $debug );
$taxonomies = get_option( 'relevanssi_index_taxonomies_list', array() );
foreach ( $taxonomies as $taxonomy ) {
$n += relevanssi_index_taxonomy_terms( $insert_data, $post->ID, $taxonomy, $debug );
if ( 'on' === get_option( 'relevanssi_index_author' ) ) {
$n += relevanssi_index_author( $insert_data, $post->post_author, $min_word_length, $debug );
$n += relevanssi_index_custom_fields( $insert_data, $post->ID, $custom_fields, $min_word_length, $debug );
// Excerpts are indexed when the option is on, and always for attachments.
if (
isset( $post->post_excerpt )
&& ( 'on' === get_option( 'relevanssi_index_excerpt' ) || 'attachment' === $post->post_type )
) {
// Attachment caption is stored in the excerpt.
$n += relevanssi_index_excerpt( $insert_data, $post->post_excerpt, $min_word_length, $debug );
// Premium can index arbitrary MySQL columns.
if ( function_exists( 'relevanssi_index_mysql_columns' ) ) {
if ( $debug ) {
relevanssi_debug_echo( 'Indexing MySQL columns.' );
$insert_data = relevanssi_index_mysql_columns( $insert_data, $post->ID );
// Premium can index PDF content for the parent post.
if ( function_exists( 'relevanssi_index_pdf_for_parent' ) ) {
if ( $debug ) {
relevanssi_debug_echo( 'Indexing PDF content for parent post.' );
$insert_data = relevanssi_index_pdf_for_parent( $insert_data, $post->ID );
$n += relevanssi_index_title( $insert_data, $post, $min_word_length, $debug );
$n += relevanssi_index_content( $insert_data, $post, $min_word_length, $debug );
$values = relevanssi_convert_data_to_values( $insert_data, $post );
// All rows for the post are written with a single INSERT IGNORE query.
if ( ! empty( $values ) ) {
$values = implode( ', ', $values );
$query = "INSERT IGNORE INTO $relevanssi_table (doc, term, term_reverse, content, title, comment, tag, link, author, category, excerpt, taxonomy, customfield, type, taxonomy_detail, customfield_detail, mysqlcolumn) VALUES $values";
if ( $debug ) {
relevanssi_debug_echo( "Final indexing query:\n\t$query" );
$wpdb->query( $query ); // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared
// The post is handled now, so any stored exclusion reason is cleared.
delete_post_meta( $post->ID, '_relevanssi_noindex_reason' );
// Put the global $post back to what it was when the function was called.
if ( $previous_post || $post_was_null ) {
$post = $previous_post; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
return $n;
/**
 * Index taxonomy terms for given post and given taxonomy.
 *
 * Collects the names of the terms the post has in the taxonomy, tokenizes them
 * and adds the token counts to $insert_data under 'tag', 'category' or
 * 'taxonomy', plus a per-taxonomy breakdown in 'taxonomy_detail' (JSON).
 *
 * @since 1.8
 *
 * @param array   $insert_data Insert query data array, modified here.
 * @param int     $post_id     The indexed post ID.
 * @param string  $taxonomy    Taxonomy name.
 * @param boolean $debug       If true, print out debugging notices.
 *
 * @return int The number of new tokens added.
 */
function relevanssi_index_taxonomy_terms( &$insert_data, $post_id, $taxonomy, $debug ) {
	if ( $debug ) {
		relevanssi_debug_echo( "Indexing taxonomy terms for $taxonomy" );
	}

	$n = 0;

	$min_word_length     = get_option( 'relevanssi_min_word_length', 3 );
	$post_taxonomy_terms = get_the_terms( $post_id, $taxonomy );

	// get_the_terms() returns false when there are no terms and a WP_Error for
	// example when the taxonomy does not exist. Only a non-empty array of terms
	// is worth processing.
	if ( ! is_array( $post_taxonomy_terms ) || empty( $post_taxonomy_terms ) ) {
		return $n;
	}

	$term_string = '';
	foreach ( $post_taxonomy_terms as $post_term ) {
		if ( is_object( $post_term ) ) {
			$term_string .= $post_term->name . ' ';
		}
	}
	if ( $debug ) {
		relevanssi_debug_echo( "Taxonomy term content for $taxonomy: $term_string" );
	}

	/**
	 * Filters the taxonomy term content before indexing.
	 *
	 * @param string The taxonomy term content.
	 * @param object The last term of the post in this taxonomy (kept as is for
	 * backwards compatibility).
	 * @param string The taxonomy.
	 * @param int    The post ID.
	 */
	$term_string = apply_filters( 'relevanssi_tag_before_tokenize', trim( $term_string ), $post_term, $taxonomy, $post_id );

	/** This filter is documented in lib/indexing.php */
	$term_tokens = apply_filters(
		'relevanssi_indexing_tokens',
		relevanssi_tokenize( $term_string, true, $min_word_length ),
		'taxonomy-' . $taxonomy
	);

	if ( ! is_array( $term_tokens ) || 0 === count( $term_tokens ) ) {
		return $n;
	}

	// The target column depends only on the taxonomy, so resolve it once.
	switch ( $taxonomy ) {
		case 'post_tag':
			$type = 'tag';
			break;
		case 'category':
			$type = 'category';
			break;
		default:
			$type = 'taxonomy';
			break;
	}

	foreach ( $term_tokens as $token => $count ) {
		$n++;

		$insert_data[ $token ][ $type ] = isset( $insert_data[ $token ][ $type ] )
			? $insert_data[ $token ][ $type ] + $count : $count;

		$tax_detail = array();
		if ( isset( $insert_data[ $token ]['taxonomy_detail'] ) ) {
			$tax_detail = json_decode( $insert_data[ $token ]['taxonomy_detail'], true );
			if ( ! is_array( $tax_detail ) ) {
				// Unreadable JSON; start the breakdown from scratch.
				$tax_detail = array();
			}
		}

		$tax_detail[ $taxonomy ] = isset( $tax_detail[ $taxonomy ] )
			? $tax_detail[ $taxonomy ] + $count : $count;

		$insert_data[ $token ]['taxonomy_detail'] = wp_json_encode( $tax_detail );
	}

	return $n;
}
* Updates child posts when a parent post changes status.
* Called from 'transition_post_status' action hook when a post is edited,
* published, or deleted. Will do the appropriate indexing action on the child
* posts and attachments.
* @author renaissancehack
* @param string $new_status The new status.
* @param string $old_status The old status.
* @param object $post The post object.
* @return null|array Null in problem cases, an array of 'removed' and
* 'indexed' values that show how many posts were indexed and removed.
function relevanssi_update_child_posts( $new_status, $old_status, $post ) {
// When a post moves between an indexed and a non-indexed status, applies the
// matching action to its child posts: removes them from the index or indexes
// them again.
// Safety check, for WordPress Editorial Calendar incompatibility.
if ( ! isset( $post ) || ! isset( $post->ID ) ) {
/** Documented in lib/indexing.php. */
$index_statuses = apply_filters(
array( 'publish', 'private', 'draft', 'pending', 'future' )
* Filters the attachment and revision post types.
* If you want attachment indexing to cover other post types than just
* attachment, you need to include the new post type in the array with
* this filter.
* @param array Array of post types, default 'attachment' and 'revision'.
$attachment_revision_types = apply_filters(
array( 'attachment', 'revision' )
// Returned from every "nothing to do" branch below.
$did_nothing = array(
'removed' => 0,
'indexed' => 0,
* Either:
* 1. New status equals old status.
* 2. Both new and old status are in the list of stati to index.
* 3. The post is an attachment or a revision.
* In any of these cases, do nothing.
if ( $new_status === $old_status ) {
return $did_nothing;
if ( in_array( $new_status, $index_statuses, true ) && in_array( $old_status, $index_statuses, true ) ) {
return $did_nothing;
if ( in_array( $post->post_type, $attachment_revision_types, true ) ) {
return $did_nothing;
// Fetch the children of this post, limited to the indexed post types.
$post_types = get_option( 'relevanssi_index_post_types' );
$args = array(
'post_parent' => $post->ID,
'post_type' => $post_types,
$removed = 0;
$indexed = 0;
$child_posts = get_children( $args );
if ( ! empty( $child_posts ) ) {
// New status is not indexable: the children leave the index. Otherwise the
// parent became indexable and the children are indexed.
// The loops below reuse $post as the loop variable, which overwrites the
// $post parameter; the parent post is not read after this point.
if ( ! in_array( $new_status, $index_statuses, true ) ) {
foreach ( $child_posts as $post ) {
relevanssi_remove_doc( $post->ID );
} else {
foreach ( $child_posts as $post ) {
relevanssi_publish( $post->ID );
return array(
'removed' => $removed,
'indexed' => $indexed,
/**
 * Indexes a published post.
 *
 * Skips auto drafts; every other post is handed to relevanssi_index_doc() with
 * the current custom field settings, removing the old index data first.
 *
 * @param int     $post_id            The post ID.
 * @param boolean $bypass_global_post If true, bypass the global $post object.
 * Default false.
 *
 * @return string|int Returns 'auto-draft' if the post is an auto draft and
 * thus skipped, or the relevanssi_index_doc() return value.
 *
 * @see relevanssi_index_doc()
 */
function relevanssi_publish( $post_id, $bypass_global_post = false ) {
	if ( 'auto-draft' === get_post_status( $post_id ) ) {
		return 'auto-draft';
	}

	// The second argument makes relevanssi_index_doc() remove the existing
	// index rows of the post before indexing it again.
	return relevanssi_index_doc( $post_id, true, relevanssi_get_custom_fields(), $bypass_global_post );
}
* Indexes a post after publishing or modification.
* Hooks on to 'wp_insert_post' action hook and triggers when wp_insert_post() is
* used to add a post into the database. Doesn't use the global $post object, because
* that doesn't have the correct post.
* @author Lumpysimon.
* @global object $wpdb The WP database interface.
* @param int $post_id The post ID.
* @return string|int Returns 'auto-draft' if the post is an auto draft and
* thus skipped, 'removed' if the post is removed or the relevanssi_index_doc()
* return value from relevanssi_publish().
* @see relevanssi_publish()
function relevanssi_insert_edit( $post_id ) {
// Decides whether the post should be in the index after it has been saved:
// it is indexed with relevanssi_publish() if it passes the indexing
// restriction filter and has a valid status, and removed from the index
// otherwise.
global $wpdb;
$post_status = get_post_status( $post_id );
if ( 'auto-draft' === $post_status ) {
return 'auto-draft';
if ( 'inherit' === $post_status ) {
// Get the post status from the parent post.
$parent_id = wp_get_post_parent_id( $post_id );
$post_status = get_post_status( $parent_id );
$index_this_post = true;
/* Documented in lib/indexing.php. */
// NOTE(review): relevanssi_generate_indexing_query() also accepts a plain
// string from this filter for backwards compatibility; here only the array
// form is read. Confirm whether string results need handling here as well.
$restriction = apply_filters(
'mysql' => '',
'reason' => '',
if ( ! empty( $restriction['mysql'] ) ) {
// Check the indexing restriction filter: if the post passes the filter, this
// should return the post ID.
// NOTE(review): $post_id is interpolated into the query without an integer
// cast. Presumably it is always the integer ID passed by 'wp_insert_post';
// confirm, or cast it before use.
$is_unrestricted = $wpdb->get_var(
"SELECT ID FROM $wpdb->posts AS post WHERE ID = $post_id {$restriction['mysql']}" // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared,WordPress.DB.PreparedSQL.InterpolatedNotPrepared
if ( ! $is_unrestricted ) {
$index_this_post = false;
// Ask for the statuses as an array so that they can be checked with in_array().
$return_array = true;
$index_statuses = relevanssi_valid_status_array( $return_array );
if ( ! in_array( $post_status, $index_statuses, true ) ) {
$index_this_post = false;
if ( $index_this_post ) {
// The global $post does not hold the correct post here, so bypass it.
$bypass_global_post = true;
$return_value = relevanssi_publish( $post_id, $bypass_global_post );
} else {
// The post isn't supposed to be indexed anymore, remove it from index.
relevanssi_remove_doc( $post_id );
trim( $restriction['reason'] )
$return_value = 'removed';
return $return_value;
/**
 * Updates comment indexing when comments are added, edited or deleted.
 *
 * Reindexes the post the comment belongs to, provided that comment indexing is
 * enabled, the comment exists, is of an indexed type and is approved.
 *
 * @author OdditY
 *
 * @param int $comment_id Comment ID.
 *
 * @see relevanssi_comment_remove
 * @see relevanssi_comment_edit
 * @see relevanssi_publish
 *
 * @return int|string The relevanssi_publish return value, "nocommentfound" if
 * the comment doesn't exist or "donotindex" if it cannot be indexed or if
 * comment indexing is disabled.
 */
function relevanssi_index_comment( $comment_id ) {
	$comment_indexing_type = get_option( 'relevanssi_index_comments' );

	if ( 'normal' !== $comment_indexing_type && 'all' !== $comment_indexing_type ) {
		return 'donotindex';
	}

	// 'normal' indexes only regular comments, 'all' includes pingbacks and
	// trackbacks as well.
	$no_pingbacks = ( 'normal' === $comment_indexing_type );

	$comment = get_comment( $comment_id );
	if ( ! $comment ) {
		return 'nocommentfound';
	}

	// Regular comments have an empty comment_type in old WordPress versions and
	// 'comment' since WordPress 5.5; both must count as regular comments.
	$is_regular_comment = empty( $comment->comment_type ) || 'comment' === $comment->comment_type;
	if ( $no_pingbacks && ! $is_regular_comment ) {
		return 'donotindex';
	}

	if ( 1 !== intval( $comment->comment_approved ) ) {
		// Comment isn't approved, do not index.
		return 'donotindex';
	}

	return relevanssi_publish( $comment->comment_post_ID );
}
/**
 * Returns the comment text for a post.
 *
 * Walks through the approved comments of the post in batches and glues the
 * (filtered) author names and comment contents together.
 *
 * @param int $post_id The post ID.
 *
 * @return string All the comment content as a string that has the comment author
 * and the comment text.
 */
function relevanssi_get_comments( $post_id ) {
	/**
	 * If this filter returns true, the comments for the post are not indexed.
	 *
	 * @param boolean Return true to block the comment indexing. Default false.
	 * @param int     $post_id The post ID.
	 */
	if ( apply_filters( 'relevanssi_index_comments_exclude', false, $post_id ) ) {
		return '';
	}

	$mode = get_option( 'relevanssi_index_comments' );
	if ( 'none' === $mode ) {
		return '';
	}

	$types      = ( 'all' === $mode ) ? array( 'comment', 'pings' ) : array( 'comment' );
	$batch_size = 20;
	$collected  = '';

	// Posts may have lots of comments. Do 20 at the time to avoid memory issues.
	for ( $start = 0; ; $start += $batch_size ) {
		$batch = get_approved_comments(
			$post_id,
			array(
				'offset' => $start,
				'number' => $batch_size,
				'type'   => $types,
			)
		);
		if ( 0 === count( $batch ) ) {
			break;
		}

		foreach ( $batch as $item ) {
			/**
			 * Filters the comment author before indexing.
			 *
			 * @param string Comment author display name.
			 * @param int    The comment ID.
			 */
			$author = apply_filters( 'relevanssi_comment_author_to_index', $item->comment_author, $item->comment_ID );

			/**
			 * Filters the comment content before indexing.
			 *
			 * @param string Comment content.
			 * @param int    The comment ID.
			 */
			$content = apply_filters( 'relevanssi_comment_content_to_index', $item->comment_content, $item->comment_ID );

			$collected .= ' ' . $author . ' ' . $content;
		}
	}

	return $collected;
}
/**
 * Truncates the Relevanssi index.
 *
 * Runs TRUNCATE TABLE on the index table named in $relevanssi_variables.
 *
 * @global object $wpdb                 The WordPress database interface.
 * @global array  $relevanssi_variables The Relevanssi global variables array, used
 * for table names.
 *
 * @return boolean True on success, false on failure.
 */
function relevanssi_truncate_index() {
	global $relevanssi_variables, $wpdb;

	$truncate_sql = 'TRUNCATE TABLE ' . $relevanssi_variables['relevanssi_table'];

	return $wpdb->query( $truncate_sql ); // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared
}
/**
 * Remove post from the Relevanssi index.
 *
 * Deletes every index row of the post. When rows were deleted, the stored
 * document count is reduced by one, because a post is a single document no
 * matter how many index rows it had.
 *
 * @global object $wpdb                 The WordPress database interface.
 * @global array  $relevanssi_variables The Relevanssi global variables array, used
 * for table names.
 *
 * @param int     $post_id             The post ID.
 * @param boolean $keep_internal_links If true, keep internal link indexing (a
 * Premium feature). Default false.
 */
function relevanssi_remove_doc( $post_id, $keep_internal_links = false ) {
	global $wpdb, $relevanssi_variables;

	if ( function_exists( 'relevanssi_premium_remove_doc' ) ) {
		// Premium has a different method, because the index can include taxonomy
		// terms and user profiles.
		relevanssi_premium_remove_doc( $post_id, $keep_internal_links );
		return;
	}

	$post_id = intval( $post_id );
	if ( empty( $post_id ) ) {
		// No post ID specified.
		return;
	}

	$rows_deleted = $wpdb->query(
		$wpdb->prepare(
			'DELETE FROM ' . $relevanssi_variables['relevanssi_table'] . ' WHERE doc=%d', // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared,WordPress.DB.PreparedSQL.InterpolatedNotPrepared
			$post_id
		)
	);

	if ( $rows_deleted && $rows_deleted > 0 ) {
		// $rows_deleted is the number of index rows (one per term), not the
		// number of documents, so it must not be subtracted from the count.
		$doc_count = (int) get_option( 'relevanssi_doc_count' );
		update_option( 'relevanssi_doc_count', max( 0, $doc_count - 1 ) );
	}
}
/**
 * Filter that allows you to set the index type based on the post type.
 *
 * Attachments get the index type 'attachment'; for every other post type the
 * incoming value is passed through untouched.
 *
 * @param string $type The index 'type' column value, default 'post'.
 * @param object $post The post object containing the post being indexed.
 *
 * @return string The index 'type' column value, default 'post'.
 */
function relevanssi_index_get_post_type( $type, $post ) {
	return 'attachment' === $post->post_type ? 'attachment' : $type;
}
/**
 * Sets the indexing MySQL LIMIT parameter and other parameters.
 *
 * The limit and the offset end up in a MySQL query as they are, so both are
 * converted to integers here; a negative offset is treated as 0.
 *
 * @param boolean|int $extend_offset If numeric, offsets the indexing by that
 * amount. If true, doesn't truncate the index before indexing. If false,
 * truncates index before indexing. Default false.
 * @param int         $post_limit    How many posts to index. Default null, no
 * limit.
 *
 * @return array Array with the LIMIT clause in 'limit', the extend boolean in
 * 'extend' and the size integer in 'size'.
 */
function relevanssi_indexing_query_args( $extend_offset, $post_limit ) {
	$size   = 0;
	$limit  = '';
	$extend = false;

	// If $post_limit parameter is present, numeric and > 0, use that. The value
	// is cast first, so that things like '1e2' or 0.5 never reach the query.
	if ( isset( $post_limit ) && is_numeric( $post_limit ) && (int) $post_limit > 0 ) {
		$size  = (int) $post_limit;
		$limit = " LIMIT $size";
	}

	if ( false !== $extend_offset ) {
		if ( ! $limit ) {
			// CLI request with no limit specified.
			$size  = 200;
			$limit = ' LIMIT 200';
		}

		if ( is_numeric( $extend_offset ) ) {
			// Extend stays false, because $limit now has LIMIT and OFFSET.
			$limit .= ' OFFSET ' . max( 0, (int) $extend_offset );
		} else {
			// Extending, so do not truncate and skip the posts already in the index.
			$extend = true;
		}
	}

	return array(
		'limit'  => $limit,
		'extend' => $extend,
		'size'   => $size,
	);
}
* Creates indexing queries for the comment content.
* @param array $insert_data The INSERT query data. Modified here.
* @param int $post_id The indexed post ID.
* @param int $min_word_length The minimum word length.
* @param boolean $debug If true, print out debug notices.
* @return int The number of tokens added to the data.
function relevanssi_index_comments( &$insert_data, $post_id, $min_word_length, $debug ) {
$n = 0;
if ( $debug ) {
relevanssi_debug_echo( 'Indexing comments.' );
// Everything below depends on the post actually having comment text.
$post_comments = relevanssi_get_comments( $post_id );
if ( ! empty( $post_comments ) ) {
// Three clean-up passes: relevanssi_strip_invisibles(), then every tag-like
// sequence is replaced with a single space, then wp_strip_all_tags() removes
// whatever markup is left.
$post_comments = relevanssi_strip_invisibles( $post_comments );
$post_comments = preg_replace( '/<[a-zA-Z\/][^>]*>/', ' ', $post_comments );
$post_comments = wp_strip_all_tags( $post_comments );
if ( $debug ) {
relevanssi_debug_echo( "Comment content: $post_comments" );
* Filters the indexing tokens before they are added to the $insert_data.
* @param array An array of token-frequency pairs.
* @param string The context of the tokens (eg. 'content', 'title').
* @return array The filtered tokens.
$post_comments_tokens = apply_filters(
relevanssi_tokenize( $post_comments, true, $min_word_length ),
if ( count( $post_comments_tokens ) > 0 ) {
foreach ( $post_comments_tokens as $token => $count ) {
// Plain assignment: the 'comment' count for a token is set, not accumulated.
$insert_data[ $token ]['comment'] = $count;
return $n;
* Creates indexing queries for the post author.
* @param array $insert_data The INSERT query data. Modified here.
* @param int $post_author The post author id.
* @param int $min_word_length The minimum word length.
* @param boolean $debug If true, print out debug notices.
* @return int The number of tokens added to the data.
function relevanssi_index_author( &$insert_data, $post_author, $min_word_length, $debug ) {
$n = 0;
// Only the author's display name is indexed.
$display_name = get_the_author_meta( 'display_name', $post_author );
// The name is tokenized with `false` as the second relevanssi_tokenize()
// argument, unlike the other contexts in this file, which pass true or 'body'.
/** This filter is documented in lib/indexing.php */
$name_tokens = apply_filters(
relevanssi_tokenize( $display_name, false, $min_word_length ),
if ( $debug ) {
relevanssi_debug_echo( 'Indexing post author as: ' . implode( ' ', array_keys( $name_tokens ) ) );
foreach ( $name_tokens as $token => $count ) {
// Counts accumulate under the 'author' key, which starts from 0 when unset.
if ( ! isset( $insert_data[ $token ]['author'] ) ) {
$insert_data[ $token ]['author'] = 0;
$insert_data[ $token ]['author'] += $count;
return $n;
* Creates indexing query data for custom fields.
* @param array $insert_data The INSERT query data. Modified here.
* @param int $post_id The indexed post ID.
* @param string|array $custom_fields The custom fields to index.
* @param int $min_word_length The minimum word length.
* @param boolean $debug If true, print out debug notices.
* @return int The number of tokens added to the data.
function relevanssi_index_custom_fields( &$insert_data, $post_id, $custom_fields, $min_word_length, $debug ) {
$n = 0;
// 'visible' is remembered before $custom_fields is overwritten below, because
// it later triggers the removal of underscore-prefixed field names.
$remove_underscore_fields = 'visible' === $custom_fields ? true : false;
// Both 'all' and 'visible' expand to every custom field key of the post.
if ( 'all' === $custom_fields || 'visible' === $custom_fields ) {
$custom_fields = get_post_custom_keys( $post_id );
* Filters the list of custom fields to index before indexing.
* @param array $custom_fields List of custom field names.
* @param int $post_id The post ID.
$custom_fields = apply_filters( 'relevanssi_index_custom_fields', $custom_fields, $post_id );
// Anything other than an array at this point means there is nothing to index.
if ( ! is_array( $custom_fields ) ) {
return 0;
$custom_fields = array_unique( $custom_fields );
if ( $remove_underscore_fields ) {
$custom_fields = array_filter(
function( $field ) {
// Keep '_relevanssi_pdf_content' and every name that does not start with '_'.
if ( '_relevanssi_pdf_content' === $field || '_' !== substr( $field, 0, 1 ) ) {
return $field;
// Premium includes some support for ACF repeater fields.
if ( function_exists( 'relevanssi_add_repeater_fields' ) ) {
relevanssi_add_repeater_fields( $custom_fields, $post_id );
// array_filter() without a callback drops empty entries from the list.
$custom_fields = array_filter( $custom_fields );
if ( $debug ) {
relevanssi_debug_echo( 'Custom fields to index: ' . implode( ', ', $custom_fields ) );
foreach ( $custom_fields as $field ) {
* Filters the custom field value before indexing.
* @param array Custom field values.
* @param string $field The custom field name.
* @param int $post_id The post ID.
// get_post_meta() is called with $single = false, so the unfiltered value is
// an array holding every value stored under this key.
$values = apply_filters( 'relevanssi_custom_field_value', get_post_meta( $post_id, $field, false ), $field, $post_id );
if ( empty( $values ) || ! is_array( $values ) ) {
foreach ( $values as $value ) {
// Quick hack : allow indexing of PODS relationship custom fields // TMV.
if ( is_array( $value ) && isset( $value['post_title'] ) ) {
$value = $value['post_title'];
if ( function_exists( 'relevanssi_index_acf' ) ) {
// @codeCoverageIgnoreStart
// Handle ACF fields. Only defined when ACF is active.
$acf_tokens = relevanssi_index_acf( $insert_data, $post_id, $field, $value );
// A truthy return value is added to the running token count.
if ( $acf_tokens ) {
$n += $acf_tokens;
// @codeCoverageIgnoreEnd
// Flatten other arrays.
if ( is_array( $value ) ) {
$value = relevanssi_flatten_array( $value );
if ( $debug ) {
relevanssi_debug_echo( "\tKey: " . $field . ' - value: ' . $value );
// Defaults for ordinary custom fields; the PDF content field switches both
// the context and the stopword setting to 'body'.
$context = 'custom_field';
$remove_stops = true;
if ( '_relevanssi_pdf_content' === $field ) {
$context = 'body';
$remove_stops = 'body';
/** This filter is documented in lib/indexing.php */
$value_tokens = apply_filters(
relevanssi_tokenize( $value, $remove_stops, $min_word_length ),
foreach ( $value_tokens as $token => $count ) {
// Counts accumulate under the 'customfield' key, which starts from 0 when unset.
if ( ! isset( $insert_data[ $token ]['customfield'] ) ) {
$insert_data[ $token ]['customfield'] = 0;
$insert_data[ $token ]['customfield'] += $count;
// Premium indexes more detail about custom fields.
if ( function_exists( 'relevanssi_customfield_detail' ) ) {
$insert_data = relevanssi_customfield_detail(
return $n;
* Creates indexing queries for the excerpt content.
* @param array $insert_data The INSERT query data. Modified here.
* @param string $excerpt The post excerpt to index.
* @param int $min_word_length The minimum word length.
* @param boolean $debug If true, print out debug notices.
* @return int The number of tokens added to the data.
function relevanssi_index_excerpt( &$insert_data, $excerpt, $min_word_length, $debug ) {
$n = 0;
// Include excerpt for attachments which use post_excerpt for captions - modified by renaissancehack.
if ( $debug ) {
relevanssi_debug_echo( "Indexing post excerpt: $excerpt" );
/** This filter is documented in lib/indexing.php */
$excerpt_tokens = apply_filters(
relevanssi_tokenize( $excerpt, true, $min_word_length ),
foreach ( $excerpt_tokens as $token => $count ) {
// Counts accumulate under the 'excerpt' key, which starts from 0 when unset.
if ( ! isset( $insert_data[ $token ]['excerpt'] ) ) {
$insert_data[ $token ]['excerpt'] = 0;
$insert_data[ $token ]['excerpt'] += $count;
return $n;
* Creates indexing queries for post title.
* @param array $insert_data The INSERT query data. Modified here.
* @param object $post The post object.
* @param int $min_word_length The minimum word length.
* @param boolean $debug If true, print out debug notices.
* @return int The number of tokens added to the data.
function relevanssi_index_title( &$insert_data, $post, $min_word_length, $debug ) {
$n = 0;
// An empty title contributes no tokens.
if ( empty( $post->post_title ) ) {
return 0;
* If this filter returns false, titles are not indexed at all.
* @param boolean Return false to prevent titles from being indexed. Default true.
if ( ! apply_filters( 'relevanssi_index_titles', true ) ) {
return 0;
if ( $debug ) {
relevanssi_debug_echo( 'Indexing post title.' );
// The title passes through two filters in order: the 'the_title' hook first,
// then Relevanssi's own 'relevanssi_post_title_before_tokenize' hook.
/** This filter is documented in wp-includes/post-template.php */
$filtered_title = apply_filters( 'the_title', $post->post_title, $post->ID );
* Filters the title before tokenizing and indexing.
* @param string $post->post_title The title.
* @param object $post The full post object.
$filtered_title = apply_filters( 'relevanssi_post_title_before_tokenize', $filtered_title, $post );
$title_tokens = relevanssi_tokenize(
* Filters whether stopwords should be removed from titles in tokenizing or not.
* @param boolean If true, remove stopwords. Default true.
apply_filters( 'relevanssi_remove_stopwords_in_titles', true ),
/** This filter is documented in lib/indexing.php */
$title_tokens = apply_filters( 'relevanssi_indexing_tokens', $title_tokens, 'title' );
if ( $debug ) {
relevanssi_debug_echo( "\tTitle, tokenized: " . implode( ' ', array_keys( $title_tokens ) ) );
foreach ( $title_tokens as $token => $count ) {
// Counts accumulate under the 'title' key, which starts from 0 when unset.
if ( ! isset( $insert_data[ $token ]['title'] ) ) {
$insert_data[ $token ]['title'] = 0;
$insert_data[ $token ]['title'] += $count;
return $n;
* Creates indexing queries for post content.
* @param array $insert_data The INSERT query data. Modified here.
* @param object $post The post object.
* @param int $min_word_length The minimum word length.
* @param boolean $debug If true, print out debug notices.
* @return int The number of tokens added to the data.
function relevanssi_index_content( &$insert_data, $post, $min_word_length, $debug ) {
$n = 0;
* If this filter returns false, post content is not indexed at all.
* @param boolean Return false to prevent post content from being indexed. Default true.
if ( ! apply_filters( 'relevanssi_index_content', true ) ) {
return $n;
if ( $debug ) {
relevanssi_debug_echo( 'Indexing post content.' );
// While indexing, the [noindex] shortcode is handled by
// relevanssi_noindex_shortcode_indexing(); the regular handler is put back
// further down, once shortcodes have been processed.
remove_shortcode( 'noindex' );
add_shortcode( 'noindex', 'relevanssi_noindex_shortcode_indexing' );
* Filters the post content before indexing.
* @param string $post->post_content The post content.
* @param object $post The full post object.
$contents = apply_filters( 'relevanssi_post_content', $post->post_content, $post );
if ( $debug ) {
relevanssi_debug_echo( "\tPost content after relevanssi_post_content:\n$contents" );
* Can be used to add extra content to the post before indexing.
* @author Alexander Gieg
* @param string The additional content.
* @param object $post The post object.
// Extra content supplied through the filter is trimmed and, when not empty,
// appended to the post content after a separating space.
$additional_content = trim( apply_filters( 'relevanssi_content_to_index', '', $post ) );
if ( ! empty( $additional_content ) ) {
$contents .= ' ' . $additional_content;
if ( $debug ) {
relevanssi_debug_echo( "\tAdditional content from relevanssi_content_to_index:\n$additional_content" );
// Shortcodes are either expanded or stripped, depending on the
// 'relevanssi_expand_shortcodes' option.
if ( 'on' === get_option( 'relevanssi_expand_shortcodes' ) ) {
// TablePress support.
$tablepress_controller = relevanssi_enable_tablepress_shortcodes();
// $post is saved before do_shortcode() and reassigned right after it.
// NOTE(review): presumably guards against shortcodes replacing $post - confirm.
$post_before_shortcode = $post;
$contents = do_shortcode( $contents );
$post = $post_before_shortcode; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
unset( $tablepress_controller );
} else {
$contents = strip_shortcodes( $contents );
// Put the regular [noindex] handler back.
remove_shortcode( 'noindex' );
add_shortcode( 'noindex', 'relevanssi_noindex_shortcode' );
* Filters the post content after shortcodes but before HTML stripping.
* @param string $contents The post content.
* @param object $post The full post object.
$contents = apply_filters(
$contents = relevanssi_strip_invisibles( $contents );
// Premium feature for better control over internal links.
if ( function_exists( 'relevanssi_process_internal_links' ) ) {
$contents = relevanssi_process_internal_links( $contents, $post->ID );
// Every tag-like sequence is replaced with a single space before
// wp_strip_all_tags() removes whatever markup is left.
$contents = preg_replace( '/<[a-zA-Z\/][^>]*>/', ' ', $contents );
$contents = wp_strip_all_tags( $contents );
* Filters the post content in indexing before tokenization.
* @param string $contents The post content.
* @param object $post The full post object.
$contents = apply_filters( 'relevanssi_post_content_before_tokenize', $contents, $post );
/** This filter is documented in lib/indexing.php */
$content_tokens = apply_filters(
relevanssi_tokenize( $contents, 'body', $min_word_length ),
if ( $debug ) {
relevanssi_debug_echo( "\tContent, tokenized:\n" . implode( ' ', array_keys( $content_tokens ) ) );
foreach ( $content_tokens as $token => $count ) {
// Counts accumulate under the 'content' key, which starts from 0 when unset.
if ( ! isset( $insert_data[ $token ]['content'] ) ) {
$insert_data[ $token ]['content'] = 0;
$insert_data[ $token ]['content'] += $count;
return $n;
/**
 * Disables problematic shortcode before Relevanssi indexing to avoid problems.
 *
 * Uses the `relevanssi_disabled_shortcodes` filter hook to filter the
 * shortcodes. The disabled shortcodes are first removed with
 * remove_shortcode() and then given a reference to `__return_empty_string`.
 *
 * The option `relevanssi_disable_shortcodes` is also supported for legacy
 * reasons, but it's better to use the filter instead.
 *
 * Entries are trimmed before they are checked for emptiness, so a stray
 * space or a doubled comma in the option value never registers a shortcode
 * with an empty tag.
 */
function relevanssi_disable_shortcodes() {
	$default_disables = array(
		'contact-form',        // Jetpack Contact Form causes an error message.
		'starrater',           // GD Star Rating rater shortcode causes problems.
		'responsive-flipbook', // Responsive Flipbook causes problems.
		'avatar_upload',       // WP User Avatar is incompatible.
		'product_categories',  // A problematic WooCommerce shortcode.
		'recent_products',     // A problematic WooCommerce shortcode.
		'php',                 // PHP Code for Posts.
		'watupro',             // Watu PRO doesn't co-operate.
		'starbox',             // Starbox shortcode breaks Relevanssi.
		'cfdb-save-form-post', // Contact Form DB.
		'woocommerce_cart',    // WooCommerce.
		'maxmegamenu',         // Max Mega Menu.
		'searchandfilter',     // Search and Filter.
		'downloads',           // Easy Digital Downloads.
		'swpm_protected',      // Simple Membership Partially Protected content.
		'gravityform',         // Gravity Forms.
		'sdm_latest_downloads', // SDM Simple Download Monitor.
		'slimstat',            // Slimstat Analytics.
		'ninja_tables',        // Ninja Tables.
	);

	// The legacy option is a comma-separated string. When it is missing or
	// empty there is nothing to split.
	$disable_shortcodes = get_option( 'relevanssi_disable_shortcodes' );
	$shortcodes         = array();
	if ( is_string( $disable_shortcodes ) && '' !== $disable_shortcodes ) {
		$shortcodes = explode( ',', $disable_shortcodes );
	}

	/**
	 * Filters the shortcodes Relevanssi disables while indexing posts.
	 *
	 * @param array An array of shortcodes disabled.
	 *
	 * @return array An array of shortcodes disabled.
	 */
	$shortcodes = apply_filters(
		'relevanssi_disabled_shortcodes',
		array_unique( array_merge( $shortcodes, $default_disables ) )
	);

	foreach ( (array) $shortcodes as $shortcode ) {
		// Normalise first, then test: ' ' must be treated the same as ''.
		$shortcode = is_scalar( $shortcode ) ? trim( (string) $shortcode ) : '';
		if ( empty( $shortcode ) ) {
			continue;
		}
		remove_shortcode( $shortcode );
		add_shortcode( $shortcode, '__return_empty_string' );
	}
}
* Converts INSERT query data array to query values.
* Takes the collected data and converts it to values that can be fed into
* an INSERT query using $wpdb->prepare(). Provides filters to modify the
* insert query values before and after the conversion.
* @global $wpdb The WordPress database interface.
* @global $relevanssi_variables Used for the Relevanssi db table name.
* @param array $insert_data An array of term => data pairs, where data has
* token counts for the term in different contexts.
* @param object $post The indexed post object.
* @return array An array of values clauses for an INSERT query.
function relevanssi_convert_data_to_values( $insert_data, $post ) {
global $wpdb, $relevanssi_variables;
// The column charset is looked up once; it decides below whether terms are
// passed through wp_encode_emoji().
$charset = $wpdb->get_col_charset(
* Sets the indexed post 'type' column in the index.
* Default value is 'post', but other common values include 'attachment',
* 'user' and taxonomy name.
* @param string Type value.
* @param object The post object for the current post.
$type = apply_filters( 'relevanssi_index_get_post_type', 'post', $post );
* Filters the indexing data before it is converted to INSERT queries.
* @param array $insert_data All the tokens and their counts.
* @param object $post The post object.
$insert_data = apply_filters( 'relevanssi_indexing_data', $insert_data, $post );
$values = array();
foreach ( $insert_data as $term => $data ) {
// Every context count falls back to 0 and both *_detail fields fall back to
// an empty string when the key is missing for this term.
$content = isset( $data['content'] ) ? $data['content'] : 0;
$title = isset( $data['title'] ) ? $data['title'] : 0;
$comment = isset( $data['comment'] ) ? $data['comment'] : 0;
$tag = isset( $data['tag'] ) ? $data['tag'] : 0;
$link = isset( $data['link'] ) ? $data['link'] : 0;
$author = isset( $data['author'] ) ? $data['author'] : 0;
$category = isset( $data['category'] ) ? $data['category'] : 0;
$excerpt = isset( $data['excerpt'] ) ? $data['excerpt'] : 0;
$taxonomy = isset( $data['taxonomy'] ) ? $data['taxonomy'] : 0;
$customfield = isset( $data['customfield'] ) ? $data['customfield'] : 0;
$mysqlcolumn = isset( $data['mysqlcolumn'] ) ? $data['mysqlcolumn'] : 0;
$taxonomy_detail = isset( $data['taxonomy_detail'] ) ? $data['taxonomy_detail'] : '';
$customfield_detail = isset( $data['customfield_detail'] ) ? $data['customfield_detail'] : '';
// Emoji are encoded only when the charset is exactly 'utf8'.
// NOTE(review): presumably because such columns cannot store 4-byte
// characters - confirm.
if ( 'utf8' === $charset ) {
$term = wp_encode_emoji( $term );
$term = trim( $term );
// One parenthesised VALUES group is prepared per term and collected in $values.
$value = $wpdb->prepare(
'(%d, %s, REVERSE(%s), %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %s, %s, %s, %d)',
array_push( $values, $value );
* Filters the INSERT query VALUES sections before they are inserted in the INSERT query.
* @param array $values Value sets.
* @param object $post The post object.
return apply_filters( 'relevanssi_indexing_values', $values, $post );