get_var( $query ); // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared

	if ( empty( $count ) ) {
		$count = 0;
	}

	return $count;
}

/**
 * Generates the indexing query.
 *
 * Generates the query that fetches the list of posts to index. The parameters are
 * assumed to be safely escaped. In regular use, the values are generated by
 * Relevanssi functions which provide reliable source data.
 *
 * @global object $wpdb                 The WordPress database interface.
 * @global array  $relevanssi_variables The Relevanssi global variables array, used
 * for table names.
 *
 * @param string  $valid_status Comma-separated list of valid post statuses.
 * @param boolean $extend       If true, only care about posts missing from the
 * index. If false, take all posts. Default false.
 * @param string  $restriction  Query restrictions, MySQL code that restricts the
 * posts fetched in the desired way. Default ''.
 * @param string  $limit        MySQL code to set the LIMIT and OFFSET values.
 * Default ''.
 *
 * @return string MySQL query to fetch the posts.
 */
function relevanssi_generate_indexing_query( $valid_status, $extend = false, $restriction = '', $limit = '' ) {
	global $wpdb, $relevanssi_variables;
	$relevanssi_table = $relevanssi_variables['relevanssi_table'];

	if ( 'off' === get_option( 'relevanssi_index_image_files', 'off' ) ) {
		$restriction .= " AND post.ID NOT IN ( SELECT ID FROM $wpdb->posts WHERE post_type = 'attachment' AND post_mime_type LIKE 'image%' ) ";
	}

	/**
	 * Filters the WHERE restriction for indexing queries.
	 *
	 * This filter hook can be used to exclude posts from indexing as early as is
	 * possible.
	 *
	 * @since 4.0.9 / 2.1.5
	 *
	 * @param array $restriction An array with two values: 'mysql' for the MySQL
	 * query restriction to modify, 'reason' for the reason of restriction.
	 */
	$restriction = apply_filters(
		'relevanssi_indexing_restriction',
		array(
			'mysql'  => $restriction,
			'reason' => '',
		)
	);

	/**
	 * Backwards compatibility for the change in filter parameters in Premium
	 * 2.8.0 in March 2020. Remove this eventually.
	 *
	 * A legacy filter function returns a plain string. The string has to be
	 * wrapped in a new array: writing to $restriction['mysql'] while
	 * $restriction is still a string is an illegal string offset write.
	 */
	if ( is_string( $restriction ) ) {
		$restriction = array(
			'mysql'  => $restriction,
			'reason' => 'relevanssi_indexing_restriction filter',
		);
	}

	// Tolerate a filter that returns something without the 'mysql' key.
	$restriction_sql = '';
	if ( is_array( $restriction ) && isset( $restriction['mysql'] ) ) {
		$restriction_sql = $restriction['mysql'];
	}

	if ( ! $extend ) {
		// Full indexing: take every post that passes the status checks.
		$q = "SELECT post.ID FROM $wpdb->posts post LEFT JOIN $wpdb->posts parent ON (post.post_parent=parent.ID) WHERE (post.post_status IN ($valid_status) OR (post.post_status='inherit' AND( (parent.ID is not null AND (parent.post_status IN ($valid_status))) OR (post.post_parent=0) ) )) AND post.ID NOT IN (SELECT post_id FROM $wpdb->postmeta WHERE meta_key = '_relevanssi_hide_post' AND meta_value = 'on') $restriction_sql ORDER BY post.ID DESC $limit";
	} else {
		// Extending: only take posts that have no rows in the index table.
		$processed_post_filter = 'r.doc is null';
		if ( 'noindex' !== get_option( 'relevanssi_internal_links', 'noindex' ) ) {
			$processed_post_filter = "(r.doc is null OR r.doc NOT IN (SELECT DISTINCT(doc) FROM $relevanssi_table WHERE link = 0))";
		}

		$q = "SELECT post.ID FROM $wpdb->posts post LEFT JOIN $wpdb->posts parent ON (post.post_parent=parent.ID) LEFT JOIN $relevanssi_table r ON (post.ID=r.doc) WHERE $processed_post_filter AND (post.post_status IN ($valid_status) OR (post.post_status='inherit' AND( (parent.ID is not null AND (parent.post_status IN ($valid_status))) OR (post.post_parent=0) ) ) ) AND post.ID NOT IN (SELECT post_id FROM $wpdb->postmeta WHERE meta_key = '_relevanssi_hide_post' AND meta_value = 'on') $restriction_sql ORDER BY post.ID DESC $limit";
	}

	/**
	 * Filters the Relevanssi indexing query.
	 *
	 * @param string $q The indexing MySQL query.
	 */
	return apply_filters( 'relevanssi_indexing_query', $q );
}

/**
 * Generates a post type restriction.
 *
 * Generates a post type restriction for the MySQL query based on the
 * 'relevanssi_index_post_types' option.
*
 * @return string MySQL code for the post type restriction.
 */
function relevanssi_post_type_restriction() {
	$types = get_option( 'relevanssi_index_post_types' );
	if ( ! is_array( $types ) ) {
		$types = array();
	}

	$post_types = array();
	foreach ( $types as $type ) {
		if ( 'bogus' === $type ) {
			// 'bogus' is not a real post type; Relevanssi uses it to make sure
			// the post type setting is saved, even if it's empty.
			continue;
		}
		if ( post_type_exists( $type ) ) {
			// Only accept post types that actually exist.
			$post_types[] = "'" . esc_sql( $type ) . "'";
		}
	}

	if ( empty( $post_types ) ) {
		// A placeholder that matches nothing, so that no posts are indexed.
		$post_types[] = "'no_post_types_chosen_so_index_no_posts'";
	}

	// The list is never empty at this point, so the restriction is always set.
	return ' AND post.post_type IN (' . implode( ', ', $post_types ) . ') ';
}

/**
 * Generates a list of valid post statuses.
 *
 * Generates a list of valid post statuses to use in indexing. By default,
 * Relevanssi accepts 'publish', 'draft', 'private', 'pending', and 'future'. If
 * you need to use a custom post status, you can use the
 * 'relevanssi_valid_status' filter hook to add your own post status to the list
 * of valid statuses.
 *
 * @param boolean $return_array If true, return array; default false, return
 * string.
 *
 * @return string|array A comma-separated list of escaped valid post statuses
 * ready for MySQL, or an unescaped array of statuses, depending on the
 * $return_array parameter. If the filter returns a non-array, the default
 * statuses are used in both modes.
 */
function relevanssi_valid_status_array( $return_array = false ) {
	$default_statuses = array( 'publish', 'draft', 'private', 'pending', 'future' );

	/**
	 * Filters the valid status array.
	 *
	 * Allows you to modify the array that contains all valid post statuses for
	 * Relevanssi post indexing.
	 *
	 * @return array Array of post statuses.
	 */
	$valid_status_array = apply_filters( 'relevanssi_valid_status', $default_statuses );

	if ( $return_array ) {
		// Callers pass the result to in_array(), so never hand out a non-array.
		return is_array( $valid_status_array ) ? $valid_status_array : $default_statuses;
	}

	if ( is_array( $valid_status_array ) && count( $valid_status_array ) > 0 ) {
		$escaped = array();
		foreach ( $valid_status_array as $status ) {
			$escaped[] = "'" . esc_sql( $status ) . "'";
		}
		return implode( ',', $escaped );
	}

	// If the filter makes the setting a non-array (or empties it), fall back to
	// reasonable default values.
	return "'publish', 'draft', 'private', 'pending', 'future'";
}

/**
 * Builds the index.
 *
 * @global object $wpdb                 The WordPress database interface.
 * @global array  $relevanssi_variables The Relevanssi global variables array,
 * used for table names.
 *
 * @param boolean|int $extend_offset If numeric, offsets the indexing by that
 * amount. If true, doesn't truncate the index before indexing. If false,
 * truncates index before indexing. Default false.
 * @param boolean     $verbose       Not used anymore, kept for backwards
 * compatibility.
 * @param int         $post_limit    How many posts to index. Default null, no
 * limit.
 * @param boolean     $is_ajax       If true, indexing is done in AJAX context.
 * Default false.
 *
 * @return array In AJAX context, returns array with two values:
 * 'indexing_complete' tells whether indexing is completed or not, and 'indexed'
 * returns the number of posts indexed. Outside AJAX context, these values are
 * returned as an array in format of array(completed, posts indexed).
 */
function relevanssi_build_index( $extend_offset = false, $verbose = null, $post_limit = null, $is_ajax = false ) {
	global $wpdb, $relevanssi_variables;
	$relevanssi_table = $relevanssi_variables['relevanssi_table'];

	// Thanks to Julien Mession. This speeds up indexing a lot.
	wp_suspend_cache_addition( true );

	if ( false === $extend_offset ) {
		// Truncate the index first.
		relevanssi_truncate_index();

		// Taxonomy term, user profile and post type archive indexing.
		if ( function_exists( 'relevanssi_premium_indexing' ) ) {
			relevanssi_premium_indexing();
		}

		update_option( 'relevanssi_index', '' );
	}

	$indexing_query_args = relevanssi_indexing_query_args( $extend_offset, $post_limit );

	// The values generated by these functions are safe to use for MySQL.
	$restriction  = relevanssi_post_type_restriction();
	$valid_status = relevanssi_valid_status_array();

	$query = relevanssi_generate_indexing_query(
		$valid_status,
		$indexing_query_args['extend'],
		$restriction,
		$indexing_query_args['limit']
	);

	/* This action documented earlier in lib/indexing.php. */
	do_action( 'relevanssi_pre_indexing_query' );

	$content = $wpdb->get_results( $query ); // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared
	if ( ! is_array( $content ) ) {
		// No usable result set (for example the query filter emptied the
		// query): treat it as nothing to index instead of failing in count().
		$content = array();
	}

	// Defined up front so the checks below never read an undefined variable when
	// WP_CLI is active but the progress bar helper is not available.
	$progress = null;
	if ( defined( 'WP_CLI' ) && WP_CLI && function_exists( 'relevanssi_generate_progress_bar' ) ) {
		// @codeCoverageIgnoreStart
		$progress = relevanssi_generate_progress_bar( 'Indexing posts', count( $content ) );
		// @codeCoverageIgnoreEnd
	}

	$custom_fields = relevanssi_get_custom_fields();

	$n                  = 0;
	$remove_first       = false;
	$bypass_global_post = true; // $bypassglobalpost set to true, because at this
	// point global $post should be null, but in some cases it is not.

	foreach ( $content as $post ) {
		$result = relevanssi_index_doc( $post->ID, $remove_first, $custom_fields, $bypass_global_post );
		if ( is_numeric( $result ) && $result > 0 ) {
			// $n calculates the number of posts indexed.
			$n++;
		}
		if ( defined( 'WP_CLI' ) && WP_CLI && $progress ) {
			// @codeCoverageIgnoreStart
			$progress->tick();
			// @codeCoverageIgnoreEnd
		}
	}
	if ( defined( 'WP_CLI' ) && WP_CLI && $progress ) {
		// @codeCoverageIgnoreStart
		$progress->finish();
		// @codeCoverageIgnoreEnd
	}

	// To prevent empty indices.
	$wpdb->query( "ANALYZE TABLE $relevanssi_table" ); // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared,WordPress.DB.PreparedSQL.InterpolatedNotPrepared

	// Indexing is complete when there was no batch limit, or when the batch
	// came back smaller than the limit.
	$complete = false;
	$size     = $indexing_query_args['size'];

	if ( ( 0 === $size ) || ( count( $content ) < $size ) ) {
		$complete = true;
		update_option( 'relevanssi_indexed', 'done' );
	}

	// Update the document count variable.
	relevanssi_update_doc_count();

	wp_suspend_cache_addition( false );

	if ( $is_ajax ) {
		$response = array(
			'indexing_complete' => $complete,
			'indexed'           => $n,
		);
		return $response;
	}

	return array( $complete, $n );
}

/**
 * Indexes one document.
 *
 * Different cases:
 *
 * Build index:
 * - global $post is null, $index_post is a post object.
 *
 * Update post:
 * - global $post has the original $post, $index_post is the ID of revision.
 *
 * Quick edit:
 * - global $post is an array, $index_post is the ID of current revision.
 *
 * @global object $wpdb                 The WordPress database interface.
 * @global array  $relevanssi_variables The Relevanssi global variables array, used
 * for table names.
 * @global object $post                 The global post object.
 *
 * @param object|int $index_post         The post to index, either post object or
 * post ID.
 * @param boolean    $remove_first       If true, remove the post from the index
 * before indexing. Default false.
 * @param array      $custom_fields      The custom fields that are indexed for the
 * post. Default an empty string.
 * @param boolean    $bypass_global_post If true, do not use the global $post object.
 * Default false.
 * @param boolean    $debug              If true, echo out debugging information.
 * Default false.
 *
 * @return string|int Number of insert queries run, or -1 if the indexing fails,
 * or 'hide' in case the post is hidden or 'donotindex' if a filter blocks this.
*/
function relevanssi_index_doc( $index_post, $remove_first = false, $custom_fields = '', $bypass_global_post = false, $debug = false ) {
	global $wpdb, $post, $relevanssi_variables;
	$relevanssi_table = $relevanssi_variables['relevanssi_table'];
	$post_was_null    = true;
	$previous_post    = null;

	// Check if this is a Jetpack Contact Form entry.
	if ( isset( $_REQUEST['contact-form-id'] ) ) { // phpcs:ignore WordPress.Security.NonceVerification
		return -1;
	}

	// Remember the global $post so that it can be restored before returning.
	if ( isset( $post ) ) {
		$post_was_null = false;
		$previous_post = $post;
	}

	if ( empty( $post ) || $bypass_global_post ) {
		$post = is_object( $index_post ) ? $index_post : get_post( $index_post ); // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
	}

	// Finally fetch the post again by ID. Complicated, yes, but unless we do this,
	// we might end up indexing the post before the updates come in.
	$post = isset( $post->ID ) ? get_post( $post->ID ) : null; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited

	if ( null === $post ) {
		// At this point we should have something in $post; if not, quit.
		if ( $previous_post || $post_was_null ) {
			$post = $previous_post; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
		}
		return -1;
	}

	// Post exclusion feature from Relevanssi Premium.
	if ( function_exists( 'relevanssi_hide_post' ) ) {
		if ( relevanssi_hide_post( $post->ID ) ) {
			if ( $debug ) {
				relevanssi_debug_echo( 'relevanssi_hide_post() returned true.' );
			}
			// Record the reason while $post still is the post being indexed.
			// After the restore below, $post is the previous global post (or
			// null), which is not the post that was hidden.
			update_post_meta( $post->ID, '_relevanssi_noindex_reason', __( 'Relevanssi index exclude', 'relevanssi' ) );
			if ( $previous_post || $post_was_null ) {
				$post = $previous_post; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
			}
			return 'hide';
		}
	}

	$index_this_post        = false;
	$post->indexing_content = true;

	$index_types = get_option( 'relevanssi_index_post_types', array() );
	if ( is_array( $index_types ) && in_array( $post->post_type, $index_types, true ) ) {
		$index_this_post = true;
	}

	/**
	 * Filters whether a post is indexed or not.
	 *
	 * Allows you to filter whether a post is indexed or not.
	 *
	 * @param boolean|string If not false, the post is not indexed. The value
	 * can be a boolean, or a string containing an explanation for the
	 * exclusion. Default false.
	 * @param int            The post ID.
	 */
	$do_not_index = apply_filters( 'relevanssi_do_not_index', false, $post->ID );
	if ( $do_not_index ) {
		// Filter says no.
		if ( true === $do_not_index ) {
			$do_not_index = __( 'Blocked by a filter function', 'relevanssi' );
		}
		if ( $debug ) {
			relevanssi_debug_echo( 'relevanssi_do_not_index says exclude, because: ' . $do_not_index );
		}
		update_post_meta( $post->ID, '_relevanssi_noindex_reason', $do_not_index );
		$index_this_post = false;
	}

	if ( $remove_first ) {
		// We are updating a post, so remove the old stuff first.
		relevanssi_remove_doc( $post->ID, true );
		if ( function_exists( 'relevanssi_remove_item' ) ) {
			relevanssi_remove_item( $post->ID, 'post' );
		}
		if ( $debug ) {
			relevanssi_debug_echo( 'Removed the post from the index.' );
		}
	}

	// This needs to be here, after the call to relevanssi_remove_doc(), because
	// otherwise a post that's in the index but shouldn't be there won't get removed.
	if ( ! $index_this_post ) {
		if ( $previous_post || $post_was_null ) {
			$post = $previous_post; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
		}
		return 'donotindex';
	}

	$n = 0;

	/**
	 * Allows filtering the indexed post before indexing.
	 *
	 * @param object $post The post object.
	 * @param object $post The post object again (in other uses for this filter, the
	 * second parameter actually makes sense).
	 */
	$post = apply_filters( 'relevanssi_post_to_index', $post, $post ); // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited

	$min_word_length = get_option( 'relevanssi_min_word_length', 3 );
	$insert_data     = array();

	if ( 'none' !== get_option( 'relevanssi_index_comments' ) ) {
		$n += relevanssi_index_comments( $insert_data, $post->ID, $min_word_length, $debug );
	}

	$taxonomies = get_option( 'relevanssi_index_taxonomies_list', array() );
	if ( ! is_array( $taxonomies ) ) {
		// A stored non-array value (for example an empty string) means there
		// are no taxonomies to index.
		$taxonomies = array();
	}
	foreach ( $taxonomies as $taxonomy ) {
		$n += relevanssi_index_taxonomy_terms( $insert_data, $post->ID, $taxonomy, $debug );
	}

	if ( 'on' === get_option( 'relevanssi_index_author' ) ) {
		$n += relevanssi_index_author( $insert_data, $post->post_author, $min_word_length, $debug );
	}

	$n += relevanssi_index_custom_fields( $insert_data, $post->ID, $custom_fields, $min_word_length, $debug );

	if (
		isset( $post->post_excerpt )
		&& ( 'on' === get_option( 'relevanssi_index_excerpt' ) || 'attachment' === $post->post_type )
	) {
		// Attachment caption is stored in the excerpt.
		$n += relevanssi_index_excerpt( $insert_data, $post->post_excerpt, $min_word_length, $debug );
	}

	// Premium can index arbitrary MySQL columns.
	if ( function_exists( 'relevanssi_index_mysql_columns' ) ) {
		if ( $debug ) {
			relevanssi_debug_echo( 'Indexing MySQL columns.' );
		}
		$insert_data = relevanssi_index_mysql_columns( $insert_data, $post->ID );
	}

	// Premium can index PDF content for the parent post.
	if ( function_exists( 'relevanssi_index_pdf_for_parent' ) ) {
		if ( $debug ) {
			relevanssi_debug_echo( 'Indexing PDF content for parent post.' );
		}
		$insert_data = relevanssi_index_pdf_for_parent( $insert_data, $post->ID );
	}

	$n += relevanssi_index_title( $insert_data, $post, $min_word_length, $debug );
	$n += relevanssi_index_content( $insert_data, $post, $min_word_length, $debug );

	$values = relevanssi_convert_data_to_values( $insert_data, $post );

	if ( ! empty( $values ) ) {
		$values = implode( ', ', $values );
		$query  = "INSERT IGNORE INTO $relevanssi_table (doc, term, term_reverse, content, title, comment, tag, link, author, category, excerpt, taxonomy, customfield, type, taxonomy_detail, customfield_detail, mysqlcolumn) VALUES $values";
		if ( $debug ) {
			relevanssi_debug_echo( "Final indexing query:\n\t$query" );
		}
		$wpdb->query( $query ); // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared
	}

	// The post was indexed, so any earlier exclusion reason is obsolete.
	delete_post_meta( $post->ID, '_relevanssi_noindex_reason' );

	if ( $previous_post || $post_was_null ) {
		$post = $previous_post; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited
	}

	return $n;
}

/**
 * Index taxonomy terms for given post and given taxonomy.
 *
 * @since 1.8
 *
 * @param array   $insert_data Insert query data array, modified here.
 * @param int     $post_id     The indexed post ID.
 * @param string  $taxonomy    Taxonomy name.
 * @param boolean $debug       If true, print out debugging notices.
 *
 * @return int The number of new tokens added.
 */
function relevanssi_index_taxonomy_terms( &$insert_data, $post_id, $taxonomy, $debug ) {
	if ( $debug ) {
		relevanssi_debug_echo( "Indexing taxonomy terms for $taxonomy" );
	}

	$n = 0;

	$min_word_length     = get_option( 'relevanssi_min_word_length', 3 );
	$post_taxonomy_terms = get_the_terms( $post_id, $taxonomy );

	if ( ! is_array( $post_taxonomy_terms ) ) {
		// get_the_terms() returns false when the post has no terms and a
		// WP_Error on failure; there is nothing to index in either case.
		return $n;
	}

	$term_string = '';
	$post_term   = null;
	foreach ( $post_taxonomy_terms as $post_term ) {
		if ( is_object( $post_term ) ) {
			$term_string .= $post_term->name . ' ';
		}
	}
	if ( $debug ) {
		relevanssi_debug_echo( "Taxonomy term content for $taxonomy: $term_string" );
	}

	/**
	 * Filters the taxonomy term content before indexing.
	 *
	 * @param string The taxonomy term content.
	 * @param object The last term of the post that was processed.
	 * @param string The taxonomy.
	 * @param int    The post ID.
	 */
	$term_string = apply_filters( 'relevanssi_tag_before_tokenize', trim( $term_string ), $post_term, $taxonomy, $post_id );

	/** This filter is documented in lib/indexing.php */
	$term_tokens = apply_filters(
		'relevanssi_indexing_tokens',
		relevanssi_tokenize( $term_string, true, $min_word_length ),
		'taxonomy-' . $taxonomy
	);

	// The index column depends only on the taxonomy, so pick it once.
	switch ( $taxonomy ) {
		case 'post_tag':
			$type = 'tag';
			break;
		case 'category':
			$type = 'category';
			break;
		default:
			$type = 'taxonomy';
	}

	foreach ( $term_tokens as $token => $count ) {
		$n++;

		$insert_data[ $token ][ $type ] = isset( $insert_data[ $token ][ $type ] )
			? $insert_data[ $token ][ $type ] + $count
			: $count;

		// The per-taxonomy counts are kept as a JSON encoded array.
		$tax_detail = array();
		if ( isset( $insert_data[ $token ]['taxonomy_detail'] ) ) {
			$tax_detail = json_decode( $insert_data[ $token ]['taxonomy_detail'], true );
			if ( ! is_array( $tax_detail ) ) {
				$tax_detail = array();
			}
		}

		$tax_detail[ $taxonomy ] = isset( $tax_detail[ $taxonomy ] )
			? $tax_detail[ $taxonomy ] + $count
			: $count;

		$insert_data[ $token ]['taxonomy_detail'] = wp_json_encode( $tax_detail );
	}

	return $n;
}

/**
 * Updates child posts when a parent post changes status.
 *
 * Called from 'transition_post_status' action hook when a post is edited,
 * published, or deleted. Will do the appropriate indexing action on the child
 * posts and attachments.
 *
 * @author renaissancehack
 *
 * @param string $new_status The new status.
 * @param string $old_status The old status.
 * @param object $post       The post object.
 *
 * @return null|array Null in problem cases, an array of 'removed' and
 * 'indexed' values that show how many posts were indexed and removed.
 */
function relevanssi_update_child_posts( $new_status, $old_status, $post ) {
	// Safety check, for WordPress Editorial Calendar incompatibility.
	if ( ! isset( $post ) || ! isset( $post->ID ) ) {
		return;
	}

	/** Documented in lib/indexing.php. */
	$index_statuses = apply_filters( 'relevanssi_valid_status', array( 'publish', 'private', 'draft', 'pending', 'future' ) );

	/**
	 * Filters the attachment and revision post types.
	 *
	 * If you want attachment indexing to cover other post types than just
	 * attachment, you need to include the new post type in the array with
	 * this filter.
	 *
	 * @param array Array of post types, default 'attachment' and 'revision'.
	 */
	$attachment_revision_types = apply_filters( 'relevanssi_index_attachment_revision_types', array( 'attachment', 'revision' ) );

	$did_nothing = array(
		'removed' => 0,
		'indexed' => 0,
	);

	/**
	 * Either:
	 *
	 * 1. New status equals old status.
	 * 2. Both new and old status are in the list of stati to index.
	 * 3. The post is an attachment or a revision.
	 *
	 * In any of these cases, do nothing.
	 */
	if ( $new_status === $old_status ) {
		return $did_nothing;
	}
	if ( in_array( $new_status, $index_statuses, true ) && in_array( $old_status, $index_statuses, true ) ) {
		return $did_nothing;
	}
	if ( in_array( $post->post_type, $attachment_revision_types, true ) ) {
		return $did_nothing;
	}

	$post_types = get_option( 'relevanssi_index_post_types' );
	$args       = array(
		'post_parent' => $post->ID,
		'post_type'   => $post_types,
	);

	$removed = 0;
	$indexed = 0;

	$child_posts = get_children( $args );
	if ( ! empty( $child_posts ) ) {
		// The children follow the parent: out of the index when the new status
		// is not indexed, otherwise (re)indexed.
		$remove_children = ! in_array( $new_status, $index_statuses, true );
		foreach ( $child_posts as $child_post ) {
			if ( $remove_children ) {
				relevanssi_remove_doc( $child_post->ID );
				$removed++;
			} else {
				relevanssi_publish( $child_post->ID );
				$indexed++;
			}
		}
	}

	return array(
		'removed' => $removed,
		'indexed' => $indexed,
	);
}

/**
 * Indexes a published post.
 *
 * @param int     $post_id            The post ID.
 * @param boolean $bypass_global_post If true, bypass the global $post object.
 * Default false.
 *
 * @return string|int Returns 'auto-draft' if the post is an auto draft and
 * thus skipped, or the relevanssi_index_doc() return value.
*
 * @see relevanssi_index_doc()
 */
function relevanssi_publish( $post_id, $bypass_global_post = false ) {
	$post_status = get_post_status( $post_id );
	if ( 'auto-draft' === $post_status ) {
		return 'auto-draft';
	}

	$custom_fields = relevanssi_get_custom_fields();

	// The second argument asks for the old index rows to be removed first.
	return relevanssi_index_doc( $post_id, true, $custom_fields, $bypass_global_post );
}

/**
 * Indexes a post after publishing or modification.
 *
 * Hooks on to 'wp_insert_post' action hook and triggers when wp_insert_post() is
 * used to add a post into the database. Doesn't use the global $post object, because
 * that doesn't have the correct post.
 *
 * @author Lumpysimon.
 *
 * @global object $wpdb The WP database interface.
 *
 * @param int $post_id The post ID.
 *
 * @return string|int Returns 'auto-draft' if the post is an auto draft and
 * thus skipped, 'removed' if the post is removed or the relevanssi_index_doc()
 * return value from relevanssi_publish().
 *
 * @see relevanssi_publish()
 */
function relevanssi_insert_edit( $post_id ) {
	global $wpdb;

	$post_status = get_post_status( $post_id );
	if ( 'auto-draft' === $post_status ) {
		return 'auto-draft';
	}

	if ( 'inherit' === $post_status ) {
		// Get the post status from the parent post.
		$parent_id   = wp_get_post_parent_id( $post_id );
		$post_status = get_post_status( $parent_id );
	}

	$index_this_post = true;

	/* Documented in lib/indexing.php. */
	$restriction = apply_filters(
		'relevanssi_indexing_restriction',
		array(
			'mysql'  => '',
			'reason' => '',
		)
	);

	// Same backwards compatibility as in the indexing query generation: a
	// legacy filter function may return a plain MySQL string instead of the
	// array.
	if ( is_string( $restriction ) ) {
		$restriction = array(
			'mysql'  => $restriction,
			'reason' => 'relevanssi_indexing_restriction filter',
		);
	}
	$restriction_sql    = isset( $restriction['mysql'] ) ? $restriction['mysql'] : '';
	$restriction_reason = isset( $restriction['reason'] ) ? $restriction['reason'] : '';

	if ( ! empty( $restriction_sql ) ) {
		// Check the indexing restriction filter: if the post passes the filter, this
		// should return the post ID. Only the ID goes through prepare(); the
		// restriction is ready-made MySQL that may contain literal % characters.
		$is_unrestricted = $wpdb->get_var(
			$wpdb->prepare( "SELECT ID FROM $wpdb->posts AS post WHERE ID = %d", $post_id ) . ' ' . $restriction_sql // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared,WordPress.DB.PreparedSQL.InterpolatedNotPrepared
		);
		if ( ! $is_unrestricted ) {
			$index_this_post = false;
		}
	}

	$return_array   = true;
	$index_statuses = relevanssi_valid_status_array( $return_array );
	if ( ! in_array( $post_status, $index_statuses, true ) ) {
		$index_this_post = false;
	}

	if ( $index_this_post ) {
		$bypass_global_post = true;
		$return_value       = relevanssi_publish( $post_id, $bypass_global_post );
	} else {
		// The post isn't supposed to be indexed anymore, remove it from index.
		relevanssi_remove_doc( $post_id );
		update_post_meta( $post_id, '_relevanssi_noindex_reason', trim( $restriction_reason ) );
		$return_value = 'removed';
	}

	relevanssi_update_doc_count();

	return $return_value;
}

/**
 * Updates comment indexing when comments are added, edited or deleted.
 *
 * @author OdditY
 *
 * @param int $comment_id Comment ID.
 *
 * @see relevanssi_comment_remove
 * @see relevanssi_comment_edit
 * @see relevanssi_publish
 *
 * @return int|string The relevanssi_publish return value, "nocommentfound" if
 * the comment doesn't exist or "donotindex" if the comment cannot be indexed or
 * comment indexing is disabled.
 */
function relevanssi_index_comment( $comment_id ) {
	$comment_indexing_type = get_option( 'relevanssi_index_comments' );

	if ( 'normal' !== $comment_indexing_type && 'all' !== $comment_indexing_type ) {
		return 'donotindex';
	}

	// With the 'normal' setting only plain comments are indexed.
	$no_pingbacks = ( 'normal' === $comment_indexing_type );

	$comment = get_comment( $comment_id );
	if ( ! $comment ) {
		return 'nocommentfound';
	}
	if ( $no_pingbacks && ! empty( $comment->comment_type ) ) {
		return 'donotindex';
	}
	if ( 1 !== intval( $comment->comment_approved ) ) {
		// Comment isn't approved, do not index.
		return 'donotindex';
	}

	// Reindex the whole post the comment belongs to.
	return relevanssi_publish( $comment->comment_post_ID );
}

/**
 * Returns the comment text for a post.
 *
 * @param int $post_id The post ID.
 *
 * @return string All the comment content as a string that has the comment author
 * and the comment text.
 */
function relevanssi_get_comments( $post_id ) {
	/**
	 * If this filter returns true, the comments for the post are not indexed.
	 *
	 * @param boolean Return true to block the comment indexing. Default false.
	 * @param int     $post_id The post ID.
	 */
	if ( apply_filters( 'relevanssi_index_comments_exclude', false, $post_id ) ) {
		return '';
	}

	$comment_indexing = get_option( 'relevanssi_index_comments' );
	if ( 'none' === $comment_indexing ) {
		return '';
	}

	$comment_types = array( 'comment' );
	if ( 'all' === $comment_indexing ) {
		$comment_types[] = 'pings';
	}

	$comment_string = '';
	$offset         = 0;
	$limit          = 20;

	while ( true ) {
		// Posts may have lots of comments. Do 20 at the time to avoid memory issues.
		$args     = array(
			'offset' => $offset,
			'number' => $limit,
			'type'   => $comment_types,
		);
		$comments = get_approved_comments( $post_id, $args );
		if ( count( $comments ) === 0 ) {
			break;
		}
		foreach ( $comments as $comment ) {
			/**
			 * Filters the comment author before indexing.
			 *
			 * @param string Comment author display name.
			 * @param int    The comment ID.
			 */
			$comment_string .= ' ' . apply_filters( 'relevanssi_comment_author_to_index', $comment->comment_author, $comment->comment_ID );

			/**
			 * Filters the comment content before indexing.
			 *
			 * @param string Comment content.
			 * @param int    The comment ID.
			 */
			$comment_string .= ' ' . apply_filters( 'relevanssi_comment_content_to_index', $comment->comment_content, $comment->comment_ID );
		}
		$offset += $limit;
	}

	return $comment_string;
}

/**
 * Truncates the Relevanssi index.
 *
 * @global object $wpdb                 The WordPress database interface.
 * @global array  $relevanssi_variables The Relevanssi global variables array, used
 * for table names.
 *
 * @return boolean|int The return value of the $wpdb->query() call that runs the
 * TRUNCATE statement.
 */
function relevanssi_truncate_index() {
	global $wpdb, $relevanssi_variables;
	$relevanssi_table = $relevanssi_variables['relevanssi_table'];

	return $wpdb->query( "TRUNCATE TABLE $relevanssi_table" ); // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared,WordPress.DB.PreparedSQL.InterpolatedNotPrepared
}

/**
 * Remove post from the Relevanssi index.
 *
 * @global object $wpdb The WordPress database interface.
* @global array  $relevanssi_variables The Relevanssi global variables array, used
 * for table names.
 *
 * @param int     $post_id             The post ID.
 * @param boolean $keep_internal_links If true, keep internal link indexing (a
 * Premium feature). Default false.
 */
function relevanssi_remove_doc( $post_id, $keep_internal_links = false ) {
	if ( function_exists( 'relevanssi_premium_remove_doc' ) ) {
		// Premium has a different method, because the index can include taxonomy
		// terms and user profiles.
		relevanssi_premium_remove_doc( $post_id, $keep_internal_links );
		return;
	}

	global $wpdb, $relevanssi_variables;

	$post_id = intval( $post_id );
	if ( empty( $post_id ) ) {
		// No post ID specified.
		return;
	}

	$rows_deleted = $wpdb->query(
		$wpdb->prepare(
			'DELETE FROM ' . $relevanssi_variables['relevanssi_table'] . ' WHERE doc=%d', // phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared,WordPress.DB.PreparedSQL.InterpolatedNotPrepared
			$post_id
		)
	);

	if ( $rows_deleted && $rows_deleted > 0 ) {
		// The index holds many rows for one document, so the number of deleted
		// rows is not the number of removed documents: exactly one document
		// left the index. Never let the stored count drop below zero.
		// NOTE(review): assumes 'relevanssi_doc_count' counts documents, as the
		// option name suggests - confirm against relevanssi_update_doc_count().
		$doc_count = intval( get_option( 'relevanssi_doc_count' ) );
		update_option( 'relevanssi_doc_count', max( 0, $doc_count - 1 ) );
	}
}

/**
 * Filter that allows you to set the index type based on the post type.
 *
 * @param string $type The index 'type' column value, default 'post'.
 * @param object $post The post object containing the post being indexed.
 *
 * @return string The index 'type' column value: 'attachment' for attachments,
 * otherwise the incoming value unchanged.
 */
function relevanssi_index_get_post_type( $type, $post ) {
	if ( 'attachment' === $post->post_type ) {
		$type = 'attachment';
	}
	return $type;
}

/**
 * Sets the indexing MySQL LIMIT parameter and other parameters.
 *
 * @param boolean|int $extend_offset If numeric, offsets the indexing by that
 * amount. If true, doesn't truncate the index before indexing. If false,
 * truncates index before indexing. Default false.
 * @param int         $post_limit    How many posts to index. Default null, no
 * limit.
*
 * @return array Array with the LIMIT clause in 'limit', the extend boolean in
 * 'extend' and the size integer in 'size'.
 */
function relevanssi_indexing_query_args( $extend_offset, $post_limit ) {
	$size   = 0;
	$limit  = '';
	$extend = false;

	// If $post_limit parameter is present, numeric and > 0, use that. The value
	// is cast to an integer, so that numeric strings like '1e2' or '7.9' cannot
	// end up in the MySQL as such.
	$post_limit = is_numeric( $post_limit ) ? (int) $post_limit : 0;
	if ( $post_limit > 0 ) {
		$size  = $post_limit;
		$limit = " LIMIT $post_limit";
	}

	if ( false !== $extend_offset ) {
		if ( ! $limit ) {
			// CLI request with no limit specified.
			$size  = 200;
			$limit = ' LIMIT 200';
		}

		if ( is_numeric( $extend_offset ) ) {
			// Extend stays false, because $limit now has LIMIT and OFFSET. A
			// negative offset is not valid MySQL, so it is clamped to zero.
			$limit .= ' OFFSET ' . max( 0, (int) $extend_offset );
		} else {
			// Extending, so do not truncate and skip the posts already in the index.
			$extend = true;
		}
	}

	return array(
		'limit'  => $limit,
		'extend' => $extend,
		'size'   => $size,
	);
}

/**
 * Creates indexing queries for the comment content.
 *
 * @param array   $insert_data     The INSERT query data. Modified here.
 * @param int     $post_id         The indexed post ID.
 * @param int     $min_word_length The minimum word length.
 * @param boolean $debug           If true, print out debug notices.
 *
 * @return int The number of tokens added to the data.
 */
function relevanssi_index_comments( &$insert_data, $post_id, $min_word_length, $debug ) {
	$n = 0;
	if ( $debug ) {
		relevanssi_debug_echo( 'Indexing comments.' );
	}

	$post_comments = relevanssi_get_comments( $post_id );
	if ( empty( $post_comments ) ) {
		return $n;
	}

	// Strip the markup; tags are replaced with spaces so that the words on
	// both sides of a tag stay separate.
	$post_comments = relevanssi_strip_invisibles( $post_comments );
	$post_comments = preg_replace( '/<[a-zA-Z\/][^>]*>/', ' ', $post_comments );
	$post_comments = wp_strip_all_tags( $post_comments );
	if ( $debug ) {
		relevanssi_debug_echo( "Comment content: $post_comments" );
	}

	/**
	 * Filters the indexing tokens before they are added to the $insert_data.
	 *
	 * @param array  An array of token-frequency pairs.
	 * @param string The context of the tokens (eg. 'content', 'title').
	 *
	 * @return array The filtered tokens.
	 */
	$post_comments_tokens = apply_filters(
		'relevanssi_indexing_tokens',
		relevanssi_tokenize( $post_comments, true, $min_word_length ),
		'comments'
	);

	foreach ( $post_comments_tokens as $token => $count ) {
		$n++;
		$insert_data[ $token ]['comment'] = $count;
	}

	return $n;
}

/**
 * Creates indexing queries for the post author.
 *
 * @param array   $insert_data     The INSERT query data. Modified here.
 * @param int     $post_author     The post author id.
 * @param int     $min_word_length The minimum word length.
 * @param boolean $debug           If true, print out debug notices.
 *
 * @return int The number of tokens added to the data.
 */
function relevanssi_index_author( &$insert_data, $post_author, $min_word_length, $debug ) {
	$n            = 0;
	$display_name = get_the_author_meta( 'display_name', $post_author );

	/** This filter is documented in lib/indexing.php */
	$name_tokens = apply_filters(
		'relevanssi_indexing_tokens',
		relevanssi_tokenize( $display_name, false, $min_word_length ),
		'author'
	);
	if ( $debug ) {
		relevanssi_debug_echo( 'Indexing post author as: ' . implode( ' ', array_keys( $name_tokens ) ) );
	}

	foreach ( $name_tokens as $token => $count ) {
		$n++;
		if ( ! isset( $insert_data[ $token ]['author'] ) ) {
			$insert_data[ $token ]['author'] = 0;
		}
		$insert_data[ $token ]['author'] += $count;
	}

	return $n;
}

/**
 * Creates indexing query data for custom fields.
 *
 * @param array        $insert_data     The INSERT query data. Modified here.
* @param int $post_id The indexed post ID. * @param string|array $custom_fields The custom fields to index. * @param int $min_word_length The minimum word length. * @param boolean $debug If true, print out debug notices. * * @return int The number of tokens added to the data. */ function relevanssi_index_custom_fields( &$insert_data, $post_id, $custom_fields, $min_word_length, $debug ) { $n = 0; $remove_underscore_fields = 'visible' === $custom_fields ? true : false; if ( 'all' === $custom_fields || 'visible' === $custom_fields ) { $custom_fields = get_post_custom_keys( $post_id ); } /** * Filters the list of custom fields to index before indexing. * * @param array $custom_fields List of custom field names. * @param int $post_id The post ID. */ $custom_fields = apply_filters( 'relevanssi_index_custom_fields', $custom_fields, $post_id ); if ( ! is_array( $custom_fields ) ) { return 0; } $custom_fields = array_unique( $custom_fields ); if ( $remove_underscore_fields ) { $custom_fields = array_filter( $custom_fields, function( $field ) { if ( '_relevanssi_pdf_content' === $field || '_' !== substr( $field, 0, 1 ) ) { return $field; } } ); } // Premium includes some support for ACF repeater fields. if ( function_exists( 'relevanssi_add_repeater_fields' ) ) { relevanssi_add_repeater_fields( $custom_fields, $post_id ); } $custom_fields = array_filter( $custom_fields ); if ( $debug ) { relevanssi_debug_echo( 'Custom fields to index: ' . implode( ', ', $custom_fields ) ); } foreach ( $custom_fields as $field ) { /** * Filters the custom field value before indexing. * * @param array Custom field values. * @param string $field The custom field name. * @param int $post_id The post ID. */ $values = apply_filters( 'relevanssi_custom_field_value', get_post_meta( $post_id, $field, false ), $field, $post_id ); if ( empty( $values ) || ! is_array( $values ) ) { continue; } foreach ( $values as $value ) { // Quick hack : allow indexing of PODS relationship custom fields // TMV. 
if ( is_array( $value ) && isset( $value['post_title'] ) ) { $value = $value['post_title']; } if ( function_exists( 'relevanssi_index_acf' ) ) { // @codeCoverageIgnoreStart // Handle ACF fields. Only defined when ACF is active. $acf_tokens = relevanssi_index_acf( $insert_data, $post_id, $field, $value ); if ( $acf_tokens ) { $n += $acf_tokens; continue; } // @codeCoverageIgnoreEnd } // Flatten other arrays. if ( is_array( $value ) ) { $value = relevanssi_flatten_array( $value ); } if ( $debug ) { relevanssi_debug_echo( "\tKey: " . $field . ' - value: ' . $value ); } $context = 'custom_field'; $remove_stops = true; if ( '_relevanssi_pdf_content' === $field ) { $context = 'body'; $remove_stops = 'body'; } /** This filter is documented in lib/indexing.php */ $value_tokens = apply_filters( 'relevanssi_indexing_tokens', relevanssi_tokenize( $value, $remove_stops, $min_word_length ), $context ); foreach ( $value_tokens as $token => $count ) { $n++; if ( ! isset( $insert_data[ $token ]['customfield'] ) ) { $insert_data[ $token ]['customfield'] = 0; } $insert_data[ $token ]['customfield'] += $count; // Premium indexes more detail about custom fields. if ( function_exists( 'relevanssi_customfield_detail' ) ) { $insert_data = relevanssi_customfield_detail( $insert_data, $token, $count, $field ); } } } } return $n; } /** * Creates indexing queries for the excerpt content. * * @param array $insert_data The INSERT query data. Modified here. * @param string $excerpt The post excerpt to index. * @param int $min_word_length The minimum word length. * @param boolean $debug If true, print out debug notices. * * @return int The number of tokens added to the data. */ function relevanssi_index_excerpt( &$insert_data, $excerpt, $min_word_length, $debug ) { $n = 0; // Include excerpt for attachments which use post_excerpt for captions - modified by renaissancehack. 
if ( $debug ) { relevanssi_debug_echo( "Indexing post excerpt: $excerpt" ); } /** This filter is documented in common/indexing.php */ $excerpt_tokens = apply_filters( 'relevanssi_indexing_tokens', relevanssi_tokenize( $excerpt, true, $min_word_length ), 'excerpt' ); foreach ( $excerpt_tokens as $token => $count ) { $n++; if ( ! isset( $insert_data[ $token ]['excerpt'] ) ) { $insert_data[ $token ]['excerpt'] = 0; } $insert_data[ $token ]['excerpt'] += $count; } return $n; } /** * Creates indexing queries for post title. * * @param array $insert_data The INSERT query data. Modified here. * @param object $post The post object. * @param int $min_word_length The minimum word length. * @param boolean $debug If true, print out debug notices. * * @return int The number of tokens added to the data. */ function relevanssi_index_title( &$insert_data, $post, $min_word_length, $debug ) { $n = 0; if ( empty( $post->post_title ) ) { return 0; } /** * If this filter returns false, titles are not indexed at all. * * @param boolean Return false to prevent titles from being indexed. Default true. */ if ( ! apply_filters( 'relevanssi_index_titles', true ) ) { return 0; } if ( $debug ) { relevanssi_debug_echo( 'Indexing post title.' ); } /** This filter is documented in wp-includes/post-template.php */ $filtered_title = apply_filters( 'the_title', $post->post_title, $post->ID ); /** * Filters the title before tokenizing and indexing. * * @param string $post->post_title The title. * @param object $post The full post object. */ $filtered_title = apply_filters( 'relevanssi_post_title_before_tokenize', $filtered_title, $post ); $title_tokens = relevanssi_tokenize( $filtered_title, /** * Filters whether stopwords should be removed from titles in tokenizing or not. * * @param boolean If true, remove stopwords. Default true. 
*/ apply_filters( 'relevanssi_remove_stopwords_in_titles', true ), $min_word_length ); /** This filter is documented in lib/indexing.php */ $title_tokens = apply_filters( 'relevanssi_indexing_tokens', $title_tokens, 'title' ); if ( $debug ) { relevanssi_debug_echo( "\tTitle, tokenized: " . implode( ' ', array_keys( $title_tokens ) ) ); } foreach ( $title_tokens as $token => $count ) { $n++; if ( ! isset( $insert_data[ $token ]['title'] ) ) { $insert_data[ $token ]['title'] = 0; } $insert_data[ $token ]['title'] += $count; } return $n; } /** * Creates indexing queries for post content. * * @param array $insert_data The INSERT query data. Modified here. * @param object $post The post object. * @param int $min_word_length The minimum word length. * @param boolean $debug If true, print out debug notices. * * @return int The number of tokens added to the data. */ function relevanssi_index_content( &$insert_data, $post, $min_word_length, $debug ) { $n = 0; /** * If this filter returns false, post content is not indexed at all. * * @param boolean Return false to prevent post content from being indexed. Default true. */ if ( ! apply_filters( 'relevanssi_index_content', true ) ) { return $n; } if ( $debug ) { relevanssi_debug_echo( 'Indexing post content.' ); } remove_shortcode( 'noindex' ); add_shortcode( 'noindex', 'relevanssi_noindex_shortcode_indexing' ); /** * Filters the post content before indexing. * * @param string $post->post_content The post content. * @param object $post The full post object. */ $contents = apply_filters( 'relevanssi_post_content', $post->post_content, $post ); if ( $debug ) { relevanssi_debug_echo( "\tPost content after relevanssi_post_content:\n$contents" ); } /** * Can be used to add extra content to the post before indexing. * * @author Alexander Gieg * * @param string The additional content. * @param object $post The post object. */ $additional_content = trim( apply_filters( 'relevanssi_content_to_index', '', $post ) ); if ( ! 
empty( $additional_content ) ) { $contents .= ' ' . $additional_content; if ( $debug ) { relevanssi_debug_echo( "\tAdditional content from relevanssi_content_to_index:\n$additional_content" ); } } if ( 'on' === get_option( 'relevanssi_expand_shortcodes' ) ) { // TablePress support. $tablepress_controller = relevanssi_enable_tablepress_shortcodes(); relevanssi_disable_shortcodes(); $post_before_shortcode = $post; $contents = do_shortcode( $contents ); $post = $post_before_shortcode; // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited unset( $tablepress_controller ); } else { $contents = strip_shortcodes( $contents ); } remove_shortcode( 'noindex' ); add_shortcode( 'noindex', 'relevanssi_noindex_shortcode' ); /** * Filters the post content after shortcodes but before HTML stripping. * * @param string $contents The post content. * @param object $post The full post object. */ $contents = apply_filters( 'relevanssi_post_content_after_shortcodes', $contents, $post ); $contents = relevanssi_strip_invisibles( $contents ); // Premium feature for better control over internal links. if ( function_exists( 'relevanssi_process_internal_links' ) ) { $contents = relevanssi_process_internal_links( $contents, $post->ID ); } $contents = preg_replace( '/<[a-zA-Z\/][^>]*>/', ' ', $contents ); $contents = wp_strip_all_tags( $contents ); /** * Filters the post content in indexing before tokenization. * * @param string $contents The post content. * @param object $post The full post object. */ $contents = apply_filters( 'relevanssi_post_content_before_tokenize', $contents, $post ); /** This filter is documented in lib/indexing.php */ $content_tokens = apply_filters( 'relevanssi_indexing_tokens', relevanssi_tokenize( $contents, 'body', $min_word_length ), 'content' ); if ( $debug ) { relevanssi_debug_echo( "\tContent, tokenized:\n" . implode( ' ', array_keys( $content_tokens ) ) ); } foreach ( $content_tokens as $token => $count ) { $n++; if ( ! 
isset( $insert_data[ $token ]['content'] ) ) { $insert_data[ $token ]['content'] = 0; } $insert_data[ $token ]['content'] += $count; } return $n; } /** * Disables problematic shortcode before Relevanssi indexing to avoid problems. * * Uses the `relevanssi_disabled_shortcodes` filter hook to filter the * shortcodes. The disabled shortcodes are first removed with * remove_shortcode() and then given a reference to `__return_empty_string`. * * The option `relevanssi_disable_shortcodes` is also supported for legacy * reasons, but it's better to use the filter instead. */ function relevanssi_disable_shortcodes() { $default_disables = array( 'contact-form', // Jetpack Contact Form causes an error message. 'starrater', // GD Star Rating rater shortcode causes problems. 'responsive-flipbook', // Responsive Flipbook causes problems. 'avatar_upload', // WP User Avatar is incompatible. 'product_categories', // A problematic WooCommerce shortcode. 'recent_products', // A problematic WooCommerce shortcode. 'php', // PHP Code for Posts. 'watupro', // Watu PRO doesn't co-operate. 'starbox', // Starbox shortcode breaks Relevanssi. 'cfdb-save-form-post', // Contact Form DB. 'cfdb-datatable', 'cfdb-table', 'cfdb-json', 'cfdb-value', 'cfdb-count', 'cfdb-html', 'woocommerce_cart', // WooCommerce. 'woocommerce_checkout', 'woocommerce_order_tracking', 'woocommerce_my_account', 'woocommerce_edit_account', 'woocommerce_change_password', 'woocommerce_view_order', 'woocommerce_logout', 'woocommerce_pay', 'woocommerce_thankyou', 'woocommerce_lost_password', 'woocommerce_edit_address', 'tc_process_payment', 'maxmegamenu', // Max Mega Menu. 'searchandfilter', // Search and Filter. 'downloads', // Easy Digital Downloads. 'download_history', 'purchase_history', 'download_checkout', 'purchase_link', 'download_cart', 'edd_profile_editor', 'edd_login', 'edd_register', 'swpm_protected', // Simple Membership Partially Protected content. 'gravityform', // Gravity Forms. 
'sdm_latest_downloads', // SDM Simple Download Monitor. 'slimstat', // Slimstat Analytics. 'ninja_tables', // Ninja Tables. ); $disable_shortcodes = get_option( 'relevanssi_disable_shortcodes' ); $shortcodes = explode( ',', $disable_shortcodes ); /** * Filters the shortcodes Relevanssi disables while indexing posts. * * @param array An array of shortcodes disabled. * * @return array An array of shortcodes disabled. */ $shortcodes = apply_filters( 'relevanssi_disabled_shortcodes', array_unique( array_merge( $shortcodes, $default_disables ) ) ); foreach ( $shortcodes as $shortcode ) { if ( empty( $shortcode ) ) { continue; } remove_shortcode( trim( $shortcode ) ); add_shortcode( trim( $shortcode ), '__return_empty_string' ); } } /** * Converts INSERT query data array to query values. * * Takes the collected data and converts it to values that can be fed into * an INSERT query using $wpdb->prepare(). Provides filters to modify the * insert query values before and after the conversion. * * @global $wpdb The WordPress database interface. * @global $relevanssi_variables Used for the Relevanssi db table name. * * @param array $insert_data An array of term => data pairs, where data has * token counts for the term in different contexts. * @param object $post The indexed post object. * * @return array An array of values clauses for an INSERT query. */ function relevanssi_convert_data_to_values( $insert_data, $post ) { global $wpdb, $relevanssi_variables; $charset = $wpdb->get_col_charset( $relevanssi_variables['relevanssi_table'], 'term' ); /** * Sets the indexed post 'type' column in the index. * * Default value is 'post', but other common values include 'attachment', * 'user' and taxonomy name. * * @param string Type value. * @param object The post object for the current post. */ $type = apply_filters( 'relevanssi_index_get_post_type', 'post', $post ); /** * Filters the indexing data before it is converted to INSERT queries. 
* * @param array $insert_data All the tokens and their counts. * @param object $post The post object. */ $insert_data = apply_filters( 'relevanssi_indexing_data', $insert_data, $post ); $values = array(); foreach ( $insert_data as $term => $data ) { $content = isset( $data['content'] ) ? $data['content'] : 0; $title = isset( $data['title'] ) ? $data['title'] : 0; $comment = isset( $data['comment'] ) ? $data['comment'] : 0; $tag = isset( $data['tag'] ) ? $data['tag'] : 0; $link = isset( $data['link'] ) ? $data['link'] : 0; $author = isset( $data['author'] ) ? $data['author'] : 0; $category = isset( $data['category'] ) ? $data['category'] : 0; $excerpt = isset( $data['excerpt'] ) ? $data['excerpt'] : 0; $taxonomy = isset( $data['taxonomy'] ) ? $data['taxonomy'] : 0; $customfield = isset( $data['customfield'] ) ? $data['customfield'] : 0; $mysqlcolumn = isset( $data['mysqlcolumn'] ) ? $data['mysqlcolumn'] : 0; $taxonomy_detail = isset( $data['taxonomy_detail'] ) ? $data['taxonomy_detail'] : ''; $customfield_detail = isset( $data['customfield_detail'] ) ? $data['customfield_detail'] : ''; if ( 'utf8' === $charset ) { $term = wp_encode_emoji( $term ); } $term = trim( $term ); $value = $wpdb->prepare( '(%d, %s, REVERSE(%s), %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %s, %s, %s, %d)', $post->ID, $term, $term, $content, $title, $comment, $tag, $link, $author, $category, $excerpt, $taxonomy, $customfield, $type, $taxonomy_detail, $customfield_detail, $mysqlcolumn ); array_push( $values, $value ); } /** * Filters the INSERT query VALUES sections before they are inserted in the INSERT query. * * @param array $values Value sets. * @param object $post The post object. */ return apply_filters( 'relevanssi_indexing_values', $values, $post ); }