crawl-cleanup-searches.php
5.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
<?php
namespace Yoast\WP\SEO\Integrations\Front_End;
use WP_Query;
use Yoast\WP\SEO\Conditionals\Front_End_Conditional;
use Yoast\WP\SEO\Helpers\Options_Helper;
use Yoast\WP\SEO\Integrations\Integration_Interface;
use Yoast\WP\SEO\Helpers\Redirect_Helper;
/**
* Class Crawl_Cleanup_Searches.
*/
class Crawl_Cleanup_Searches implements Integration_Interface {

	/**
	 * Patterns to match against to find spam.
	 *
	 * The first pattern is written with code point escapes so that the
	 * full-width punctuation it targets cannot be confused with (or folded
	 * into) its ASCII look-alikes, and so that no unescaped `]` can close the
	 * character class early. It matches any run of:
	 * U+FF1A full-width colon, U+FF08 / U+FF09 full-width parentheses,
	 * U+3010 / U+3011 lenticular brackets, U+FF3B / U+FF3D full-width square brackets.
	 *
	 * The second pattern matches the literal prefixes "TALK:" and "QQ:", case-insensitively.
	 *
	 * @var string[]
	 */
	private $patterns = [
		'/[\x{FF1A}\x{FF08}\x{FF09}\x{3010}\x{3011}\x{FF3B}\x{FF3D}]+/u',
		'/(TALK|QQ)\:/iu',
	];

	/**
	 * The options helper.
	 *
	 * @var Options_Helper
	 */
	private $options_helper;

	/**
	 * The redirect helper.
	 *
	 * @var Redirect_Helper
	 */
	private $redirect_helper;

	/**
	 * Crawl_Cleanup_Searches integration constructor.
	 *
	 * @param Options_Helper  $options_helper  The option helper.
	 * @param Redirect_Helper $redirect_helper The redirect helper.
	 */
	public function __construct( Options_Helper $options_helper, Redirect_Helper $redirect_helper ) {
		$this->options_helper  = $options_helper;
		$this->redirect_helper = $redirect_helper;
	}

	/**
	 * Initializes the integration.
	 *
	 * Search validation is only hooked when the `search_cleanup` option is on.
	 * The pretty-URL redirect is only hooked when `redirect_search_pretty_urls`
	 * is on and the site has a non-empty permalink structure.
	 *
	 * @return void
	 */
	public function register_hooks() {
		if ( $this->options_helper->get( 'search_cleanup' ) ) {
			\add_filter( 'pre_get_posts', [ $this, 'validate_search' ] );
		}

		if ( $this->options_helper->get( 'redirect_search_pretty_urls' ) && ! empty( \get_option( 'permalink_structure' ) ) ) {
			\add_action( 'template_redirect', [ $this, 'maybe_redirect_searches' ], 2 );
		}
	}

	/**
	 * Returns the conditionals based in which this loadable should be active.
	 *
	 * @return array The array of conditionals.
	 */
	public static function get_conditionals() {
		return [ Front_End_Conditional::class ];
	}

	/**
	 * Check if we want to allow this search to happen.
	 *
	 * Non-search queries are passed through untouched.
	 *
	 * @param WP_Query $query The main query.
	 *
	 * @return WP_Query The query that was passed in.
	 */
	public function validate_search( WP_Query $query ) {
		if ( ! $query->is_search() ) {
			return $query;
		}

		// First check against emoji and patterns we might not want.
		$this->check_unwanted_patterns( $query );

		// Then limit characters if still needed.
		$this->limit_characters();

		return $query;
	}

	/**
	 * Redirect pretty search URLs to the "raw" equivalent.
	 *
	 * Only requests whose URI starts with `/search/` are redirected. Existing
	 * query arguments and the URL fragment are carried over to the target URL.
	 *
	 * @return void
	 */
	public function maybe_redirect_searches() {
		if ( ! \is_search() ) {
			return;
		}

		// phpcs:ignore WordPress.Security.ValidatedSanitizedInput
		$request_uri = isset( $_SERVER['REQUEST_URI'] ) ? $_SERVER['REQUEST_URI'] : '';
		if ( \stripos( $request_uri, '/search/' ) !== 0 ) {
			return;
		}

		$args   = [];
		$parsed = \wp_parse_url( $request_uri );
		if ( ! empty( $parsed['query'] ) ) {
			\wp_parse_str( $parsed['query'], $args );
		}

		/*
		 * Use the unescaped, unslashed term: the value is URL-encoded below, so
		 * HTML-escaping it first would turn e.g. `a&b` into a search for `a&amp;b`.
		 * This mirrors how limit_characters() reads the raw term.
		 */
		$args['s'] = \wp_unslash( \get_search_query( false ) );

		$proper_url = \home_url( '/' );

		if ( \intval( \get_query_var( 'paged' ) ) > 1 ) {
			$proper_url .= \sprintf( 'page/%s/', \get_query_var( 'paged' ) );
			// The page number now lives in the path, so drop it from the query string.
			unset( $args['paged'] );
		}

		$proper_url = \add_query_arg( \array_map( 'rawurlencode_deep', $args ), $proper_url );

		if ( ! empty( $parsed['fragment'] ) ) {
			$proper_url .= '#' . \rawurlencode( $parsed['fragment'] );
		}

		$this->redirect_away( 'We redirect pretty URLs to the raw format.', $proper_url );
	}

	/**
	 * Check query against unwanted search patterns.
	 *
	 * At most one redirect is requested per call: the method returns as soon
	 * as the emoji check or one of the spam patterns has triggered.
	 *
	 * @param WP_Query $query The main WordPress query.
	 *
	 * @return void
	 */
	private function check_unwanted_patterns( WP_Query $query ) {
		$s = \rawurldecode( $query->get( 's' ) );

		if ( $this->options_helper->get( 'search_cleanup_emoji' ) && $this->has_emoji( $s ) ) {
			$this->redirect_away( 'We don\'t allow searches with emojis and other special characters.' );

			return;
		}

		if ( ! $this->options_helper->get( 'search_cleanup_patterns' ) ) {
			return;
		}

		foreach ( $this->patterns as $pattern ) {
			if ( \preg_match( $pattern, $s ) === 1 ) {
				$this->redirect_away( 'Your search matched a common spam pattern.' );

				return;
			}
		}
	}

	/**
	 * Redirect to the homepage for invalid searches.
	 *
	 * Issues a 301 through the redirect helper. Whether the helper also ends
	 * the request is not visible from this class, so callers return right
	 * after calling this method.
	 *
	 * @param string $reason The reason for redirecting away.
	 * @param string $to_url The URL to redirect to. Defaults to the home URL when empty.
	 *
	 * @return void
	 */
	private function redirect_away( $reason, $to_url = '' ) {
		if ( empty( $to_url ) ) {
			$to_url = \get_home_url();
		}

		$this->redirect_helper->do_safe_redirect( $to_url, 301, 'Yoast Search Filtering: ' . $reason );
	}

	/**
	 * Limits the number of characters in the search query.
	 *
	 * When the search term is longer than the `search_character_limit`
	 * option, the `s` query var is replaced by the truncated term.
	 *
	 * @return void
	 */
	private function limit_characters() {
		// We retrieve the search term unescaped because we want to count the characters properly. We make sure to escape it afterwards, if we do something with it.
		$unescaped_s = \get_search_query( false );

		// We then unslash the search term, again because we want to count the characters properly. We make sure to slash it afterwards, if we do something with it.
		$raw_s = \wp_unslash( $unescaped_s );

		$limit = $this->options_helper->get( 'search_character_limit' );

		if ( \mb_strlen( $raw_s, 'UTF-8' ) > $limit ) {
			$new_s = \mb_substr( $raw_s, 0, $limit, 'UTF-8' );
			\set_query_var( 's', \wp_slash( \esc_attr( $new_s ) ) );
		}
	}

	/**
	 * Determines if a text string contains an emoji or not.
	 *
	 * The check is broader than emoji: it flags any character that is not a
	 * hyphen, not a Unicode letter (\p{L}) and not in the ASCII range. That
	 * includes non-ASCII digits, punctuation and symbols.
	 *
	 * NOTE(review): combining marks (\p{M}) are not letters either, so text in
	 * scripts that rely on them is presumably flagged as well — confirm whether
	 * that is intended.
	 *
	 * @param string $text The text string to detect emoji in.
	 *
	 * @return bool Whether a flagged character was found.
	 */
	private function has_emoji( $text ) {
		return \preg_match( '/([^-\p{L}\x00-\x7F]+)/u', $text ) === 1;
	}
}