class-parser.php
11.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
<?php
/**
* Smush page parser that is used by CDN and Lazy load modules.
*
* @since 3.2.2
* @package Smush\Core\Modules\Helpers
*/
namespace Smush\Core\Modules\Helpers;
use WP_Smush;
/**
* Class Parser
*/
class Parser {
/**
* CDN module status.
*
* @var bool $cdn
*/
private $cdn = false;
/**
* Lazy load module status.
*
* @var bool $lazy_load
*/
private $lazy_load = false;
/**
* Process background images.
*
* @since 3.2.2
* @var bool $background_images
*/
private $background_images = false;
/**
* Smush will __construct this class multiple times, but only once does it need to be initialized.
*
* @since 3.5.0 Moved from __construct().
*/
public function init() {
if ( is_admin() || is_customize_preview() || wp_doing_ajax() || wp_doing_cron() ) {
return;
}
if ( $this->is_page_builder() ) {
return;
}
if ( $this->is_smartcrawl_analysis() ) {
return;
}
// Start an output buffer before any output starts.
add_action(
'template_redirect',
function () {
ob_start( array( $this, 'parse_page' ) );
},
1
);
}
/**
* Enable parser for selected module.
*
* @since 3.2.2
* @param string $module Module ID.
*/
public function enable( $module ) {
if ( ! in_array( $module, array( 'cdn', 'lazy_load', 'background_images' ), true ) ) {
return;
}
$this->$module = true;
}
/**
* Disable parser for selected module.
*
* @since 3.2.2
* @param string $module Module ID.
*/
public function disable( $module ) {
if ( ! in_array( $module, array( 'cdn', 'lazy_load' ), true ) ) {
return;
}
$this->$module = false;
}
/**
* Process images from current buffer content.
*
* Use DOMDocument class to find all available images in current HTML content and set attachment ID attribute.
*
* @since 3.0
* @since 3.2.2 Moved from \Smush\Core\Modules\CDN.
*
* @param string $content Current buffer content.
*
* @return string
*/
public function parse_page( $content ) {
// Do not parse page if CDN and Lazy load modules are disabled.
if ( ! $this->cdn && ! $this->lazy_load ) {
return $content;
}
if ( is_customize_preview() ) {
return $content;
}
if ( empty( $content ) ) {
return $content;
}
$content = $this->process_images( $content );
if ( $this->background_images ) {
$content = $this->process_background_images( $content );
}
return $content;
}
/**
* Process all images within <img> tags.
*
* @since 3.2.2
*
* @param string $content Current buffer content.
*
* @return string
*/
private function process_images( $content ) {
$images = $this->get_images_from_content( $content );
if ( empty( $images ) ) {
return $content;
}
foreach ( $images[0] as $key => $image ) {
$img_src = $images['src'][ $key ];
$new_image = $image;
// Update the image with correct CDN links.
if ( $this->cdn ) {
$new_image = WP_Smush::get_instance()->core()->mod->cdn->parse_image( $img_src, $new_image, $images['srcset'][ $key ], $images['type'][ $key ] );
}
/**
* Internal filter to disable page parsing.
*
* Because the page parser module is universal, we need to make sure that all modules have the ability to skip
* parsing of certain pages. For example, lazy loading should skip if_preview() pages. In order to achieve this
* functionality, I've introduced this filter. Filter priority can be used to overwrite the $skip param.
*
* @since 3.2.2
*
* @param bool $skip Skip status.
*/
if ( $this->lazy_load && ! apply_filters( 'wp_smush_should_skip_parse', false ) ) {
$new_image = WP_Smush::get_instance()->core()->mod->lazy->parse_image( $img_src, $new_image, $images['type'][ $key ] );
}
$content = str_replace( $image, $new_image, $content );
}
return $content;
}
/**
* Process all images that are contained as background-images.
*
* @since 3.2.2
*
* @param string $content Current buffer content.
*
* @return string
*/
private function process_background_images( $content ) {
$images = self::get_background_images( $content );
if ( empty( $images ) ) {
return $content;
}
// Try to sort out the duplicate entries.
$elements = array_unique( $images[0] );
$urls = array_unique( $images['img_url'] );
if ( count( $elements ) === count( $urls ) ) {
$images[0] = $elements;
$images['img_url'] = $urls;
}
foreach ( $images[0] as $key => $image ) {
$img_src = $images['img_url'][ $key ];
$new_image = $image;
// Update the image with correct CDN links.
$new_image = WP_Smush::get_instance()->core()->mod->cdn->parse_background_image( $img_src, $new_image );
$content = str_replace( $image, $new_image, $content );
/**
* Filter the current page content after process background images.
*
* @param string $content Current Page content.
* @param string $image Backround Image tag without src.
* @param string $img_src Image src.
*/
$content = apply_filters( 'smush_after_process_background_images', $content, $image, $img_src );
}
return $content;
}
/**
* Compatibility with SmartCrawl readability analysis.
* Do not process page on analysis.
*
* @since 3.3.0
*/
private function is_smartcrawl_analysis() {
$wds_analysis = filter_input( INPUT_POST, 'action', FILTER_SANITIZE_SPECIAL_CHARS );
if ( ! is_null( $wds_analysis ) && 'wds-analysis-recheck' === $wds_analysis ) {
return true;
}
if ( null !== filter_input( INPUT_GET, 'wds-frontend-check', FILTER_SANITIZE_SPECIAL_CHARS ) ) {
return true;
}
return false;
}
/**
* Get image tags from page content.
*
* @since 3.1.0
* @since 3.2.0 Moved to WP_Smush_Content from \Smush\Core\Modules\CDN
* @since 3.2.2 Moved to Parser from WP_Smush_Content
*
* Performance test: auto generated page with ~900 lines of HTML code, 84 images.
* - Smush 2.4.0: 82 matches, 104359 steps (~80 ms) <- does not match <source> images in <picture>.
* - Smush 2.5.0: 84 matches, 63791 steps (~51 ms).
*
* @param string $content Page content.
*
* @return array
*/
public function get_images_from_content( $content ) {
$images = array();
/**
* Filter out only <body> content. As this was causing issues with escaped JS strings in <head>.
*
* @since 3.6.2
*/
if ( preg_match( '/(?=<body).*<\/body>/is', $content, $body ) ) {
$content = $body[0];
}
$pattern = '/<(?P<type>img|source|iframe)\b(?>\s+(?:src=[\'"](?P<src>[^\'"]*)[\'"]|srcset=[\'"](?P<srcset>[^\'"]*)[\'"])|[^\s>]+|\s+)*>/is';
$pattern = apply_filters( 'smush_images_from_content_regex', $pattern );
if ( preg_match_all( $pattern, $content, $images ) ) {
foreach ( $images as $key => $unused ) {
// Simplify the output as much as possible, mostly for confirming test results.
if ( is_numeric( $key ) && $key > 0 ) {
unset( $images[ $key ] );
}
}
}
return $images;
}
/**
* Get background images from content.
*
* @since 3.2.2
*
* Performance test: auto generated page with ~900 lines of HTML code, 84 images (only 1 with background image).
* - Smush 2.4.0: 1 match, 522510 steps (~355 ms)
* - Smush 2.5.0: 1 match, 12611 steps, (~12 ms)
*
* @param string $content Page content.
*
* @return array
*/
private static function get_background_images( $content ) {
$images = array();
$pattern = '/(?:background-image:\s*?url\(\s*[\'"]?(?P<img_url>.*?[^)\'"]+)[\'"]?\s*\))/i';
$pattern = apply_filters( 'smush_background_images_regex', $pattern );
if ( preg_match_all( $pattern, $content, $images ) ) {
foreach ( $images as $key => $unused ) {
// Simplify the output as much as possible, mostly for confirming test results.
if ( is_numeric( $key ) && $key > 0 ) {
unset( $images[ $key ] );
}
}
}
/**
* Make sure that the image doesn't start and end with ".
*
* @since 3.5.0
*/
$images['img_url'] = array_map(
function ( $image ) {
// Quote entities.
$quotes = apply_filters( 'wp_smush_background_image_quotes', array( '"', '"', ''', ''' ) );
$image = trim( $image );
// Remove the starting quotes.
if ( in_array( substr( $image, 0, 6 ), $quotes, true ) ) {
$image = substr( $image, 6 );
}
// Remove the ending quotes.
if ( in_array( substr( $image, -6 ), $quotes, true ) ) {
$image = substr( $image, 0, -6 );
}
return $image;
},
$images['img_url']
);
return $images;
}
/**
* Check if this is one of the known page builders.
*
* @since 3.5.1
*
* @return bool
*/
private function is_page_builder() {
// Oxygen builder.
if ( defined( 'SHOW_CT_BUILDER' ) && SHOW_CT_BUILDER ) {
return true;
}
// Oxygen builder as well.
if ( null !== filter_input( INPUT_GET, 'ct_builder' ) ) {
return true;
}
// Beaver builder.
if ( null !== filter_input( INPUT_GET, 'fl_builder' ) ) {
return true;
}
// Thrive Architect Builder.
if ( null !== filter_input( INPUT_GET, 'tve' ) && null !== filter_input( INPUT_GET, 'tcbf' ) ) {
return true;
}
// Tatsu page builder.
if ( null !== filter_input( INPUT_GET, 'tatsu' ) ) {
return true;
}
// BuddyBoss' AJAX requests. They do something strange and end up defining
// DOING_AJAX on template_redirect after self::parse_page() runs. That makes
// our lazy load page parsing break some of their AJAX requests.
if ( function_exists( 'bbp_is_ajax' ) && bbp_is_ajax() ) {
return true;
}
return false;
}
/**
* Add attribute to selected tag.
*
* @since 3.1.0
* @since 3.2.0 Moved to WP_Smush_Content from \Smush\Core\Modules\CDN
* @since 3.2.2 Moved to Parser from WP_Smush_Content
*
* @param string $element Image element.
* @param string $name Img attribute name (srcset, size, etc).
* @param string $value Attribute value.
*/
public static function add_attribute( &$element, $name, $value = null ) {
$closing = false === strpos( $element, '/>' ) ? '>' : ' />';
$quotes = false === strpos( $element, '"' ) ? '\'' : '"';
if ( ! is_null( $value ) ) {
$element = rtrim( $element, $closing ) . " {$name}={$quotes}{$value}{$quotes}{$closing}";
} else {
$element = rtrim( $element, $closing ) . " {$name}{$closing}";
}
}
/**
* Get attribute from an HTML element.
*
* @since 3.2.0
* @since 3.2.2 Moved to Parser from WP_Smush_Content
*
* @param string $element HTML element.
* @param string $name Attribute name.
*
* @return string
*/
public static function get_attribute( $element, $name ) {
preg_match( "/{$name}=['\"]([^'\"]+)['\"]/is", $element, $value );
return isset( $value['1'] ) ? $value['1'] : '';
}
/**
* Remove attribute from selected tag.
*
* @since 3.2.0
* @since 3.2.2 Moved to Parser from WP_Smush_Content
*
* @param string $element Image element.
* @param string $attribute Img attribute name (srcset, size, etc).
*/
public static function remove_attribute( &$element, $attribute ) {
$element = preg_replace( '/' . $attribute . '=[\'"](.*?)[\'"]/i', '', $element );
}
/**
* Get URLs from a string of content.
*
* This is mostly used to get the URLs from srcset and parse each single URL to use in CDN.
*
* Performance test: auto generated page with ~900 lines of HTML code, 84 images
* - Smush 2.4.0: 11957 matches, 237227 steps (~169 ms) <- many false positive matches.
* - Smush 2.5.0: 278 matches, 14509 steps, (~15 ms).
*
* @since 3.3.0
*
* @param string $content Content.
*
* @return array
*/
public static function get_links_from_content( $content ) {
$images = array();
preg_match_all( '/(?:https?[^\s\'"]*)/is', $content, $images );
return $images;
}
}