dd
Showing
1 changed file
with
31 additions
and
0 deletions
| ... | @@ -539,5 +539,36 @@ function strip_word_html($text, $allowed_tags = '<b><i><sup><sub><em><strong><u> | ... | @@ -539,5 +539,36 @@ function strip_word_html($text, $allowed_tags = '<b><i><sup><sub><em><strong><u> |
| 539 | 539 | ||
| 540 | $text = preg_replace('/(?:width\=\"\d*\")\S/mxi', '>', $text); | 540 | $text = preg_replace('/(?:width\=\"\d*\")\S/mxi', '>', $text); |
| 541 | $text = preg_replace('/(?:width\=\"\d*%\")\S/mxi', '>', $text); | 541 | $text = preg_replace('/(?:width\=\"\d*%\")\S/mxi', '>', $text); |
| 542 | |||
| 543 | $chr_map = array( | ||
| 544 | // Windows codepage 1252 | ||
| 545 | "\xC2\x82" => "'", // U+0082⇒U+201A single low-9 quotation mark | ||
| 546 | "\xC2\x84" => '"', // U+0084⇒U+201E double low-9 quotation mark | ||
| 547 | "\xC2\x8B" => "'", // U+008B⇒U+2039 single left-pointing angle quotation mark | ||
| 548 | "\xC2\x91" => "'", // U+0091⇒U+2018 left single quotation mark | ||
| 549 | "\xC2\x92" => "'", // U+0092⇒U+2019 right single quotation mark | ||
| 550 | "\xC2\x93" => '"', // U+0093⇒U+201C left double quotation mark | ||
| 551 | "\xC2\x94" => '"', // U+0094⇒U+201D right double quotation mark | ||
| 552 | "\xC2\x9B" => "'", // U+009B⇒U+203A single right-pointing angle quotation mark | ||
| 553 | |||
| 554 | // Regular Unicode // U+0022 quotation mark (") | ||
| 555 | // U+0027 apostrophe (') | ||
| 556 | "\xC2\xAB" => '"', // U+00AB left-pointing double angle quotation mark | ||
| 557 | "\xC2\xBB" => '"', // U+00BB right-pointing double angle quotation mark | ||
| 558 | "\xE2\x80\x98" => "'", // U+2018 left single quotation mark | ||
| 559 | "\xE2\x80\x99" => "'", // U+2019 right single quotation mark | ||
| 560 | "\xE2\x80\x9A" => "'", // U+201A single low-9 quotation mark | ||
| 561 | "\xE2\x80\x9B" => "'", // U+201B single high-reversed-9 quotation mark | ||
| 562 | "\xE2\x80\x9C" => '"', // U+201C left double quotation mark | ||
| 563 | "\xE2\x80\x9D" => '"', // U+201D right double quotation mark | ||
| 564 | "\xE2\x80\x9E" => '"', // U+201E double low-9 quotation mark | ||
| 565 | "\xE2\x80\x9F" => '"', // U+201F double high-reversed-9 quotation mark | ||
| 566 | "\xE2\x80\xB9" => "'", // U+2039 single left-pointing angle quotation mark | ||
| 567 | "\xE2\x80\xBA" => "'", // U+203A single right-pointing angle quotation mark | ||
| 568 | ); | ||
| 569 | $chr = array_keys ($chr_map); // but: for efficiency you should | ||
| 570 | $rpl = array_values($chr_map); // pre-calculate these two arrays | ||
| 571 | $text = str_replace($chr, $rpl, html_entity_decode($text, ENT_QUOTES, "UTF-8")); | ||
| 572 | |||
| 542 | return $text; | 573 | return $text; |
| 543 | } | 574 | } | ... | ... |
-
Please register or sign in to post a comment