Skip to content

Commit 32e386f

Browse files
committed
Improved charset tag recognition accuracy.
1 parent a1da3d4 commit 32e386f

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

src/Document.php

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,11 +28,11 @@ public function __construct(Extractor $extractor)
28 28

29 29
$encoding = null;
30 30
$contentType = $extractor->getResponse()->getHeaderLine('content-type');
31-
preg_match('/charset="?(.*?)(?=$|\s|;|")/i', $contentType, $match);
31+
preg_match('/charset=(?:"|\')?(.*?)(?=$|\s|;|"|\'|>)/i', $contentType, $match);
32 32
if (!empty($match[1])) {
33 33
$encoding = trim($match[1], ',');
34 34
} elseif (!empty($html)) {
35-
preg_match('/charset="?(.*?)(?=$|\s|;|")/i', $html, $match);
35+
preg_match('/charset=(?:"|\')?(.*?)(?=$|\s|;|"|\'|>)/i', $html, $match);
36 36
if (!empty($match[1])) {
37 37
$encoding = trim($match[1], ',');
38 38
}

0 commit comments

Comments
 (0)