<?php

namespace Lib\Imap\Parse;

use Lib\Imap\Fun;

/**
 * Decodes MIME-encoded mail header values.
 *
 * @author:dc
 * @time 2024/9/10 16:54
 * Class DeCode
 */
class DeCode
{
    /**
     * Encoding used whenever no usable charset can be determined.
     *
     * @var string
     */
    public $fallback_encoding = 'UTF-8';

    /**
     * Decode a (possibly MIME-encoded) header value.
     *
     * The value is trimmed first. If it starts with "=?utf-8?" (case-insensitive)
     * it is passed through mb_decode_mimeheader(). If the result still looks like
     * a single un-decoded encoded word (see notDecoded()), it is decoded through
     * mime_header_decode() instead.
     *
     * @author:dc
     * @time 2024/9/10 16:56
     *
     * @param string $value Raw header value
     *
     * @return mixed|string The decoded value, or the trimmed input when nothing needed decoding
     */
    public static function decode(string $value)
    {
        $obj = new self();
        $value = trim($value);
        $original_value = $value;

        if ($obj->is_uft8($value)) {
            $value = mb_decode_mimeheader($value);
        }

        if ($obj->notDecoded($original_value, $value)) {
            $decoded_value = $obj->mime_header_decode($value);
            if (count($decoded_value) > 0 && property_exists($decoded_value[0], 'text')) {
                $value = $decoded_value[0]->text;
            }
        }

        return $value;
    }

    /**
     * Decode MIME header elements
     *
     * Mirrors the return shape of imap_mime_header_decode(), but always yields
     * exactly one element covering the whole text.
     *
     * @link https://php.net/manual/en/function.imap-mime-header-decode.php
     * @param string $text The MIME text
     *
     * @return array The decoded elements are returned in an array of objects, where each
     * object has two properties, charset and text.
     */
    private function mime_header_decode(string $text): array
    {
        $charset = $this->getEncoding($text);

        return [(object)[
            "charset" => $charset,
            "text" => $this->convertEncoding($text, $charset)
        ]];
    }

    /**
     * Convert the encoding
     *
     * The string is first run through mb_decode_mimeheader(); both charset names
     * are then resolved via EncodingAliases::get() with the fallback encoding as
     * default before any conversion is attempted.
     *
     * @param $str
     * @param string $from
     * @param string $to
     *
     * @return mixed|string
     */
    public function convertEncoding($str, $from = "ISO-8859-2", $to = "UTF-8")
    {
        $str = mb_decode_mimeheader($str);
        $from = EncodingAliases::get($from, $this->fallback_encoding);
        $to = EncodingAliases::get($to, $this->fallback_encoding);

        if ($from === $to) {
            return $str;
        }

        // We don't need to do convertEncoding() if charset is ASCII (us-ascii):
        // ASCII is a subset of UTF-8, so all ASCII files are already UTF-8 encoded
        // https://stackoverflow.com/a/11303410
        //
        // us-ascii is the same as ASCII:
        // ASCII is the traditional name for the encoding system; the Internet Assigned Numbers Authority (IANA)
        // prefers the updated name US-ASCII, which clarifies that this system was developed in the US and
        // based on the typographical symbols predominantly in use there.
        // https://en.wikipedia.org/wiki/ASCII
        //
        // convertEncoding() function basically means convertToUtf8(), so when we convert ASCII string
        // into UTF-8 it gets broken.
        if (strtolower($from) == 'us-ascii' && $to == 'UTF-8') {
            return $str;
        }

        try {
            // Strict detection: in non-strict mode mb_detect_encoding() returns the
            // closest candidate even when the bytes are invalid in every listed
            // encoding, which would make non-UTF-8 input skip the conversion below.
            if (mb_detect_encoding($str, mb_detect_order(), true) === 'UTF-8') {
                return $str;
            }

            if (!$from) {
                return Fun::mb_convert_encoding($str, $to);
            }

            return Fun::mb_convert_encoding($str, $to, $from);
        } catch (\Exception | \ValueError $e) {
            // PHP 8 mbstring reports unknown encoding names with \ValueError, which is
            // not an \Exception; both are caught so the retry below still happens.
            // NOTE(review): what Fun::mb_convert_encoding() itself throws is not
            // visible from this file - confirm.
            if (strpos($from, '-') !== false) {
                // Retry once with the dashes removed from the source charset name.
                $from = str_replace('-', '', $from);

                return $this->convertEncoding($str, $from, $to);
            }

            return $str;
        }
    }

    /**
     * Get the encoding of a given object
     *
     * Objects are inspected for a "charset" entry in their "parameters" list, or
     * for a "charset" property. Strings are inspected for a leading "=?charset?"
     * marker, falling back to mb_detect_encoding().
     *
     * @param object|string $structure
     *
     * @return string
     */
    private function getEncoding($structure): string
    {
        // property_exists() interprets a string argument as a class name (and may
        // trigger autoloading), so the property checks are restricted to objects.
        if (is_object($structure)) {
            if (property_exists($structure, 'parameters')) {
                foreach ($structure->parameters as $parameter) {
                    if (strtolower($parameter->attribute) == "charset") {
                        return EncodingAliases::get($parameter->value, $this->fallback_encoding);
                    }
                }
            } elseif (property_exists($structure, 'charset')) {
                return EncodingAliases::get($structure->charset, $this->fallback_encoding);
            }

            return $this->fallback_encoding;
        }

        if (is_string($structure)) {
            // Pull the charset token out of a leading "=?charset?" marker.
            preg_match("/^=\?([a-z0-9-]{3,})\?/i", $structure, $matches);
            if (!empty($matches[1])) {
                $alias = EncodingAliases::get($matches[1], '');
                if ($alias) {
                    return $alias;
                }
            }

            $result = mb_detect_encoding($structure);

            return $result === false ? $this->fallback_encoding : $result;
        }

        return $this->fallback_encoding;
    }

    /**
     * Check if a given pair of strings has been decoded
     *
     * Returns true when $decoded starts with "=?", its first "?=" sits at the very
     * end, and $decoded occurs inside $encoded.
     *
     * @param $encoded
     * @param $decoded
     *
     * @return bool
     */
    private function notDecoded($encoded, $decoded): bool
    {
        return 0 === strpos($decoded, '=?')
            && strlen($decoded) - 2 === strpos($decoded, '?=')
            && false !== strpos($encoded, $decoded);
    }

    /**
     * Test if a given value is utf-8 encoded
     *
     * Only checks for a leading "=?utf-8?" marker (case-insensitive).
     *
     * @param $value
     *
     * @return bool
     */
    private function is_uft8($value): bool
    {
        return strpos(strtolower($value), '=?utf-8?') === 0;
    }
}