<?php

namespace Lib\Mail;

/**
 * Parses the body of a mail message.
 *
 * The input is the text obtained with `FETCH <msgno> RFC822.TEXT`; it may hold
 * HTML, plain text and attachments. Every recognised part is exposed through
 * getItem() as an associative array (keys such as `type`, `charset`, `encode`,
 * `name`, `body` for inline parts; `filename`, `name`, `content-id`, `path`,
 * `signName` for parts stored as files).
 *
 * @author:dc
 * @time 2022/8/12 9:15
 * Class Body
 * @package App\Mail\lib
 */
class Body
{
    /** Placeholder written in place of every boundary line before the body is split into parts. */
    private const PART_MARKER = '{--tag--}';

    /**
     * Boundary prefixes probed, in this order, before falling back to the
     * first line of the body (the first one is the form used by 163 mail).
     */
    private const KNOWN_BOUNDARY_PREFIXES = [
        '------=_Part',
        '------=_NextPart',
        '----_NmP',
        '--_=_swift',
        '----==_mimepart',
        '--------------Boundary',
        '--=-',
    ];

    /** Descriptive MIME preambles that carry no content and are dropped from the start of the body. */
    private const MIME_PREAMBLES = [
        "This is a multi-part message in MIME format.",
        "This is a multipart message in MIME format.",
        "This is a MIME message. If you are reading this text, you may want to \r\nconsider changing to a mail reader or gateway that understands how to \r\nproperly handle MIME multipart messages.",
    ];

    /** Header content types for which a bare base64 body is rendered as an inline image. */
    private const IMAGE_TYPES = ['image/png', 'image/jpg', 'image/jpeg', 'image/gif'];

    /**
     * Trimmed raw body as passed to the constructor.
     *
     * @var string
     */
    private string $body;

    /**
     * Parsed parts, in the order they were found.
     *
     * @var array
     */
    private array $item = [];

    /**
     * Directory attachments are written to.
     *
     * @var string
     */
    private string $fileSavePath;

    /**
     * Headers of the mail (only `Content-Type` is consulted).
     *
     * @var array
     */
    private array $header = [];

    /**
     * Body constructor. Parses $body immediately; read the result with getItem().
     *
     * @param string $body         raw body text
     * @param string $fileSavePath directory decoded attachments are saved in
     * @param array  $header       mail headers; ignored unless it is a non-empty array
     */
    public function __construct(string $body, string $fileSavePath = '/', $header = [])
    {
        if ($header && is_array($header)) {
            $this->header = $header;
        }

        $this->body = $body = trim($body);
        $this->fileSavePath = $fileSavePath;

        $body = $this->ltrimText($body);

        // Multipart body using one of the well-known boundary prefixes.
        foreach (self::KNOWN_BOUNDARY_PREFIXES as $prefix) {
            if (str_contains($body, $prefix)) {
                $this->parse($body, $prefix);
                return;
            }
        }

        // Irregular boundaries: anything starting with "--". The first line is the boundary.
        if (str_starts_with($body, '--')) {
            $this->parse($body, trim($this->body_get_tag($body, '--')));
            return;
        }

        // The body is HTML as-is.
        if (str_starts_with($body, '<')) {
            $this->setItem(['type' => 'text/html', 'body' => quoted_printable_decode($body)]);
            return;
        }

        $this->parseSinglePart($body);
    }

    /**
     * Handles a body without any boundary: plain text, base64 text (as sent by
     * QQ mail) or a bare base64 image announced by the Content-Type header.
     *
     * @param string $body
     */
    private function parseSinglePart(string $body): void
    {
        if (!MailFun::isBase64($body)) {
            $this->setItem(['type' => 'text/plain', 'body' => $body]);
            return;
        }

        // The mail may consist of nothing but an image; extend IMAGE_TYPES for further types.
        $imageType = null;
        if (!empty($this->header['Content-Type']) && is_string($this->header['Content-Type'])) {
            $candidate = strtolower(trim(explode(';', $this->header['Content-Type'])[0]));
            if (in_array($candidate, self::IMAGE_TYPES, true)) {
                $imageType = $candidate;
            }
        }

        if ($imageType !== null) {
            $this->setItem([
                'type' => 'text/html',
                'body' => "<img src=\"data:" . $imageType . ";base64," . $body . "\" />",
            ]);
            return;
        }

        $this->setItem(['type' => 'text/plain', 'body' => base64_decode($body)]);
    }

    /**
     * Removes a leading descriptive MIME preamble from the body.
     *
     * The preamble is removed as a prefix; ltrim() must not be used here
     * because its second argument is a character mask, which would also strip
     * real content made of the same characters.
     *
     * @param string $body
     * @return string body without the preamble, trimmed when a preamble was removed
     */
    public function ltrimText($body)
    {
        foreach (self::MIME_PREAMBLES as $preamble) {
            if (strpos($body, $preamble) === 0) {
                $body = trim(substr($body, strlen($preamble)));
            }
        }

        return $body;
    }

    /**
     * Returns the first line fragment of $body that starts with $tag.
     *
     * @param string $body
     * @param string $tag literal text (escaped before it is used in the pattern)
     * @return mixed|string the matched text, or '' when nothing matches
     * @author:dc
     * @time 2022/8/12 10:49
     */
    private function body_get_tag($body, $tag)
    {
        $pattern = '/' . preg_quote($tag, '/') . '[\w\W].*/i';
        if (preg_match($pattern, $body, $result) === 1 && !empty($result[0])) {
            return $result[0];
        }

        return '';
    }

    /**
     * Appends one parsed part.
     *
     * @param array $item
     */
    private function setItem($item): void
    {
        $this->item[] = $item;
    }

    /**
     * Returns all parsed parts.
     *
     * text/plain parts whose body still contains runs such as
     * `=E2=81=B6=E2=81=B6` are quoted-printable decoded first; the decoded
     * value is stored back into the part.
     *
     * @return array[]
     */
    public function getItem(): array
    {
        foreach ($this->item as $index => $part) {
            if (!isset($part['type'])) {
                continue;
            }

            if (
                $part['type'] == 'text/plain'
                && isset($part['body'])
                && preg_match("/(=[A-Z0-9]{2,}){2,}/", $part['body'])
            ) {
                $part['body'] = quoted_printable_decode($part['body']);
            }

            $this->item[$index] = $part;
        }

        return $this->item;
    }

    /**
     * Splits a multipart body on its boundary lines and parses every part.
     *
     * @param string $body
     * @param string $tag boundary text (or boundary prefix) that delimits the parts
     * @author:dc
     * @time 2022/8/12 9:50
     */
    private function parse(string $body, string $tag): void
    {
        if ($tag === '') {
            // Nothing to split on; an empty tag would match at every position.
            return;
        }

        // Drop whatever precedes the first boundary; it is normally useless.
        // strpos() yields a byte offset, so the byte-based substr() is required.
        $offset = strpos($body, $tag);
        $body = substr($body, $offset === false ? 0 : $offset);

        // A boundary declared inside the body (typically when attachments are present).
        if (preg_match('/boundary="(.*)"/Ui', $body, $boundary) === 1) {
            $body = str_replace($boundary[0], '', $body);

            if (($boundary[1] ?? '') !== '') {
                // Every line mentioning that boundary becomes a part separator.
                $linePattern = '/.*' . preg_quote($boundary[1], '/') . '.*/i';
                if (preg_match_all($linePattern, $body, $boundaryLines) && !empty($boundaryLines[0])) {
                    $body = str_replace($boundaryLines[0], self::PART_MARKER, $body);
                }
            }
        }

        // Locate every line starting at the tag and turn it into a part separator.
        $tagPattern = '/((' . preg_quote($tag, '/') . ').*+)/i';
        if (preg_match_all($tagPattern, $body . "\n\n", $tagLines) && !empty($tagLines[0])) {
            $needles = array_map('trim', $tagLines[0]);
            // Descending order so that "boundary--" is replaced before "boundary".
            arsort($needles);
            $body = str_replace($needles, self::PART_MARKER, $body);
        }

        foreach (explode(self::PART_MARKER, $body) as $chunk) {
            $chunk = trim($chunk);
            if (!$chunk) {
                continue;
            }

            // A part that is itself a multipart body: parse it recursively and merge its parts.
            // The capture stops at the first quote/newline so that a quoted
            // boundary is not captured together with its closing quote.
            if (preg_match('/boundary="?([^"\r\n]*)("|\r)/i', $chunk, $nested)) {
                if (str_contains($chunk, trim($nested[1]) . '--')) {
                    $inner = new self('--' . $nested[1] . "\r\n" . $chunk, $this->fileSavePath);
                    $this->item = array_merge($this->item, $inner->getItem());
                }
                continue;
            }

            $this->setItem($this->parsePart($chunk));
        }
    }

    /**
     * Parses one part (its headers followed by its content).
     *
     * @param string $item trimmed text of the part
     * @return array the parsed part, or the stored-file description for attachments
     */
    private function parsePart(string $item): array
    {
        $data = [];

        // Transfer encoding.
        $encode = $this->body_match_tag('Content-Transfer-Encoding:', $item);
        if ($encode) {
            $data['encode'] = strtolower($encode['text']);
            $item = str_replace($encode['origin'], '', $item);
        }

        // Content type, with its charset and name parameters.
        $type = $this->preg_match_type($item);
        if ($type) {
            $data['type'] = strtolower($type['type']);
            if (isset($type['charset'])) {
                $data['charset'] = strtolower($type['charset']);
            }
            if (isset($type['name'])) {
                // The name may be a MIME encoded-word; decode it so it is usable as a file name.
                $data['name'] = $this->decodeMimeValue($type['name']);
            }
            $item = str_replace($type['origin'], '', $item);
        }

        // Charset given outside of the Content-Type header.
        if (empty($data['charset'])) {
            $code = $this->preg_match_charset($item);
            if ($code) {
                $data['charset'] = strtolower($code['charset']);
                $item = str_replace($code['origin'], '', $item);
            }
        }

        // Remember the file name before the headers carrying it are removed.
        $filename = '';
        $flattened = str_replace(["\r", "\n"], ' ', $item);
        if (preg_match('/filename="([^"]*)"/i', $flattened, $nameMatch) === 1 && !empty($nameMatch[1])) {
            $filename = (string) mb_decode_mimeheader($nameMatch[1]);
        }

        // Evaluated per part so that the flag of one part never leaks into the next one.
        $isAttachment = preg_match('/Content-Disposition:\s*attachment\b/i', $item) === 1;

        // Headers that are not needed; parse them here if they ever are.
        foreach (['Content-Description:', 'Content-Disposition:', 'Mime-Version:', 'Content-Location:'] as $header) {
            $item = $this->body_remove_tag($item, $header);
        }

        $data['body'] = trim($item);

        if (empty($data['type'])) {
            return $data;
        }

        if ($data['type'] == 'multipart/alternative') {
            // Container header only: kept as-is.
            return $data;
        }

        if (strpos($data['type'], 'text/') === 0) {
            // A text part can still be an attachment.
            if ($filename !== '' && $isAttachment) {
                return $this->parseFile($data, $filename);
            }

            $text = trim($this->body_remove_tag($data['body'], 'Content-ID:'));
            // Strip up to four remaining Content-* headers.
            for ($i = 0; $i < 4; $i++) {
                $text = $this->body_remove_tag($text, "Content-");
            }
            $data['body'] = $this->decodeTextBody($text, $data['encode'] ?? '');

            return $data;
        }

        if (strpos($data['type'], 'message') === 0) {
            // Bounce messages embed the whole original mail; it is not recorded.
            $data['body'] = '';
            return $data;
        }

        if ($data['body']) {
            return $this->parseFile($data, $filename);
        }

        return $data;
    }

    /**
     * Decodes the content of a text part according to its transfer encoding.
     *
     * @param string $body
     * @param string $encoding lower-case Content-Transfer-Encoding value
     * @return string the decoded body; unchanged for unknown encodings
     */
    private function decodeTextBody(string $body, string $encoding): string
    {
        switch ($encoding) {
            case 'base64':
                return (string) base64_decode($body);

            case 'quoted-printable':
                return quoted_printable_decode($body);

            case '8bit':
                // Round trip through quoted-printable; skipped when ext-imap is not loaded.
                if (function_exists('imap_8bit')) {
                    $encoded = imap_8bit($body);
                    if (is_string($encoded)) {
                        return quoted_printable_decode($encoded);
                    }
                }
                return $body;

            default:
                return $body;
        }
    }

    /**
     * Decodes a MIME encoded-word (`=?charset?B?...?=`) to UTF-8.
     *
     * @param string $value
     * @return string the decoded value, or $value itself when it is not encoded or cannot be decoded
     */
    private function decodeMimeValue(string $value): string
    {
        if (!str_starts_with($value, '=?')) {
            return $value;
        }

        $decoded = iconv_mime_decode($value, ICONV_MIME_DECODE_CONTINUE_ON_ERROR, 'utf-8');

        return $decoded === false ? $value : $decoded;
    }

    /**
     * Stores an attachment part on disk.
     *
     * @param array  $item     parsed part (`body`, optionally `name`, `type`, ...)
     * @param string $filename file name taken from the Content-Disposition header
     * @return array|mixed file description (`filename`, `name`, optional
     *                     `content-id`, `path`, `signName`), or $item itself
     *                     when no file name with an extension is available
     * @author:dc
     * @time 2022/8/12 10:40
     */
    private function parseFile($item, $filename = '')
    {
        $filename = is_string($filename) ? $filename : '';
        $data = [];

        // Look for the file name in what is left of the part, then fall back.
        $data['filename'] = $this->file_save_name($item['body'], 'filename');
        $data['name'] = $this->file_save_name($item['body'], 'name');
        $data['name'] = $data['name'] ?: (empty($item['name']) ? $filename : (string) $item['name']);
        $data['filename'] = $data['filename'] ?: $data['name'];

        // Without a name that has an extension the part is not treated as a file.
        if (empty($data['filename']) || !str_contains($data['filename'], '.')) {
            return $item;
        }

        // The name comes from the mail, i.e. it is untrusted: keep only
        // alphanumerics so the extension can never carry "/" or ".." into the path.
        $segments = explode('.', $data['filename']);
        $ext = substr((string) preg_replace('/[^A-Za-z0-9]/', '', (string) end($segments)), 0, 16);

        $contentId = $this->getTag($item['body'], 'Content-ID:');
        if ($contentId !== '') {
            $data['content-id'] = trim(substr($contentId, strlen('Content-ID:')), " \t\r\n<>");
            $item['body'] = str_replace($contentId, '', $item['body']);
        }

        // The encoded content is whatever follows the last blank line.
        $lines = preg_split('/\r\n|\n/', trim($item['body'])) ?: [];
        $lastBlank = -1;
        foreach ($lines as $index => $line) {
            if ($line === '') {
                $lastBlank = $index;
            }
        }
        $item['body'] = implode('', array_slice($lines, $lastBlank + 1));

        $content = base64_decode(trim($item['body']));
        if ($content !== false && $content !== '') {
            $data['path'] = $this->fileSavePath;
            if (!is_dir($data['path'])) {
                mkdir($data['path'], 0775, true);
            }

            // Content-addressed name: identical attachments share one file.
            $data['signName'] = md5($content) . ($ext ? '.' . $ext : '');
            $data['path'] = str_replace('//', '/', $data['path'] . '/' . $data['signName']);

            // Best effort: a failed write is not reported to the caller.
            @file_put_contents($data['path'], $content);
        }

        return $data;
    }

    /**
     * Extracts a quoted `tag="value"` attribute from $body and removes it from $body.
     *
     * @param string $body searched text; the matched attribute is removed from it (by reference)
     * @param string $tag  attribute name, e.g. `filename` or `name`
     * @return string the value (single quotes removed, encoded-words decoded), '' when absent
     */
    private function file_save_name(&$body, $tag)
    {
        // Stops at the first closing quote instead of running to the last quote of the body.
        $pattern = '/' . preg_quote($tag, '/') . '="([^"]+)"/i';
        if (preg_match($pattern, (string) $body, $result) !== 1) {
            return '';
        }

        $body = str_replace($result[0], '', $body);

        $val = trim(str_replace("'", '', $result[1]));
        if ($val !== '' && strpos($val, '=?') === 0) {
            $val = $this->decodeMimeValue($val);
        }

        return $val;
    }

    /**
     * Returns the first header line starting with $tag (case-insensitive)
     * together with its folded continuation lines.
     *
     * @param string $content
     * @param string $tag header name including the colon, e.g. `Content-Type:`
     * @return string the header text, trimmed; '' when not found
     * @author:dc
     * @time 2023/10/30 22:45
     */
    public function getTag($content, $tag)
    {
        $needle = strtolower($tag);
        $collected = '';

        foreach (explode("\n", $content) as $line) {
            if ($collected !== '') {
                // After the header line only folded lines (leading tab/space) belong to it.
                $isContinuation = str_starts_with($line, "\t") || str_starts_with($line, ' ');
                if (trim($line) === '' || !$isContinuation) {
                    break;
                }
                $collected .= $line . "\n";
            }

            if (str_starts_with(strtolower($line), $needle)) {
                $collected .= $line . "\n";
            }
        }

        return trim($collected);
    }

    /**
     * Removes the header found by getTag() from $body.
     *
     * @param string $body
     * @param string $tag
     * @return mixed|string|string[] $body without the header (unchanged when it is absent)
     * @author:dc
     * @time 2022/8/12 10:34
     */
    private function body_remove_tag($body, $tag)
    {
        $header = $this->getTag($body, $tag);
        if ($header === '') {
            return $body;
        }

        return str_replace($header, '', $body);
    }

    /**
     * Reads a `charset=...` declaration.
     *
     * @param string $item
     * @return array `origin` (matched text) and `charset`, or [] when absent
     * @author:dc
     * @time 2022/8/12 10:28
     */
    private static function preg_match_charset($item)
    {
        // The value is taken from the capture group so the result does not
        // depend on the letter case of the word "charset".
        if (preg_match('/charset[ \t]*=[ \t]*"?([ \t0-9a-zA-Z_-]+)"?/i', $item, $result) !== 1) {
            return [];
        }

        return [
            'origin' => trim($result[0]),
            'charset' => trim($result[1]),
        ];
    }

    /**
     * Parses the Content-Type header of a part.
     *
     * @param string $item
     * @return array `origin` (full header text), `type` (media type) and one
     *               entry per parameter keyed by its lower-case name
     *               (e.g. `charset`, `name`); [] when the header is absent
     * @author:dc
     * @time 2022/8/12 10:26
     */
    private function preg_match_type($item)
    {
        $origin = $this->getTag($item, 'Content-Type:');
        if ($origin === '') {
            return [];
        }

        $ret = ['origin' => $origin];

        // getTag() matched the name case-insensitively at the start of the
        // header, so it is removed by length rather than by literal replace.
        $value = str_replace(['"', "'"], '', substr($origin, strlen('Content-Type:')));
        $segments = explode(';', $value);
        $ret['type'] = trim((string) array_shift($segments));

        foreach ($segments as $segment) {
            // Limit 2: values such as MIME encoded-words contain "=" themselves.
            $pair = explode('=', $segment, 2);
            $key = strtolower(trim($pair[0]));

            // Never let a parameter (e.g. `type=` of multipart/related) overwrite the reserved keys.
            if ($key === '' || $key === 'origin' || $key === 'type' || isset($ret[$key])) {
                continue;
            }

            $ret[$key] = trim($pair[1] ?? '');
        }

        return $ret;
    }

    /**
     * Finds a `Tag: value` header anywhere in $item.
     *
     * @param string $tag  header name including the colon, e.g. `Content-Transfer-Encoding:`
     * @param string $item
     * @return array `origin` (matched text) and `text` (value without quotes), or [] when absent
     * @author:dc
     * @time 2022/8/12 10:05
     */
    private function body_match_tag($tag, $item)
    {
        $pattern = '/' . preg_quote($tag, '/') . '[\w\W].*/i';
        if (preg_match($pattern, $item, $result) !== 1 || empty($result[0])) {
            return [];
        }

        $origin = trim($result[0]);

        // The match is case-insensitive, so strip the name by length.
        $text = trim(str_replace(['"', "'"], '', substr($origin, strlen($tag))));

        return [
            'origin' => $origin,
            'text' => $text,
        ];
    }
}