作者 lyh

gx

@@ -29,6 +29,8 @@ class ProofreadingController extends BaseController @@ -29,6 +29,8 @@ class ProofreadingController extends BaseController
29 $list = $proofreadingModel->list(['url'=>$this->param['url'],'language_id'=>$this->param['language_id'],'type'=>1],'text',['text','translate']); 29 $list = $proofreadingModel->list(['url'=>$this->param['url'],'language_id'=>$this->param['language_id'],'type'=>1],'text',['text','translate']);
30 //获取当前URL的所有文本内容 30 //获取当前URL的所有文本内容
31 $new_list = $this->getUrlRead($this->param['url']); 31 $new_list = $this->getUrlRead($this->param['url']);
  32 + var_dump($new_list);
  33 + die();
32 if(empty($list)){ 34 if(empty($list)){
33 $data = []; 35 $data = [];
34 $translate_list = Translate::tran($new_list, $countryInfo['alias']); 36 $translate_list = Translate::tran($new_list, $countryInfo['alias']);
@@ -50,10 +52,6 @@ class ProofreadingController extends BaseController @@ -50,10 +52,6 @@ class ProofreadingController extends BaseController
50 ]; 52 ];
51 } 53 }
52 $arr2 = array_diff($new_list, $old_list); 54 $arr2 = array_diff($new_list, $old_list);
53 - var_dump($new_list);  
54 - var_dump($old_list);  
55 - var_dump($arr2);  
56 - die();  
57 if(!empty($arr2)){ 55 if(!empty($arr2)){
58 $translate_list = Translate::tran($arr2, $countryInfo['alias']); 56 $translate_list = Translate::tran($arr2, $countryInfo['alias']);
59 foreach ($arr2 as $k1=>$v1){ 57 foreach ($arr2 as $k1=>$v1){
@@ -160,17 +158,21 @@ class ProofreadingController extends BaseController @@ -160,17 +158,21 @@ class ProofreadingController extends BaseController
160 $strippedContent = preg_replace($pattern, '', $strippedContent); // 删除 `<link>` 标签 158 $strippedContent = preg_replace($pattern, '', $strippedContent); // 删除 `<link>` 标签
161 $pattern = '/<footer\b[^>]*>(.*?)<\/footer>/s'; // 定义匹配`<footer>`标签及其内容的正则表达式 159 $pattern = '/<footer\b[^>]*>(.*?)<\/footer>/s'; // 定义匹配`<footer>`标签及其内容的正则表达式
162 $strippedContent = preg_replace($pattern, '', $strippedContent); // 删除`<footer>`标签及其内容 160 $strippedContent = preg_replace($pattern, '', $strippedContent); // 删除`<footer>`标签及其内容
163 - $pattern = '/>([^<]+)</'; // 定义匹配中间内容不是标签的正则表达式  
164 - $matches = array();  
165 - preg_match_all($pattern, $strippedContent, $matches);  
166 - $textContentArray = array_filter($matches[1], function($item) {  
167 - $item = str_replace("\n", "", $item);  
168 - return !empty(trim($item));  
169 - });  
170 - $textContentArray = array_values($textContentArray);  
171 - $uniqueArray = array_unique($textContentArray);  
172 - $textContentArray = array_values($uniqueArray);  
173 - return $textContentArray; 161 +// $pattern = '/>([^<]+)</'; // 定义匹配中间内容不是标签的正则表达式
  162 +// $matches = array();
  163 +// preg_match_all($pattern, $strippedContent, $matches);
  164 +// $textContentArray = array_filter($matches[1], function($item) {
  165 +// $item = str_replace("\n", "", $item);
  166 +// return !empty(trim($item));
  167 +// });
  168 + $pattern = '/<(\w+)[^>]*>(.*?)<\/\1>/s'; // Define the regular expression pattern
  169 + preg_match_all($pattern, $strippedContent, $matches); // Match all tags and their content
  170 +
  171 + $tagContentArray = $matches[2]; // Extract the content part from the matches array
  172 +// $textContentArray = array_values($textContentArray);
  173 +// $uniqueArray = array_unique($textContentArray);
  174 +// $textContentArray = array_values($uniqueArray);
  175 + return $tagContentArray;
174 } 176 }
175 177
176 /** 178 /**