作者 lyh

gx

... ... @@ -158,21 +158,17 @@ class ProofreadingController extends BaseController
$strippedContent = preg_replace($pattern, '', $strippedContent); // 删除 `<link>` 标签
$pattern = '/<footer\b[^>]*>(.*?)<\/footer>/s'; // Regex matching `<footer>` elements together with their content
$strippedContent = preg_replace($pattern, '', $strippedContent); // Remove `<footer>` elements and their content
// $pattern = '/>([^<]+)</'; // 定义匹配中间内容不是标签的正则表达式
// $matches = array();
// preg_match_all($pattern, $strippedContent, $matches);
// $textContentArray = array_filter($matches[1], function($item) {
// $item = str_replace("\n", "", $item);
// return !empty(trim($item));
// });
$pattern = '/<(\w+)[^>]*>(.*?)<\/\1>/s'; // Define the regular expression pattern
preg_match_all($pattern, $strippedContent, $matches); // Match all tags and their content
$tagContentArray = $matches[2]; // Extract the content part from the matches array
// $textContentArray = array_values($textContentArray);
// $uniqueArray = array_unique($textContentArray);
// $textContentArray = array_values($uniqueArray);
return $tagContentArray;
$pattern = '/>([^<]+)</'; // 定义匹配中间内容不是标签的正则表达式
$matches = array();
preg_match_all($pattern, $strippedContent, $matches);
$textContentArray = array_filter($matches[1], function($item) {
$item = str_replace("\n", "", $item);
return !empty(trim($item));
});
$textContentArray = array_values($textContentArray);
$uniqueArray = array_unique($textContentArray);
$textContentArray = array_values($uniqueArray);
return $textContentArray;
}
/**
... ...