|
...
|
...
|
@@ -158,21 +158,17 @@ class ProofreadingController extends BaseController |
|
|
|
$strippedContent = preg_replace($pattern, '', $strippedContent); // 删除 `<link>` 标签
|
|
|
|
$pattern = '/<footer\b[^>]*>(.*?)<\/footer>/s'; // Regex matching `<footer>` elements together with their content
|
|
|
|
$strippedContent = preg_replace($pattern, '', $strippedContent); // Remove `<footer>` elements and their content
|
|
|
|
// $pattern = '/>([^<]+)</'; // 定义匹配中间内容不是标签的正则表达式
|
|
|
|
// $matches = array();
|
|
|
|
// preg_match_all($pattern, $strippedContent, $matches);
|
|
|
|
// $textContentArray = array_filter($matches[1], function($item) {
|
|
|
|
// $item = str_replace("\n", "", $item);
|
|
|
|
// return !empty(trim($item));
|
|
|
|
// });
|
|
|
|
$pattern = '/<(\w+)[^>]*>(.*?)<\/\1>/s'; // Define the regular expression pattern
|
|
|
|
preg_match_all($pattern, $strippedContent, $matches); // Match all tags and their content
|
|
|
|
|
|
|
|
$tagContentArray = $matches[2]; // Extract the content part from the matches array
|
|
|
|
// $textContentArray = array_values($textContentArray);
|
|
|
|
// $uniqueArray = array_unique($textContentArray);
|
|
|
|
// $textContentArray = array_values($uniqueArray);
|
|
|
|
return $tagContentArray;
|
|
|
|
$pattern = '/>([^<]+)</'; // 定义匹配中间内容不是标签的正则表达式
|
|
|
|
$matches = array();
|
|
|
|
preg_match_all($pattern, $strippedContent, $matches);
|
|
|
|
$textContentArray = array_filter($matches[1], function($item) {
|
|
|
|
$item = str_replace("\n", "", $item);
|
|
|
|
return !empty(trim($item));
|
|
|
|
});
|
|
|
|
$textContentArray = array_values($textContentArray);
|
|
|
|
$uniqueArray = array_unique($textContentArray);
|
|
|
|
$textContentArray = array_values($uniqueArray);
|
|
|
|
return $textContentArray;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
...
|
...
|
|