作者 刘锟

合并分支 'akun' 到 'master'

Akun



查看合并请求 !162
... ... @@ -39,14 +39,15 @@ class HtmlCollect extends Command
/**
 * Run the collector by calling start_collect().
 *
 * As written here the call sits inside a `while (true)` loop, so this
 * method keeps invoking start_collect() and never returns on its own.
 *
 * NOTE(review): this text looks like a rendered merge-request diff with the
 * old and new lines interleaved; the commented-out `while` / `}` lines
 * suggest the loop was being disabled for a single-pass run. Confirm against
 * the actual file which version is live.
 */
public function handle()
{
while (true) {
// while (true) {
$this->start_collect();
}
// }
}
protected function start_collect()
{
$task_id = $this->get_task();
// $task_id = $this->get_task();
$task_id = '437_5995';
if ($task_id === false) {
//所有项目采集完成
sleep(60);
... ... @@ -336,17 +337,21 @@ class HtmlCollect extends Command
]);
$html = str_replace($vs['url'], getImageUrl($new_source), $html);
if (substr($new_source, -3, 3) == 'css') {
// 下载css文件中的资源
$css_html = curl_c($vs['url_complete'], false);
preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $css_html, $result_css_source);
$css_source = $result_css_source[1] ?? [];
if (substr($new_source, -3, 3) == 'css' || substr($new_source, -2, 2) == 'js') {
$source_html = curl_c($vs['url_complete'], false);
if (substr($new_source, -3, 3) == 'css') {
preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $source_html, $result_source);
} else {
preg_match_all("/[large|thumb]+URL:['\"]+(\s*[^>]+?)['\"]+,/i", $source_html, $result_source);
}
$source_list = $result_source[1] ?? [];
$url_arr = explode('/', $vs['url_complete']);
$target_arr = explode('/', $new_source);
foreach ($css_source as $vcs) {
foreach ($source_list as $vcs) {
$vcs = str_replace('"', '', $vcs);
$vcs_arr = parse_url($vcs);
if (isset($vcs_arr['domain'])) {
//不是相对路径,不下载
continue;
... ...