正在显示 1 个修改的文件，包含 14 行增加和 9 行删除
| @@ -39,14 +39,15 @@ class HtmlCollect extends Command | @@ -39,14 +39,15 @@ class HtmlCollect extends Command | ||
| 39 | 39 | ||
| 40 | public function handle() | 40 | public function handle() |
| 41 | { | 41 | { |
| 42 | - while (true) { | 42 | +// while (true) { |
| 43 | $this->start_collect(); | 43 | $this->start_collect(); |
| 44 | - } | 44 | +// } |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | protected function start_collect() | 47 | protected function start_collect() |
| 48 | { | 48 | { |
| 49 | - $task_id = $this->get_task(); | 49 | +// $task_id = $this->get_task(); |
| 50 | + $task_id = '437_5995'; | ||
| 50 | if ($task_id === false) { | 51 | if ($task_id === false) { |
| 51 | //所有项目采集完成 | 52 | //所有项目采集完成 |
| 52 | sleep(60); | 53 | sleep(60); |
| @@ -336,17 +337,21 @@ class HtmlCollect extends Command | @@ -336,17 +337,21 @@ class HtmlCollect extends Command | ||
| 336 | ]); | 337 | ]); |
| 337 | $html = str_replace($vs['url'], getImageUrl($new_source), $html); | 338 | $html = str_replace($vs['url'], getImageUrl($new_source), $html); |
| 338 | 339 | ||
| 339 | - if (substr($new_source, -3, 3) == 'css') { | ||
| 340 | - // 下载css文件中的资源 | ||
| 341 | - $css_html = curl_c($vs['url_complete'], false); | ||
| 342 | - preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $css_html, $result_css_source); | ||
| 343 | - $css_source = $result_css_source[1] ?? []; | 340 | + if (substr($new_source, -3, 3) == 'css' || substr($new_source, -2, 2) == 'js') { |
| 341 | + $source_html = curl_c($vs['url_complete'], false); | ||
| 342 | + if (substr($new_source, -3, 3) == 'css') { | ||
| 343 | + preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $source_html, $result_source); | ||
| 344 | + } else { | ||
| 345 | + preg_match_all("/[large|thumb]+URL:['\"]+(\s*[^>]+?)['\"]+,/i", $source_html, $result_source); | ||
| 346 | + } | ||
| 347 | + $source_list = $result_source[1] ?? []; | ||
| 344 | 348 | ||
| 345 | $url_arr = explode('/', $vs['url_complete']); | 349 | $url_arr = explode('/', $vs['url_complete']); |
| 346 | $target_arr = explode('/', $new_source); | 350 | $target_arr = explode('/', $new_source); |
| 347 | - foreach ($css_source as $vcs) { | 351 | + foreach ($source_list as $vcs) { |
| 348 | $vcs = str_replace('"', '', $vcs); | 352 | $vcs = str_replace('"', '', $vcs); |
| 349 | $vcs_arr = parse_url($vcs); | 353 | $vcs_arr = parse_url($vcs); |
| 354 | + | ||
| 350 | if (isset($vcs_arr['domain'])) { | 355 | if (isset($vcs_arr['domain'])) { |
| 351 | //不是相对路径,不下载 | 356 | //不是相对路径,不下载 |
| 352 | continue; | 357 | continue; |
-
请 注册 或 登录 后发表评论