作者 刘锟

update

@@ -39,14 +39,15 @@ class HtmlCollect extends Command @@ -39,14 +39,15 @@ class HtmlCollect extends Command
39 39
40 public function handle() 40 public function handle()
41 { 41 {
42 - while (true) { 42 +// while (true) {
43 $this->start_collect(); 43 $this->start_collect();
44 - } 44 +// }
45 } 45 }
46 46
47 protected function start_collect() 47 protected function start_collect()
48 { 48 {
49 - $task_id = $this->get_task(); 49 +// $task_id = $this->get_task();
  50 + $task_id = '437_5995';
50 if ($task_id === false) { 51 if ($task_id === false) {
51 //所有项目采集完成 52 //所有项目采集完成
52 sleep(60); 53 sleep(60);
@@ -336,17 +337,21 @@ class HtmlCollect extends Command @@ -336,17 +337,21 @@ class HtmlCollect extends Command
336 ]); 337 ]);
337 $html = str_replace($vs['url'], getImageUrl($new_source), $html); 338 $html = str_replace($vs['url'], getImageUrl($new_source), $html);
338 339
339 - if (substr($new_source, -3, 3) == 'css') {  
340 - // 下载css文件中的资源  
341 - $css_html = curl_c($vs['url_complete'], false);  
342 - preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $css_html, $result_css_source);  
343 - $css_source = $result_css_source[1] ?? []; 340 + if (substr($new_source, -3, 3) == 'css' || substr($new_source, -2, 2) == 'js') {
  341 + $source_html = curl_c($vs['url_complete'], false);
  342 + if (substr($new_source, -3, 3) == 'css') {
  343 + preg_match_all("/url\(['\"](\s*[^>]+?)['\"]\)/i", $source_html, $result_source);
  344 + } else {
  345 + preg_match_all("/[large|thumb]+URL:['\"]+(\s*[^>]+?)['\"]+,/i", $source_html, $result_source);
  346 + }
  347 + $source_list = $result_source[1] ?? [];
344 348
345 $url_arr = explode('/', $vs['url_complete']); 349 $url_arr = explode('/', $vs['url_complete']);
346 $target_arr = explode('/', $new_source); 350 $target_arr = explode('/', $new_source);
347 - foreach ($css_source as $vcs) { 351 + foreach ($source_list as $vcs) {
348 $vcs = str_replace('"', '', $vcs); 352 $vcs = str_replace('"', '', $vcs);
349 $vcs_arr = parse_url($vcs); 353 $vcs_arr = parse_url($vcs);
  354 +
350 if (isset($vcs_arr['domain'])) { 355 if (isset($vcs_arr['domain'])) {
351 //不是相对路径,不下载 356 //不是相对路径,不下载
352 continue; 357 continue;