作者 lyh

gx

@@ -9,7 +9,9 @@ use App\Models\RouteMap\RouteMap; @@ -9,7 +9,9 @@ use App\Models\RouteMap\RouteMap;
9 use App\Services\CosService; 9 use App\Services\CosService;
10 use App\Services\ProjectServer; 10 use App\Services\ProjectServer;
11 use Illuminate\Console\Command; 11 use Illuminate\Console\Command;
  12 +use Illuminate\Support\Facades\Cache;
12 use Illuminate\Support\Facades\DB; 13 use Illuminate\Support\Facades\DB;
  14 +use Illuminate\Support\Facades\Log;
13 use Illuminate\Support\Facades\Redis; 15 use Illuminate\Support\Facades\Redis;
14 16
15 /** 17 /**
@@ -39,7 +41,7 @@ class HtmlCollect extends Command @@ -39,7 +41,7 @@ class HtmlCollect extends Command
// Keeps calling start_collect() in an endless loop; this method itself has no exit condition.
39 public function handle() 41 public function handle()
40 { 42 {
41 while (true) { 43 while (true) {
42 - $this->start_collect(); 44 + $this->start_collect();
43 } 45 }
44 } 46 }
45 47
@@ -76,34 +78,28 @@ class HtmlCollect extends Command @@ -76,34 +78,28 @@ class HtmlCollect extends Command
76 $collect_info->save(); 78 $collect_info->save();
77 79
78 //获取站点正式和测试域名 80 //获取站点正式和测试域名
79 - $web_url_domain = $collect_info->domain;  
80 - $home_url = $collect_info->domain;  
81 - $url_web_config = 'https://' . $collect_info->domain . '/wp-content/cache/user_config.text';  
82 - $data_config = curl_c($url_web_config);  
83 - if ($data_config) {  
84 - $web_url_arr = parse_url($data_config['web_url_domain'] ?? '');  
85 - if (isset($web_url_arr['host'])) {  
86 - $web_url_domain = $web_url_arr['host'];  
87 - }  
88 -  
89 - $home_url_arr = parse_url($data_config['home_url'] ?? '');  
90 - if (isset($home_url_arr['host'])) {  
91 - $home_url = $home_url_arr['host'];  
92 - }  
93 - } 81 + $old_info = $this->getOldDomain($project_id, $collect_info->domain);
94 82
95 //采集html页面,下载资源到本地并替换 83 //采集html页面,下载资源到本地并替换
96 try { 84 try {
97 $html = curl_c('https://' . $collect_info->domain . $collect_info->route, false); 85 $html = curl_c('https://' . $collect_info->domain . $collect_info->route, false);
98 - if($html == '0'){ 86 + if ($html == '0') {
99 $collect_info->status = CollectTask::STATUS_FAIL; 87 $collect_info->status = CollectTask::STATUS_FAIL;
100 $collect_info->save(); 88 $collect_info->save();
101 - echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', collect_id: ' . $collect_id . ', error: no html' . PHP_EOL; 89 + echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', collect_id: ' . $collect_id . ', error: no html' . PHP_EOL;
102 sleep(2); 90 sleep(2);
103 return true; 91 return true;
104 } 92 }
105 93
106 - $source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url); 94 + //如果有base64图片,先替换掉,再进行资源匹配
  95 + $new_html = $html;
  96 + preg_match_all("/data:([^;]*);base64,(.*)?\"/", $new_html, $result_img);
  97 + $img_base64 = $result_img[2] ?? [];
  98 + foreach ($img_base64 as $v64) {
  99 + $new_html = str_replace($v64, '', $new_html);
  100 + }
  101 +
  102 + $source_list = $this->html_preg($new_html, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']);
107 103
108 if ($source_list) { 104 if ($source_list) {
109 $html = $this->upload_source($html, $source_list, $project_id); 105 $html = $this->upload_source($html, $source_list, $project_id);
@@ -187,6 +183,42 @@ class HtmlCollect extends Command @@ -187,6 +183,42 @@ class HtmlCollect extends Command
187 return $task_id; 183 return $task_id;
188 } 184 }
189 185
  // Resolves the legacy host names of a project's site and caches the result per project.
  // $project_id - only used to build the cache key "project_collect_domain_<id>".
  // $domain     - host the config file is fetched from; also the default for both result values.
  // Returns an array with the keys web_url_domain and home_url, each holding a host name.
  186 + // Get the site's old domains
  187 + protected function getOldDomain($project_id, $domain)
  188 + {
  189 + $key = 'project_collect_domain_' . $project_id;
  190 +
  191 + $data = Cache::get($key); // result stored by an earlier call, if any
  192 +
  193 + if (!$data) { // nothing usable in the cache: build the result from the remote config
  194 + $web_url_domain = $domain; // default: fall back to the domain passed in
  195 + $home_url = $domain; // default: fall back to the domain passed in
  196 +
  197 + $url_web_config = 'https://' . $domain . '/wp-content/cache/user_config.text';
  198 + $data_config = curl_c($url_web_config); // curl_c is defined elsewhere; presumably returns the decoded config as an array - TODO confirm
  199 + if ($data_config) {
  200 + $web_url_arr = parse_url($data_config['web_url_domain'] ?? ''); // only the host part of the configured URL is kept
  201 + if (isset($web_url_arr['host'])) {
  202 + $web_url_domain = $web_url_arr['host'];
  203 + }
  204 +
  205 + $home_url_arr = parse_url($data_config['home_url'] ?? ''); // only the host part of the configured URL is kept
  206 + if (isset($home_url_arr['host'])) {
  207 + $home_url = $home_url_arr['host'];
  208 + }
  209 + }
  210 +
  211 + $data = [
  212 + 'web_url_domain' => $web_url_domain,
  213 + 'home_url' => $home_url,
  214 + ];
  215 +
  216 + Cache::add($key, $data, 3600);// cache for 1 hour. NOTE(review): this also runs when the config fetch failed, so the fallback values get cached too - confirm that is intended
  217 + }
  218 +
  219 + return $data;
  220 + }
  221 +
190 //正则匹配html资源 222 //正则匹配html资源
191 protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url) 223 protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url)
192 { 224 {
@@ -236,7 +268,6 @@ class HtmlCollect extends Command @@ -236,7 +268,6 @@ class HtmlCollect extends Command
236 $check_vc_b && $source[] = $check_vc_b; 268 $check_vc_b && $source[] = $check_vc_b;
237 } 269 }
238 270
239 -  
240 return $source; 271 return $source;
241 } 272 }
242 273
@@ -319,7 +350,7 @@ class HtmlCollect extends Command @@ -319,7 +350,7 @@ class HtmlCollect extends Command
319 continue; 350 continue;
320 } 351 }
321 $path_arr = explode('.', $vcs); 352 $path_arr = explode('.', $vcs);
322 - if(end($path_arr) == 'html'){ 353 + if (end($path_arr) == 'html') {
323 continue; 354 continue;
324 } 355 }
325 356
@@ -296,7 +296,7 @@ class ProjectUpdate extends Command @@ -296,7 +296,7 @@ class ProjectUpdate extends Command
296 $id = $model->insertGetId([ 296 $id = $model->insertGetId([
297 'project_id' => $project_id, 297 'project_id' => $project_id,
298 'title' => $item['ttile'], 298 'title' => $item['ttile'],
299 - 'intro' => $item['description'] ?? '', 299 + 'intro' => $item['short_description'] ?? '',
300 'content' => $item['content'] ?? '', 300 'content' => $item['content'] ?? '',
301 'category_id' => $category_id, 301 'category_id' => $category_id,
302 'thumb' => isset($gallery[0]) ? Arr::a2s($gallery[0]) : '', 302 'thumb' => isset($gallery[0]) ? Arr::a2s($gallery[0]) : '',
@@ -462,6 +462,9 @@ class ProjectUpdate extends Command @@ -462,6 +462,9 @@ class ProjectUpdate extends Command
462 DB::disconnect('custom_mysql'); 462 DB::disconnect('custom_mysql');
463 463
464 $task->status = UpdateLog::STATUS_COM;//同步完成 464 $task->status = UpdateLog::STATUS_COM;//同步完成
  465 + if($api_type == 'post' || $api_type == 'page' || $api_type == 'news' || $api_type == 'blog'){
  466 + $task->collect_status = UpdateLog::COLLECT_STATUS_UN;
  467 + }
465 $task->save(); 468 $task->save();
466 469
467 echo 'date:' . date('Y-m-d H:i:s') . ', task_id: ' . $task->id . ', task_type: ' . $api_type . ', update end ' . PHP_EOL; 470 echo 'date:' . date('Y-m-d H:i:s') . ', task_id: ' . $task->id . ', task_type: ' . $api_type . ', update end ' . PHP_EOL;
@@ -374,7 +374,7 @@ class BTemplateLogic extends BaseLogic @@ -374,7 +374,7 @@ class BTemplateLogic extends BaseLogic
374 $newsInfo = $newsModel->read(['id'=>$source_id],['url']); 374 $newsInfo = $newsModel->read(['id'=>$source_id],['url']);
375 $route = $newsInfo['url']; 375 $route = $newsInfo['url'];
376 }else{ 376 }else{
377 - $type = 'index'; 377 + $type = 'all';
378 $route = 'all'; 378 $route = 'all';
379 } 379 }
380 } 380 }