|
...
|
...
|
@@ -9,7 +9,9 @@ use App\Models\RouteMap\RouteMap; |
|
|
|
use App\Services\CosService;
|
|
|
|
use App\Services\ProjectServer;
|
|
|
|
use Illuminate\Console\Command;
|
|
|
|
use Illuminate\Support\Facades\Cache;
|
|
|
|
use Illuminate\Support\Facades\DB;
|
|
|
|
use Illuminate\Support\Facades\Log;
|
|
|
|
use Illuminate\Support\Facades\Redis;
|
|
|
|
|
|
|
|
/**
|
|
...
|
...
|
@@ -76,26 +78,12 @@ class HtmlCollect extends Command |
|
|
|
$collect_info->save();
|
|
|
|
|
|
|
|
//获取站点正式和测试域名
|
|
|
|
$web_url_domain = $collect_info->domain;
|
|
|
|
$home_url = $collect_info->domain;
|
|
|
|
$url_web_config = 'https://' . $collect_info->domain . '/wp-content/cache/user_config.text';
|
|
|
|
$data_config = curl_c($url_web_config);
|
|
|
|
if ($data_config) {
|
|
|
|
$web_url_arr = parse_url($data_config['web_url_domain'] ?? '');
|
|
|
|
if (isset($web_url_arr['host'])) {
|
|
|
|
$web_url_domain = $web_url_arr['host'];
|
|
|
|
}
|
|
|
|
|
|
|
|
$home_url_arr = parse_url($data_config['home_url'] ?? '');
|
|
|
|
if (isset($home_url_arr['host'])) {
|
|
|
|
$home_url = $home_url_arr['host'];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
$old_info = $this->getOldDomain($project_id, $collect_info->domain);
|
|
|
|
|
|
|
|
//采集html页面,下载资源到本地并替换
|
|
|
|
try {
|
|
|
|
$html = curl_c('https://' . $collect_info->domain . $collect_info->route, false);
|
|
|
|
if($html == '0'){
|
|
|
|
if ($html == '0') {
|
|
|
|
$collect_info->status = CollectTask::STATUS_FAIL;
|
|
|
|
$collect_info->save();
|
|
|
|
echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', collect_id: ' . $collect_id . ', error: no html' . PHP_EOL;
|
|
...
|
...
|
@@ -103,7 +91,15 @@ class HtmlCollect extends Command |
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
$source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url);
|
|
|
|
//如果有base64图片,先替换掉,再进行资源匹配
|
|
|
|
$new_html = $html;
|
|
|
|
preg_match_all("/data:([^;]*);base64,(.*)?\"/", $new_html, $result_img);
|
|
|
|
$img_base64 = $result_img[2] ?? [];
|
|
|
|
foreach ($img_base64 as $v64) {
|
|
|
|
$new_html = str_replace($v64, '', $new_html);
|
|
|
|
}
|
|
|
|
|
|
|
|
$source_list = $this->html_preg($new_html, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']);
|
|
|
|
|
|
|
|
if ($source_list) {
|
|
|
|
$html = $this->upload_source($html, $source_list, $project_id);
|
|
...
|
...
|
@@ -187,6 +183,42 @@ class HtmlCollect extends Command |
|
|
|
return $task_id;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Resolve the site's previously configured ("old") host names.
 *
 * The result is looked up in the cache under
 * `project_collect_domain_{$project_id}`. On a cache miss the method requests
 * `https://{$domain}/wp-content/cache/user_config.text` through `curl_c()` and
 * extracts the host part of the `web_url_domain` and `home_url` entries.
 * Any entry that is missing, or whose URL has no host component, falls back
 * to `$domain`.
 *
 * NOTE(review): the fallback result is written to the cache even when the
 * config request returns nothing — confirm that this is intended.
 *
 * @param mixed  $project_id Project identifier; only used to build the cache key.
 * @param string $domain     Domain being collected; used for the request and as fallback.
 *
 * @return array On a cache miss: ['web_url_domain' => host, 'home_url' => host].
 *               On a cache hit: whatever value is stored under the key
 *               (presumably the same shape — verify against other writers).
 */
protected function getOldDomain($project_id, $domain)
{
    $cache_key = 'project_collect_domain_' . $project_id;

    // Guard clause: any truthy cached value is returned untouched.
    $cached = Cache::get($cache_key);
    if ($cached) {
        return $cached;
    }

    // Both hosts default to the domain currently being collected.
    $hosts = [
        'web_url_domain' => $domain,
        'home_url' => $domain,
    ];

    // assumes curl_c() yields the decoded config as an array — TODO confirm
    $config = curl_c('https://' . $domain . '/wp-content/cache/user_config.text');
    if ($config) {
        foreach (array_keys($hosts) as $field) {
            $parts = parse_url($config[$field] ?? '');
            // Only override the default when the URL really carries a host.
            if (isset($parts['host'])) {
                $hosts[$field] = $parts['host'];
            }
        }
    }

    // Original comment states "cache for 1 hour"; that holds only if the
    // TTL is interpreted as seconds — confirm against the framework version.
    Cache::add($cache_key, $hosts, 3600);

    return $hosts;
}
|
|
|
|
|
|
|
|
//正则匹配html资源
|
|
|
|
protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url)
|
|
|
|
{
|
|
...
|
...
|
@@ -236,7 +268,6 @@ class HtmlCollect extends Command |
|
|
|
$check_vc_b && $source[] = $check_vc_b;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return $source;
|
|
|
|
}
|
|
|
|
|
|
...
|
...
|
@@ -319,7 +350,7 @@ class HtmlCollect extends Command |
|
|
|
continue;
|
|
|
|
}
|
|
|
|
$path_arr = explode('.', $vcs);
|
|
|
|
if(end($path_arr) == 'html'){
|
|
|
|
if (end($path_arr) == 'html') {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
...
|
...
|
|