作者 lyh

gx

... ... @@ -9,7 +9,9 @@ use App\Models\RouteMap\RouteMap;
use App\Services\CosService;
use App\Services\ProjectServer;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Cache;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Redis;
/**
... ... @@ -76,26 +78,12 @@ class HtmlCollect extends Command
$collect_info->save();
//获取站点正式和测试域名
$web_url_domain = $collect_info->domain;
$home_url = $collect_info->domain;
$url_web_config = 'https://' . $collect_info->domain . '/wp-content/cache/user_config.text';
$data_config = curl_c($url_web_config);
if ($data_config) {
$web_url_arr = parse_url($data_config['web_url_domain'] ?? '');
if (isset($web_url_arr['host'])) {
$web_url_domain = $web_url_arr['host'];
}
$home_url_arr = parse_url($data_config['home_url'] ?? '');
if (isset($home_url_arr['host'])) {
$home_url = $home_url_arr['host'];
}
}
$old_info = $this->getOldDomain($project_id, $collect_info->domain);
//采集html页面,下载资源到本地并替换
try {
$html = curl_c('https://' . $collect_info->domain . $collect_info->route, false);
if($html == '0'){
if ($html == '0') {
$collect_info->status = CollectTask::STATUS_FAIL;
$collect_info->save();
echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', collect_id: ' . $collect_id . ', error: no html' . PHP_EOL;
... ... @@ -103,7 +91,15 @@ class HtmlCollect extends Command
return true;
}
$source_list = $this->html_preg($html, $project_id, $collect_info->domain, $web_url_domain, $home_url);
//如果有base64图片,先替换掉,再进行资源匹配
$new_html = $html;
preg_match_all("/data:([^;]*);base64,(.*)?\"/", $new_html, $result_img);
$img_base64 = $result_img[2] ?? [];
foreach ($img_base64 as $v64) {
$new_html = str_replace($v64, '', $new_html);
}
$source_list = $this->html_preg($new_html, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']);
if ($source_list) {
$html = $this->upload_source($html, $source_list, $project_id);
... ... @@ -187,6 +183,42 @@ class HtmlCollect extends Command
return $task_id;
}
/**
 * Resolve the site's old (previous) domains, using a per-project cache entry.
 *
 * Both values default to $domain. When the site's
 * /wp-content/cache/user_config.text can be fetched, the host parts of its
 * `web_url_domain` and `home_url` entries override the defaults.
 *
 * @param mixed  $project_id Project identifier; concatenated into the cache key.
 * @param string $domain     Domain the config is requested from; also the fallback value.
 *
 * @return array Array with the keys `web_url_domain` and `home_url`
 *               (or whatever truthy value is already cached under the key).
 */
protected function getOldDomain($project_id, $domain)
{
    $cache_key = 'project_collect_domain_' . $project_id;

    // Any truthy cached value is returned as-is, skipping the remote fetch.
    $cached = Cache::get($cache_key);
    if ($cached) {
        return $cached;
    }

    // Start from the fallback: both entries equal the domain that was passed in.
    $domains = [
        'web_url_domain' => $domain,
        'home_url' => $domain,
    ];

    // NOTE(review): curl_c() presumably returns the decoded config as an array,
    // or a falsy value on failure — confirm against the helper's definition.
    $config = curl_c('https://' . $domain . '/wp-content/cache/user_config.text');
    if ($config) {
        foreach (['web_url_domain', 'home_url'] as $field) {
            // Only a value that parses to a URL with a host replaces the fallback.
            $parts = parse_url($config[$field] ?? '');
            if (isset($parts['host'])) {
                $domains[$field] = $parts['host'];
            }
        }
    }

    // The result is cached even when the config fetch failed (fallback values).
    // TTL 3600 is meant as one hour per the original comment; integer TTLs are
    // seconds in Laravel >= 5.8 but minutes in older releases — TODO confirm.
    Cache::add($cache_key, $domains, 3600);

    return $domains;
}
//正则匹配html资源
protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url)
{
... ... @@ -236,7 +268,6 @@ class HtmlCollect extends Command
$check_vc_b && $source[] = $check_vc_b;
}
return $source;
}
... ... @@ -319,7 +350,7 @@ class HtmlCollect extends Command
continue;
}
$path_arr = explode('.', $vcs);
if(end($path_arr) == 'html'){
if (end($path_arr) == 'html') {
continue;
}
... ...
... ... @@ -296,7 +296,7 @@ class ProjectUpdate extends Command
$id = $model->insertGetId([
'project_id' => $project_id,
'title' => $item['ttile'],
'intro' => $item['description'] ?? '',
'intro' => $item['short_description'] ?? '',
'content' => $item['content'] ?? '',
'category_id' => $category_id,
'thumb' => isset($gallery[0]) ? Arr::a2s($gallery[0]) : '',
... ... @@ -462,6 +462,9 @@ class ProjectUpdate extends Command
DB::disconnect('custom_mysql');
$task->status = UpdateLog::STATUS_COM;//同步完成
if($api_type == 'post' || $api_type == 'page' || $api_type == 'news' || $api_type == 'blog'){
$task->collect_status = UpdateLog::COLLECT_STATUS_UN;
}
$task->save();
echo 'date:' . date('Y-m-d H:i:s') . ', task_id: ' . $task->id . ', task_type: ' . $api_type . ', update end ' . PHP_EOL;
... ...
... ... @@ -374,7 +374,7 @@ class BTemplateLogic extends BaseLogic
$newsInfo = $newsModel->read(['id'=>$source_id],['url']);
$route = $newsInfo['url'];
}else{
$type = 'index';
$type = 'all';
$route = 'all';
}
}
... ...