|
...
|
...
|
@@ -8,6 +8,7 @@ use App\Models\Collect\CollectSource; |
|
|
|
use App\Models\Collect\CollectTask;
|
|
|
|
use App\Models\Com\UpdateLog;
|
|
|
|
use App\Models\Com\UpdateOldInfo;
|
|
|
|
use App\Models\CustomModule\CustomModule;
|
|
|
|
use App\Models\CustomModule\CustomModuleContent;
|
|
|
|
use App\Models\News\News;
|
|
|
|
use App\Models\Product\Product;
|
|
...
|
...
|
@@ -16,6 +17,7 @@ use App\Models\Template\BCustomTemplate; |
|
|
|
use App\Services\CosService;
|
|
|
|
use App\Services\ProjectServer;
|
|
|
|
use Illuminate\Console\Command;
|
|
|
|
use Illuminate\Support\Facades\Cache;
|
|
|
|
use Illuminate\Support\Facades\DB;
|
|
|
|
use Illuminate\Support\Facades\Redis;
|
|
|
|
|
|
...
|
...
|
@@ -54,10 +56,14 @@ class HtmlCollect extends Command |
|
|
|
protected function start_collect()
|
|
|
|
{
|
|
|
|
$task_id = $this->get_task();
|
|
|
|
if (!$task_id) {
|
|
|
|
if ($task_id === false) {
|
|
|
|
//所有项目采集完成
|
|
|
|
sleep(60);
|
|
|
|
return true;
|
|
|
|
} elseif ($task_id === 0) {
|
|
|
|
//队列任务已处理完,有进程正在查询数据库,等待2秒后执行
|
|
|
|
sleep(2);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
$task_arr = explode('_', $task_id);
|
|
...
|
...
|
@@ -85,12 +91,42 @@ class HtmlCollect extends Command |
|
|
|
//采集html页面,下载资源到本地并替换
|
|
|
|
try {
|
|
|
|
$html = curl_c('https://' . $collect_info->domain . $collect_info->route, false);
|
|
|
|
if ($html == '0' || strpos($html, '404 Not Found') !== false) {
|
|
|
|
if (strlen($html) < 4) {
|
|
|
|
$collect_info->status = CollectTask::STATUS_FAIL;
|
|
|
|
$collect_info->save();
|
|
|
|
|
|
|
|
$error = $html == '0' ? 'no html' : '404 not found';
|
|
|
|
echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', collect_id: ' . $collect_id . ', error: ' . $error . PHP_EOL;
|
|
|
|
if ($html == 404) {
|
|
|
|
//原数据页面404,需要将6.0数据存入草稿箱
|
|
|
|
switch ($collect_info->source) {
|
|
|
|
//产品
|
|
|
|
case RouteMap::SOURCE_PRODUCT:
|
|
|
|
$model = new Product();
|
|
|
|
$status_draft = Product::STATUS_DRAFT;
|
|
|
|
break;
|
|
|
|
//博客
|
|
|
|
case RouteMap::SOURCE_BLOG:
|
|
|
|
$model = new Blog();
|
|
|
|
$status_draft = Blog::STATUS_DRAFT;
|
|
|
|
break;
|
|
|
|
//新闻
|
|
|
|
case RouteMap::SOURCE_NEWS:
|
|
|
|
$model = new News();
|
|
|
|
$status_draft = News::STATUS_DRAFT;
|
|
|
|
break;
|
|
|
|
//自定义模块详情
|
|
|
|
case RouteMap::SOURCE_MODULE:
|
|
|
|
$model = new CustomModule();
|
|
|
|
$status_draft = CustomModule::STATUS_DRAFT;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
//单页详情
|
|
|
|
$model = new BCustomTemplate();
|
|
|
|
$status_draft = BCustomTemplate::STATUS_DRAFT;
|
|
|
|
}
|
|
|
|
$model->edit(['status' => $status_draft], ['project_id' => $project_id, 'id' => $collect_info->source_id]);
|
|
|
|
}
|
|
|
|
|
|
|
|
echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', collect_id: ' . $collect_id . ', error: ' . $html . PHP_EOL;
|
|
|
|
sleep(2);
|
|
|
|
return true;
|
|
|
|
}
|
|
...
|
...
|
@@ -147,6 +183,11 @@ class HtmlCollect extends Command |
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!Cache::add('html_collect_select_mysql', true, 10)) {
|
|
|
|
//如果存在数据锁,表示有其他进程已往下执行,当前进程直接返回
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
foreach ($update_log_list as $update_log) {
|
|
|
|
switch ($update_log->api_type) {
|
|
|
|
case 'page':
|
...
|
...
|
|