正在显示
9 个修改的文件
包含
280 行增加
和
392 行删除
| @@ -119,6 +119,8 @@ class ProjectImport extends Command | @@ -119,6 +119,8 @@ class ProjectImport extends Command | ||
| 119 | } | 119 | } |
| 120 | } | 120 | } |
| 121 | 121 | ||
| 122 | + $v[0] = $this->special2str($v[0]); | ||
| 123 | + | ||
| 122 | $total_count += 1; | 124 | $total_count += 1; |
| 123 | try { | 125 | try { |
| 124 | if ($task->type == ImportTask::TYPE_NEWS) { | 126 | if ($task->type == ImportTask::TYPE_NEWS) { |
| @@ -200,6 +202,30 @@ class ProjectImport extends Command | @@ -200,6 +202,30 @@ class ProjectImport extends Command | ||
| 200 | return false; | 202 | return false; |
| 201 | } | 203 | } |
| 202 | 204 | ||
| 205 | + //特殊字符转换 | ||
| 206 | + protected function special2str($str) | ||
| 207 | + { | ||
| 208 | + if (strpos($str, ';') === false) { | ||
| 209 | + return $str; | ||
| 210 | + } | ||
| 211 | + | ||
| 212 | + $list = [ | ||
| 213 | + '&lt;' => '<', | ||
| 214 | + '&gt;' => '>', | ||
| 215 | + '&amp;' => '&', | ||
| 216 | + '&acute;' => "'", | ||
| 217 | + '&quot;' => '"', | ||
| 218 | + '&nbsp;' => ' ', | ||
| 219 | + '&#039;' => "'" | ||
| 220 | + ]; | ||
| 221 | + | ||
| 222 | + foreach ($list as $k => $v) { | ||
| 223 | + $str = str_replace($k, $v, $str); | ||
| 224 | + } | ||
| 225 | + | ||
| 226 | + return $str; | ||
| 227 | + } | ||
| 228 | + | ||
| 203 | //发送站内通知 | 229 | //发送站内通知 |
| 204 | protected function send_mail($user_list, $time, $type, $success_count, $repeat_count, $fail_count, $reason, $fail_line = []) | 230 | protected function send_mail($user_list, $time, $type, $success_count, $repeat_count, $fail_count, $reason, $fail_line = []) |
| 205 | { | 231 | { |
| @@ -136,7 +136,7 @@ class HtmlCollect extends Command | @@ -136,7 +136,7 @@ class HtmlCollect extends Command | ||
| 136 | } | 136 | } |
| 137 | 137 | ||
| 138 | 138 | ||
| 139 | - $update_log = UpdateLog::where('project_id', '<', 799)->where('status', UpdateLog::STATUS_COM)->where('collect_status', UpdateLog::COLLECT_STATUS_UN)->orderBy('project_id', 'asc')->first(); | 139 | + $update_log = UpdateLog::where('status', UpdateLog::STATUS_COM)->where('collect_status', UpdateLog::COLLECT_STATUS_UN)->orderBy('project_id', 'asc')->first(); |
| 140 | if (!$update_log) { | 140 | if (!$update_log) { |
| 141 | return false; | 141 | return false; |
| 142 | } | 142 | } |
| 1 | -<?php | ||
| 2 | - | ||
| 3 | -namespace App\Console\Commands\Update; | ||
| 4 | - | ||
| 5 | -use App\Models\Collect\CollectSource; | ||
| 6 | -use App\Models\Collect\CollectTask; | ||
| 7 | -use App\Models\Com\UpdateLog; | ||
| 8 | -use App\Models\Com\UpdateOldInfo; | ||
| 9 | -use App\Models\RouteMap\RouteMap; | ||
| 10 | -use App\Services\CosService; | ||
| 11 | -use App\Services\ProjectServer; | ||
| 12 | -use Illuminate\Console\Command; | ||
| 13 | -use Illuminate\Support\Facades\DB; | ||
| 14 | -use Illuminate\Support\Facades\Redis; | ||
| 15 | - | ||
| 16 | -/** | ||
| 17 | - * 4.0,5.0升级到6.0,主站页面采集 | ||
| 18 | - * Class ProjectImport | ||
| 19 | - * @package App\Console\Commands | ||
| 20 | - * @author Akun | ||
| 21 | - * @date 2023/11/10 16:04 | ||
| 22 | - */ | ||
| 23 | -class HtmlCollectNew extends Command | ||
| 24 | -{ | ||
| 25 | - /** | ||
| 26 | - * The name and signature of the console command. | ||
| 27 | - * | ||
| 28 | - * @var string | ||
| 29 | - */ | ||
| 30 | - protected $signature = 'project_html_collect_new'; | ||
| 31 | - | ||
| 32 | - /** | ||
| 33 | - * The console command description. | ||
| 34 | - * | ||
| 35 | - * @var string | ||
| 36 | - */ | ||
| 37 | - protected $description = '执行项目html页面采集'; | ||
| 38 | - | ||
| 39 | - | ||
| 40 | - public function handle() | ||
| 41 | - { | ||
| 42 | - ini_set('memory_limit', '512M'); | ||
| 43 | - while (true) { | ||
| 44 | - $this->start_collect(); | ||
| 45 | - } | ||
| 46 | - } | ||
| 47 | - | ||
| 48 | - protected function start_collect() | ||
| 49 | - { | ||
| 50 | - $task_id = $this->get_task(); | ||
| 51 | - if ($task_id === false) { | ||
| 52 | - //所有项目采集完成 | ||
| 53 | - sleep(60); | ||
| 54 | - return true; | ||
| 55 | - } elseif ($task_id === 0) { | ||
| 56 | - //当前项目采集完成 | ||
| 57 | - sleep(2); | ||
| 58 | - return true; | ||
| 59 | - } | ||
| 60 | - | ||
| 61 | - $task_arr = explode('_', $task_id); | ||
| 62 | - $project_id = $task_arr[0]; | ||
| 63 | - $collect_id = $task_arr[1]; | ||
| 64 | - | ||
| 65 | - //设置数据库 | ||
| 66 | - $project = ProjectServer::useProject($project_id); | ||
| 67 | - if ($project) { | ||
| 68 | - $collect_info = CollectTask::select(['id', 'domain', 'route'])->where('id', $collect_id)->where('status', CollectTask::STATUS_UN)->where('language', '')->first(); | ||
| 69 | - | ||
| 70 | - if (!$collect_info) { | ||
| 71 | - sleep(2); | ||
| 72 | - return true; | ||
| 73 | - } | ||
| 74 | - | ||
| 75 | - echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', collect_id: ' . $collect_id . ', collect start' . PHP_EOL; | ||
| 76 | - | ||
| 77 | - $collect_info->status = CollectTask::STATUS_ING; | ||
| 78 | - $collect_info->save(); | ||
| 79 | - | ||
| 80 | - //获取站点原始域名信息 | ||
| 81 | - $old_info = UpdateOldInfo::getOldDomain($project_id, $collect_info->domain); | ||
| 82 | - | ||
| 83 | - //采集html页面,下载资源到本地并替换 | ||
| 84 | - try { | ||
| 85 | - $html = curl_c('https://' . $collect_info->domain . $collect_info->route, false); | ||
| 86 | - if ($html == '0') { | ||
| 87 | - $collect_info->status = CollectTask::STATUS_FAIL; | ||
| 88 | - $collect_info->save(); | ||
| 89 | - echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', collect_id: ' . $collect_id . ', error: no html' . PHP_EOL; | ||
| 90 | - sleep(2); | ||
| 91 | - return true; | ||
| 92 | - } | ||
| 93 | - | ||
| 94 | - //如果有base64图片,先替换掉,再进行资源匹配 | ||
| 95 | - $new_html = $html; | ||
| 96 | - preg_match_all("/data:([^;]*);base64,(.*)?\"/", $new_html, $result_img); | ||
| 97 | - $img_base64 = $result_img[2] ?? []; | ||
| 98 | - foreach ($img_base64 as $v64) { | ||
| 99 | - $new_html = str_replace($v64, '', $new_html); | ||
| 100 | - } | ||
| 101 | - | ||
| 102 | - $source_list = $this->html_preg($new_html, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']); | ||
| 103 | - | ||
| 104 | - if ($source_list) { | ||
| 105 | - $html = $this->upload_source($html, $source_list, $project_id, $collect_info->domain, $old_info['web_url_domain'], $old_info['home_url']); | ||
| 106 | - } | ||
| 107 | - } catch (\Exception $e) { | ||
| 108 | - $collect_info->status = CollectTask::STATUS_FAIL; | ||
| 109 | - $collect_info->save(); | ||
| 110 | - | ||
| 111 | - echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', collect_id: ' . $collect_id . ', error: ' . $e->getMessage() . PHP_EOL; | ||
| 112 | - sleep(2); | ||
| 113 | - return true; | ||
| 114 | - } | ||
| 115 | - | ||
| 116 | - $collect_info->html = $html; | ||
| 117 | - $collect_info->status = CollectTask::STATUS_COM; | ||
| 118 | - $collect_info->save(); | ||
| 119 | - | ||
| 120 | - echo 'date:' . date('Y-m-d H:i:s') . ', project_id: ' . $project_id . ', collect_id: ' . $collect_id . ', collect end' . PHP_EOL; | ||
| 121 | - } | ||
| 122 | - //关闭数据库 | ||
| 123 | - DB::disconnect('custom_mysql'); | ||
| 124 | - | ||
| 125 | - sleep(2); | ||
| 126 | - return true; | ||
| 127 | - } | ||
| 128 | - | ||
| 129 | - //获取任务 | ||
| 130 | - protected function get_task() | ||
| 131 | - { | ||
| 132 | - $key = 'console_html_collect_new_task'; | ||
| 133 | - $task_id = Redis::rpop($key); | ||
| 134 | - if ($task_id) { | ||
| 135 | - return $task_id; | ||
| 136 | - } | ||
| 137 | - | ||
| 138 | - | ||
| 139 | - $update_log = UpdateLog::where('project_id', '>=', 799)->where('status', UpdateLog::STATUS_COM)->where('collect_status', UpdateLog::COLLECT_STATUS_UN)->orderBy('project_id', 'asc')->first(); | ||
| 140 | - if (!$update_log) { | ||
| 141 | - return false; | ||
| 142 | - } | ||
| 143 | - | ||
| 144 | - switch ($update_log->api_type) { | ||
| 145 | - case 'page': | ||
| 146 | - $source = RouteMap::SOURCE_PAGE; | ||
| 147 | - break; | ||
| 148 | - case 'news': | ||
| 149 | - $source = RouteMap::SOURCE_NEWS; | ||
| 150 | - break; | ||
| 151 | - case 'blog': | ||
| 152 | - $source = RouteMap::SOURCE_BLOG; | ||
| 153 | - break; | ||
| 154 | - default: | ||
| 155 | - $source = RouteMap::SOURCE_PRODUCT; | ||
| 156 | - break; | ||
| 157 | - } | ||
| 158 | - | ||
| 159 | - $complete = false; | ||
| 160 | - //设置数据库 | ||
| 161 | - $project = ProjectServer::useProject($update_log->project_id); | ||
| 162 | - if ($project) { | ||
| 163 | - $collect_list = CollectTask::select(['id', 'project_id'])->where('project_id', $update_log['project_id'])->where('source', $source)->where('language', '')->where('status', CollectTask::STATUS_UN)->orderBy('id', 'asc')->limit(50)->get(); | ||
| 164 | - | ||
| 165 | - if ($collect_list->count() == 0) { | ||
| 166 | - $complete = true; | ||
| 167 | - } else { | ||
| 168 | - foreach ($collect_list as $collect) { | ||
| 169 | - Redis::lpush($key, $collect['project_id'] . '_' . $collect['id']); | ||
| 170 | - } | ||
| 171 | - } | ||
| 172 | - } | ||
| 173 | - //关闭数据库 | ||
| 174 | - DB::disconnect('custom_mysql'); | ||
| 175 | - | ||
| 176 | - if ($complete) { | ||
| 177 | - $update_log->collect_status = UpdateLog::COLLECT_STATUS_MAIN; | ||
| 178 | - $update_log->save(); | ||
| 179 | - return 0; | ||
| 180 | - } | ||
| 181 | - | ||
| 182 | - $task_id = Redis::rpop($key); | ||
| 183 | - return $task_id; | ||
| 184 | - } | ||
| 185 | - | ||
| 186 | - //正则匹配html资源 | ||
| 187 | - protected function html_preg($html, $project_id, $domain, $web_url_domain, $home_url) | ||
| 188 | - { | ||
| 189 | - $source = []; | ||
| 190 | - | ||
| 191 | - if (!$html) { | ||
| 192 | - return $source; | ||
| 193 | - } | ||
| 194 | - | ||
| 195 | - //image | ||
| 196 | - preg_match_all('/<img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_img); | ||
| 197 | - $img = $result_img[2] ?? []; | ||
| 198 | - foreach ($img as $vi) { | ||
| 199 | - $check_vi = $this->url_check($vi, $project_id, $domain, $web_url_domain, $home_url); | ||
| 200 | - $check_vi && $source[] = $check_vi; | ||
| 201 | - } | ||
| 202 | - | ||
| 203 | - //js | ||
| 204 | - preg_match_all('/<script\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_js); | ||
| 205 | - $js = $result_js[2] ?? []; | ||
| 206 | - foreach ($js as $vj) { | ||
| 207 | - $check_vj = $this->url_check($vj, $project_id, $domain, $web_url_domain, $home_url); | ||
| 208 | - $check_vj && $source[] = $check_vj; | ||
| 209 | - } | ||
| 210 | - | ||
| 211 | - //video | ||
| 212 | - preg_match_all('/<source\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_video); | ||
| 213 | - $video = $result_video[2] ?? []; | ||
| 214 | - foreach ($video as $vv) { | ||
| 215 | - $check_vv = $this->url_check($vv, $project_id, $domain, $web_url_domain, $home_url); | ||
| 216 | - $check_vv && $source[] = $check_vv; | ||
| 217 | - } | ||
| 218 | - | ||
| 219 | - //css | ||
| 220 | - preg_match_all('/<link\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_css); | ||
| 221 | - $css = $result_css[2] ?? []; | ||
| 222 | - foreach ($css as $vc) { | ||
| 223 | - $check_vc = $this->url_check($vc, $project_id, $domain, $web_url_domain, $home_url); | ||
| 224 | - $check_vc && $source[] = $check_vc; | ||
| 225 | - } | ||
| 226 | - | ||
| 227 | - //css background | ||
| 228 | - preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $html, $result_css_b); | ||
| 229 | - $css_b = $result_css_b[1] ?? []; | ||
| 230 | - foreach ($css_b as $vc_b) { | ||
| 231 | - $check_vc_b = $this->url_check($vc_b, $project_id, $domain, $web_url_domain, $home_url); | ||
| 232 | - $check_vc_b && $source[] = $check_vc_b; | ||
| 233 | - } | ||
| 234 | - | ||
| 235 | - //a标签下载资源 | ||
| 236 | - preg_match_all('/<a\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_a); | ||
| 237 | - $down = $result_a[2] ?? []; | ||
| 238 | - foreach ($down as $vd) { | ||
| 239 | - $check_vd = $this->url_check($vd, $project_id, $domain, $web_url_domain, $home_url); | ||
| 240 | - $check_vd && $source[] = $check_vd; | ||
| 241 | - } | ||
| 242 | - | ||
| 243 | - return $source; | ||
| 244 | - } | ||
| 245 | - | ||
| 246 | - //判断资源是否需要下载 | ||
| 247 | - protected function url_check($url, $project_id, $domain, $web_url_domain, $home_url) | ||
| 248 | - { | ||
| 249 | - if ($url) { | ||
| 250 | - $url = str_replace('"', '', $url); | ||
| 251 | - $arr = parse_url($url); | ||
| 252 | - $scheme = $arr['scheme'] ?? ''; | ||
| 253 | - $host = $arr['host'] ?? ''; | ||
| 254 | - $path = $arr['path'] ?? ''; | ||
| 255 | - $query = $arr['query'] ?? ''; | ||
| 256 | - | ||
| 257 | - $path_arr = explode('.', $path); | ||
| 258 | - $path_end = end($path_arr); | ||
| 259 | - if ( | ||
| 260 | - (empty($scheme) || $scheme == 'https' || $scheme == 'http') | ||
| 261 | - && (empty($host) || (strpos($web_url_domain, $host) !== false) || (strpos($home_url, $host) !== false)) | ||
| 262 | - && $path | ||
| 263 | - && (substr($path, 0, 1) == '/') | ||
| 264 | - && (strpos($path, '.') !== false) | ||
| 265 | - && (strpos($path_end, 'html') === false) | ||
| 266 | - && (strpos($path_end, 'php') === false) | ||
| 267 | - && (strpos($path_end, 'com') === false) | ||
| 268 | - && (strpos($path_end, 'xml') === false) | ||
| 269 | - ) { | ||
| 270 | - $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first(); | ||
| 271 | - if (!$source) { | ||
| 272 | - return [ | ||
| 273 | - 'download' => true, | ||
| 274 | - 'url' => $url, | ||
| 275 | - 'url_complete' => ($scheme ?: 'https') . '://' . ($host ?: $domain) . $path . ($query ? '?' . $query : '') | ||
| 276 | - ]; | ||
| 277 | - } else { | ||
| 278 | - return [ | ||
| 279 | - 'download' => false, | ||
| 280 | - 'url' => $url, | ||
| 281 | - 'url_complete' => $source['target'] | ||
| 282 | - ]; | ||
| 283 | - } | ||
| 284 | - } else { | ||
| 285 | - return false; | ||
| 286 | - } | ||
| 287 | - } else { | ||
| 288 | - return false; | ||
| 289 | - } | ||
| 290 | - } | ||
| 291 | - | ||
| 292 | - //下载并替换资源 | ||
| 293 | - protected function upload_source($html, $source, $project_id, $domain, $web_url_domain, $home_url) | ||
| 294 | - { | ||
| 295 | - foreach ($source as $vs) { | ||
| 296 | - | ||
| 297 | - if ($vs['download']) { | ||
| 298 | - $new_source = CosService::uploadRemote($project_id, 'source', $vs['url_complete']); | ||
| 299 | - if ($new_source) { | ||
| 300 | - CollectSource::insert([ | ||
| 301 | - 'project_id' => $project_id, | ||
| 302 | - 'origin' => $vs['url'], | ||
| 303 | - 'target' => $new_source, | ||
| 304 | - 'created_at' => date('Y-m-d H:i:s'), | ||
| 305 | - 'updated_at' => date('Y-m-d H:i:s'), | ||
| 306 | - ]); | ||
| 307 | - $html = str_replace($vs['url'], getImageUrl($new_source), $html); | ||
| 308 | - | ||
| 309 | - if (substr($new_source, -3, 3) == 'css' || substr($new_source, -2, 2) == 'js') { | ||
| 310 | - | ||
| 311 | - $source_html = curl_c(getImageUrl($new_source), false); | ||
| 312 | - | ||
| 313 | - if (substr($new_source, -3, 3) == 'css') { | ||
| 314 | - preg_match_all("/url\(['\"]?(\s*[^>]+?)['\"]?\)/i", $source_html, $result_source); | ||
| 315 | - } else { | ||
| 316 | - preg_match_all("/[large|thumb]+URL:['\"]+(\s*[^>]+?)['\"]+,/i", $source_html, $result_source); | ||
| 317 | - } | ||
| 318 | - | ||
| 319 | - $js_css_source = $result_source[1] ?? []; | ||
| 320 | - if ($js_css_source) { | ||
| 321 | - foreach ($js_css_source as $vjs) { | ||
| 322 | - if (strpos($vjs, 'URL:"') !== false) { | ||
| 323 | - $vjs = substr($vjs, strpos($vjs, 'URL:"') + 5); | ||
| 324 | - } | ||
| 325 | - | ||
| 326 | - $vjs_down = str_replace('"', '', $vjs); | ||
| 327 | - if (strpos($vjs_down, 'data:') !== false) { | ||
| 328 | - //过滤二进制文件 | ||
| 329 | - continue; | ||
| 330 | - } | ||
| 331 | - if (strlen($vjs_down) > 255) { | ||
| 332 | - //过滤太长文件 | ||
| 333 | - continue; | ||
| 334 | - } | ||
| 335 | - | ||
| 336 | - $vjs_down_arr = parse_url($vjs_down); | ||
| 337 | - $vjs_down_host = $vjs_down_arr['host'] ?? ''; | ||
| 338 | - | ||
| 339 | - $cos = config('filesystems.disks.cos'); | ||
| 340 | - $cosCdn = $cos['cdn']; | ||
| 341 | - | ||
| 342 | - if ($vjs_down_host && $vjs_down_host == $cosCdn) { | ||
| 343 | - //过滤已经下载的 | ||
| 344 | - continue; | ||
| 345 | - } | ||
| 346 | - | ||
| 347 | - if (empty($vjs_down_host) && substr($vjs_down, 0, 1) != '/') { | ||
| 348 | - //相对路径 | ||
| 349 | - $url_arr = explode('/', $vs['url']); | ||
| 350 | - $url_arr[count($url_arr) - 1] = $vjs_down; | ||
| 351 | - $vjs_down = implode('/', $url_arr); | ||
| 352 | - } | ||
| 353 | - | ||
| 354 | - $vjs_result = $this->url_check($vjs_down, $project_id, $domain, $web_url_domain, $home_url); | ||
| 355 | - if (!$vjs_result) { | ||
| 356 | - continue; | ||
| 357 | - } | ||
| 358 | - | ||
| 359 | - if ($vjs_result['download']) { | ||
| 360 | - $new_vjs = CosService::uploadRemote($project_id, 'source', $vjs_result['url_complete']); | ||
| 361 | - if ($new_vjs) { | ||
| 362 | - CollectSource::insert([ | ||
| 363 | - 'project_id' => $project_id, | ||
| 364 | - 'origin' => $vjs_result['url'], | ||
| 365 | - 'target' => $new_vjs, | ||
| 366 | - 'created_at' => date('Y-m-d H:i:s'), | ||
| 367 | - 'updated_at' => date('Y-m-d H:i:s'), | ||
| 368 | - ]); | ||
| 369 | - $source_html = str_replace($vjs, getImageUrl($new_vjs), $source_html); | ||
| 370 | - } | ||
| 371 | - } else { | ||
| 372 | - $source_html = str_replace($vjs, getImageUrl($vjs_result['url_complete']), $source_html); | ||
| 373 | - } | ||
| 374 | - } | ||
| 375 | - | ||
| 376 | - CosService::uploadRemote($project_id, 'source', $new_source, $new_source, $source_html); | ||
| 377 | - } | ||
| 378 | - } | ||
| 379 | - } | ||
| 380 | - } else { | ||
| 381 | - $html = str_replace($vs['url'], getImageUrl($vs['url_complete']), $html); | ||
| 382 | - } | ||
| 383 | - } | ||
| 384 | - | ||
| 385 | - return $html; | ||
| 386 | - } | ||
| 387 | -} |
app/Console/Commands/Update/ProjectVisit.php
0 → 100644
| 1 | +<?php | ||
| 2 | + | ||
| 3 | +namespace App\Console\Commands\Update; | ||
| 4 | + | ||
| 5 | +use App\Models\Com\UpdateVisit; | ||
| 6 | +use App\Models\Visit\Visit; | ||
| 7 | +use App\Models\Visit\VisitItem; | ||
| 8 | +use App\Services\ProjectServer; | ||
| 9 | +use Illuminate\Console\Command; | ||
| 10 | +use Illuminate\Support\Facades\DB; | ||
| 11 | +use Illuminate\Support\Facades\Redis; | ||
| 12 | + | ||
| 13 | +/** | ||
| 14 | + * 4.0,5.0升级到6.0,访问同步 | ||
| 15 | + * Class ProjectImport | ||
| 16 | + * @package App\Console\Commands | ||
| 17 | + * @author Akun | ||
| 18 | + * @date 2023/12/18 15:52 | ||
| 19 | + */ | ||
| 20 | +class ProjectVisit extends Command | ||
| 21 | +{ | ||
| 22 | + /** | ||
| 23 | + * The name and signature of the console command. | ||
| 24 | + * | ||
| 25 | + * @var string | ||
| 26 | + */ | ||
| 27 | + protected $signature = 'project_visit'; | ||
| 28 | + | ||
| 29 | + /** | ||
| 30 | + * The console command description. | ||
| 31 | + * | ||
| 32 | + * @var string | ||
| 33 | + */ | ||
| 34 | + protected $description = '执行项目升级访问任务'; | ||
| 35 | + | ||
| 36 | + | ||
| 37 | + public function handle() | ||
| 38 | + { | ||
| 39 | + ini_set('memory_limit', '512M'); | ||
| 40 | + while (true) { | ||
| 41 | + $this->start_visit(); | ||
| 42 | + } | ||
| 43 | + } | ||
| 44 | + | ||
| 45 | + protected function start_visit() | ||
| 46 | + { | ||
| 47 | + $task_id = $this->get_task(); | ||
| 48 | + if (!$task_id) { | ||
| 49 | + sleep(60); | ||
| 50 | + return true; | ||
| 51 | + } | ||
| 52 | + | ||
| 53 | + $task = UpdateVisit::where('id', $task_id)->where('status', UpdateVisit::STATUS_UN)->first(); | ||
| 54 | + if (!$task) { | ||
| 55 | + sleep(2); | ||
| 56 | + return true; | ||
| 57 | + } | ||
| 58 | + | ||
| 59 | + $project_id = $task->project_id; | ||
| 60 | + $api_type = $task->api_type; | ||
| 61 | + $api_url = $task->api_url; | ||
| 62 | + | ||
| 63 | + $page_size = 200; | ||
| 64 | + | ||
| 65 | + echo 'date:' . date('Y-m-d H:i:s') . ', task_id: ' . $task->id . ', task_type: ' . $api_type . ', update start' . PHP_EOL; | ||
| 66 | + | ||
| 67 | + $task->status = UpdateVisit::STATUS_ING;//同步中 | ||
| 68 | + $task->save(); | ||
| 69 | + | ||
| 70 | + //设置数据库 | ||
| 71 | + $project = ProjectServer::useProject($project_id); | ||
| 72 | + if ($project) { | ||
| 73 | + if ($api_type == 'visit_list') { | ||
| 74 | + //访问列表 | ||
| 75 | + $url = $api_url . '?' . http_build_query(['w' => 'visit_list', 'page' => 1, 'pagesize' => 1]); | ||
| 76 | + $data = curl_c($url); | ||
| 77 | + if (isset($data['count']) && $data['count'] > 0) { | ||
| 78 | + $count = $data['count']; | ||
| 79 | + | ||
| 80 | + $total_page = ceil($count / $page_size); | ||
| 81 | + for ($page = 1; $page <= $total_page; $page++) { | ||
| 82 | + $url_page = $api_url . '?' . http_build_query(['w' => 'visit_list', 'page' => $page, 'pagesize' => $page_size]); | ||
| 83 | + $data_page = curl_c($url_page); | ||
| 84 | + if (isset($data_page['data']) && $data_page['data']) { | ||
| 85 | + $items = $data_page['data']; | ||
| 86 | + $model = new Visit(); | ||
| 87 | + foreach ($items as $item) { | ||
| 88 | + if (isset($item['id']) && $item['id']) { | ||
| 89 | + $visit = $model->read(['original_id' => $item['id']], 'id'); | ||
| 90 | + if (!$visit) { | ||
| 91 | + try { | ||
| 92 | + $url_arr = parse_url($item['request'] ?? ''); | ||
| 93 | + $model->insert([ | ||
| 94 | + 'url' => $item['request'] ?? '', | ||
| 95 | + 'referrer_url' => $item['referrer'] ?? '', | ||
| 96 | + 'device_port' => isset($item['is_moblie']) && $item['is_moblie'] == 1 ? 2 : 1, | ||
| 97 | + 'country' => $item['ip_area'] ?? '', | ||
| 98 | + 'ip' => $item['ip'] ?? '', | ||
| 99 | + 'depth' => $item['pv'], | ||
| 100 | + 'domain' => $url_arr['host'] ?? '', | ||
| 101 | + 'is_inquiry' => $item['is_cf'] ?? 0, | ||
| 102 | + 'created_at' => date('Y-m-d H:i:s', isset($item['update']) && $item['update'] ? $item['update'] : time()), | ||
| 103 | + 'updated_at' => date('Y-m-d H:i:s', isset($item['update']) && $item['update'] ? $item['update'] : time()), | ||
| 104 | + 'updated_date' => date('Y-m-d', isset($item['c_time']) && $item['c_time'] ? strtotime($item['c_time']) : time()), | ||
| 105 | + 'original_id' => $item['id'], | ||
| 106 | + ]); | ||
| 107 | + } catch (\Exception $e) { | ||
| 108 | + echo 'date:' . date('Y-m-d H:i:s') . ', task_id: ' . $task->id . ', error: ' . $e->getMessage() . PHP_EOL; | ||
| 109 | + continue; | ||
| 110 | + } | ||
| 111 | + } | ||
| 112 | + } | ||
| 113 | + } | ||
| 114 | + } | ||
| 115 | + } | ||
| 116 | + } else { | ||
| 117 | + return true; | ||
| 118 | + } | ||
| 119 | + } else { | ||
| 120 | + //访问明细 | ||
| 121 | + $url = $api_url . '?' . http_build_query(['w' => 'visit_detail_list', 'page' => 1, 'pagesize' => 1]); | ||
| 122 | + $data = curl_c($url); | ||
| 123 | + if (isset($data['count']) && $data['count'] > 0) { | ||
| 124 | + $count = $data['count']; | ||
| 125 | + | ||
| 126 | + $total_page = ceil($count / $page_size); | ||
| 127 | + for ($page = 1; $page <= $total_page; $page++) { | ||
| 128 | + $url_page = $api_url . '?' . http_build_query(['w' => 'visit_detail_list', 'page' => $page, 'pagesize' => $page_size]); | ||
| 129 | + $data_page = curl_c($url_page); | ||
| 130 | + if (isset($data_page['data']) && $data_page['data']) { | ||
| 131 | + $items = $data_page['data']; | ||
| 132 | + $model = new VisitItem(); | ||
| 133 | + $p_model = new Visit(); | ||
| 134 | + foreach ($items as $item) { | ||
| 135 | + if (isset($item['id']) && $item['id']) { | ||
| 136 | + $visit = $model->read(['original_id' => $item['id']], 'id'); | ||
| 137 | + if (!$visit) { | ||
| 138 | + try { | ||
| 139 | + $p_info = $p_model->read(['ip' => $item['ip'] ?? '', 'updated_date' => $item['day_at'] ?? '']); | ||
| 140 | + if ($p_info) { | ||
| 141 | + $model->insert([ | ||
| 142 | + 'customer_visit_id' => $p_info['id'], | ||
| 143 | + 'url' => $p_info['url'], | ||
| 144 | + 'referrer_url' => $p_info['referrer_url'], | ||
| 145 | + 'device_port' => $p_info['device_port'], | ||
| 146 | + 'country' => $p_info['country'], | ||
| 147 | + 'ip' => $p_info['ip'], | ||
| 148 | + 'domain' => $p_info['domain'], | ||
| 149 | + 'created_at' => $item['time_str'] ?? $p_info['created_at'], | ||
| 150 | + 'updated_at' => $item['time_str'] ?? $p_info['updated_at'], | ||
| 151 | + 'updated_date' => $item['day_at'] ?? $p_info['updated_date'], | ||
| 152 | + 'original_id' => $item['id'], | ||
| 153 | + ]); | ||
| 154 | + } | ||
| 155 | + } catch (\Exception $e) { | ||
| 156 | + echo 'date:' . date('Y-m-d H:i:s') . ', task_id: ' . $task->id . ', error: ' . $e->getMessage() . PHP_EOL; | ||
| 157 | + continue; | ||
| 158 | + } | ||
| 159 | + } | ||
| 160 | + } | ||
| 161 | + } | ||
| 162 | + } | ||
| 163 | + } | ||
| 164 | + } else { | ||
| 165 | + return true; | ||
| 166 | + } | ||
| 167 | + } | ||
| 168 | + } | ||
| 169 | + //关闭数据库 | ||
| 170 | + DB::disconnect('custom_mysql'); | ||
| 171 | + | ||
| 172 | + $task->status = UpdateVisit::STATUS_COM;//同步完成 | ||
| 173 | + $task->save(); | ||
| 174 | + | ||
| 175 | + echo 'date:' . date('Y-m-d H:i:s') . ', task_id: ' . $task->id . ', task_type: ' . $api_type . ', update end ' . PHP_EOL; | ||
| 176 | + | ||
| 177 | + sleep(2); | ||
| 178 | + } | ||
| 179 | + | ||
| 180 | + //获取任务 | ||
| 181 | + protected function get_task() | ||
| 182 | + { | ||
| 183 | + $key = 'console_visit_task'; | ||
| 184 | + $task_id = Redis::rpop($key); | ||
| 185 | + if ($task_id) { | ||
| 186 | + return $task_id; | ||
| 187 | + } | ||
| 188 | + | ||
| 189 | + $task_list = UpdateVisit::where('status', UpdateVisit::STATUS_UN)->orderBy('sort', 'asc')->orderBy('project_id', 'asc')->limit(20)->get(); | ||
| 190 | + if ($task_list->count() == 0) { | ||
| 191 | + return false; | ||
| 192 | + } | ||
| 193 | + | ||
| 194 | + foreach ($task_list as $value) { | ||
| 195 | + Redis::lpush($key, $value->id); | ||
| 196 | + } | ||
| 197 | + | ||
| 198 | + $task_id = Redis::rpop($key); | ||
| 199 | + return $task_id; | ||
| 200 | + } | ||
| 201 | +} |
| @@ -53,9 +53,19 @@ class UpdateRoute extends Command | @@ -53,9 +53,19 @@ class UpdateRoute extends Command | ||
| 53 | * @time :2023/11/20 15:13 | 53 | * @time :2023/11/20 15:13 |
| 54 | */ | 54 | */ |
| 55 | public function handle(){ | 55 | public function handle(){ |
| 56 | +<<<<<<< HEAD | ||
| 56 | ProjectServer::useProject(91); | 57 | ProjectServer::useProject(91); |
| 57 | $this->getProductKeyword(); | 58 | $this->getProductKeyword(); |
| 58 | DB::disconnect('custom_mysql'); | 59 | DB::disconnect('custom_mysql'); |
| 60 | +======= | ||
| 61 | + $projectModel = new Project(); | ||
| 62 | + $list = $projectModel->list(['type'=>['in',[1,2,3,4]]]); | ||
| 63 | + foreach ($list as $v){ | ||
| 64 | + ProjectServer::useProject($v['id']); | ||
| 65 | + $this->getProductKeyword($v['id']); | ||
| 66 | + DB::disconnect('custom_mysql'); | ||
| 67 | + } | ||
| 68 | +>>>>>>> 66efe5cd4b835d715f00e164b101185541dd9f83 | ||
| 59 | echo date('Y-m-d H:i:s') . 'end' . PHP_EOL; | 69 | echo date('Y-m-d H:i:s') . 'end' . PHP_EOL; |
| 60 | } | 70 | } |
| 61 | 71 | ||
| @@ -66,11 +76,12 @@ class UpdateRoute extends Command | @@ -66,11 +76,12 @@ class UpdateRoute extends Command | ||
| 66 | * @method :post | 76 | * @method :post |
| 67 | * @time :2023/12/8 11:13 | 77 | * @time :2023/12/8 11:13 |
| 68 | */ | 78 | */ |
| 69 | - public function getProductKeyword(){ | 79 | + public function getProductKeyword($project_id){ |
| 70 | $keywordModel = new Keyword(); | 80 | $keywordModel = new Keyword(); |
| 71 | $lists = $keywordModel->list(['status'=>1]); | 81 | $lists = $keywordModel->list(['status'=>1]); |
| 72 | if(!empty($lists)){ | 82 | if(!empty($lists)){ |
| 73 | foreach ($lists as $v){ | 83 | foreach ($lists as $v){ |
| 84 | +<<<<<<< HEAD | ||
| 74 | $tag = "-tag"; | 85 | $tag = "-tag"; |
| 75 | if (!substr($v['route'], -strlen($tag)) === $tag) { | 86 | if (!substr($v['route'], -strlen($tag)) === $tag) { |
| 76 | $route = $v['route'].$tag; | 87 | $route = $v['route'].$tag; |
| @@ -79,6 +90,18 @@ class UpdateRoute extends Command | @@ -79,6 +90,18 @@ class UpdateRoute extends Command | ||
| 79 | $keywordModel->edit(['route'=>$route],['id'=>$v['id']]); | 90 | $keywordModel->edit(['route'=>$route],['id'=>$v['id']]); |
| 80 | } | 91 | } |
| 81 | echo date('Y-m-d H:i:s') . 'end' . PHP_EOL; | 92 | echo date('Y-m-d H:i:s') . 'end' . PHP_EOL; |
| 93 | +======= | ||
| 94 | + echo date('Y-m-d H:i:s') . '关键字id:'.$v['id'] . PHP_EOL; | ||
| 95 | + $tag = "-tag"; | ||
| 96 | + if (!(substr($v['route'], -strlen($tag)) === $tag)) { | ||
| 97 | + echo date('Y-m-d H:i:s') . '拼接'.$tag . PHP_EOL; | ||
| 98 | + $route = $v['route'].$tag; | ||
| 99 | + // 如果不是以 '-tag' 结尾,则拼接上 '-tag' | ||
| 100 | + $routeModel = new RouteMap(); | ||
| 101 | + $routeModel->edit(['route'=>$route],['source'=>RouteMap::SOURCE_PRODUCT_KEYWORD,'source_id'=>$v['id']]); | ||
| 102 | + $keywordModel->edit(['route'=>$route],['id'=>$v['id']]); | ||
| 103 | + } | ||
| 104 | +>>>>>>> 66efe5cd4b835d715f00e164b101185541dd9f83 | ||
| 82 | } | 105 | } |
| 83 | } | 106 | } |
| 84 | } | 107 | } |
| @@ -29,12 +29,21 @@ class ImportLogic extends BaseLogic | @@ -29,12 +29,21 @@ class ImportLogic extends BaseLogic | ||
| 29 | if (end($ext) != 'csv') { | 29 | if (end($ext) != 'csv') { |
| 30 | $this->fail('导入文件格式必须为csv'); | 30 | $this->fail('导入文件格式必须为csv'); |
| 31 | } | 31 | } |
| 32 | + $domain = $this->param['domain']; | ||
| 33 | + if (strpos($domain, 'https') === false || strpos($domain, 'http') == false) { | ||
| 34 | + $this->fail('请输入完整的采集页面地址'); | ||
| 35 | + } | ||
| 36 | + $domain_arr = parse_url($domain); | ||
| 37 | + if (!isset($domain_arr['host'])) { | ||
| 38 | + $this->fail('采集页面地址输入有误'); | ||
| 39 | + } | ||
| 32 | 40 | ||
| 41 | + $this->param['domain'] = $domain_arr['host']; | ||
| 33 | $this->param['project_id'] = $this->user['project_id']; | 42 | $this->param['project_id'] = $this->user['project_id']; |
| 34 | $this->param['user_id'] = $this->user['id']; | 43 | $this->param['user_id'] = $this->user['id']; |
| 35 | $this->param['status'] = 9; | 44 | $this->param['status'] = 9; |
| 36 | $rs = $this->model->add($this->param); | 45 | $rs = $this->model->add($this->param); |
| 37 | - if($rs === false){ | 46 | + if ($rs === false) { |
| 38 | $this->fail('error'); | 47 | $this->fail('error'); |
| 39 | } | 48 | } |
| 40 | return $this->success(); | 49 | return $this->success(); |
| @@ -35,6 +35,7 @@ class ImportTaskRequest extends FormRequest | @@ -35,6 +35,7 @@ class ImportTaskRequest extends FormRequest | ||
| 35 | return [ | 35 | return [ |
| 36 | 'type' => ['required', Rule::in([ImportTask::TYPE_PROJECT, ImportTask::TYPE_NEWS, ImportTask::TYPE_BLOG])], | 36 | 'type' => ['required', Rule::in([ImportTask::TYPE_PROJECT, ImportTask::TYPE_NEWS, ImportTask::TYPE_BLOG])], |
| 37 | 'file_url' => ['required'], | 37 | 'file_url' => ['required'], |
| 38 | + 'domain' => ['required'], | ||
| 38 | ]; | 39 | ]; |
| 39 | } | 40 | } |
| 40 | 41 | ||
| @@ -44,6 +45,7 @@ class ImportTaskRequest extends FormRequest | @@ -44,6 +45,7 @@ class ImportTaskRequest extends FormRequest | ||
| 44 | 'type.required' => '导入类型必须', | 45 | 'type.required' => '导入类型必须', |
| 45 | 'type.in' => '导入类型错误', | 46 | 'type.in' => '导入类型错误', |
| 46 | 'file_url.required' => '文件地址必须', | 47 | 'file_url.required' => '文件地址必须', |
| 48 | + 'domain.required' => '采集页面地址必须填写', | ||
| 47 | ]; | 49 | ]; |
| 48 | } | 50 | } |
| 49 | } | 51 | } |
app/Models/Com/UpdateVisit.php
0 → 100644
| @@ -58,6 +58,7 @@ class RouteMap extends Base | @@ -58,6 +58,7 @@ class RouteMap extends Base | ||
| 58 | $i=1; | 58 | $i=1; |
| 59 | $sign = generateRoute($title); | 59 | $sign = generateRoute($title); |
| 60 | $info = self::where(['project_id' => $project_id, 'source' => $source, 'source_id'=>$source_id])->first(); | 60 | $info = self::where(['project_id' => $project_id, 'source' => $source, 'source_id'=>$source_id])->first(); |
| 61 | + $suffix = ''; | ||
| 61 | if(empty($info)){ | 62 | if(empty($info)){ |
| 62 | if($source == self::SOURCE_PRODUCT_KEYWORD){ | 63 | if($source == self::SOURCE_PRODUCT_KEYWORD){ |
| 63 | $suffix = '-tag'; | 64 | $suffix = '-tag'; |
| @@ -65,8 +66,6 @@ class RouteMap extends Base | @@ -65,8 +66,6 @@ class RouteMap extends Base | ||
| 65 | if($source == self::SOURCE_PRODUCT){ | 66 | if($source == self::SOURCE_PRODUCT){ |
| 66 | $suffix = '-product'; | 67 | $suffix = '-product'; |
| 67 | } | 68 | } |
| 68 | - }else{ | ||
| 69 | - $suffix = ''; | ||
| 70 | } | 69 | } |
| 71 | $route = $sign.$suffix; | 70 | $route = $sign.$suffix; |
| 72 | while(self::isExist($route, $source_id, $project_id)){ | 71 | while(self::isExist($route, $source_id, $project_id)){ |
-
请 注册 或 登录 后发表评论