作者 lyh
@@ -79,7 +79,13 @@ class ProjectImport extends Command @@ -79,7 +79,13 @@ class ProjectImport extends Command
79 //读取csv文件 79 //读取csv文件
80 $line_of_text = []; 80 $line_of_text = [];
81 try { 81 try {
82 - $file_handle = fopen($task->file_url, 'r'); 82 + $opts = [
  83 + 'http' => [
  84 + 'method' => 'GET',
  85 + 'header' => 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246'
  86 + ]
  87 + ];
  88 + $file_handle = fopen($task->file_url, 'r', null, stream_context_create($opts));
83 while (!feof($file_handle)) { 89 while (!feof($file_handle)) {
84 $line_of_text[] = fgetcsv($file_handle, 0, ','); 90 $line_of_text[] = fgetcsv($file_handle, 0, ',');
85 } 91 }
@@ -183,7 +189,7 @@ class ProjectImport extends Command @@ -183,7 +189,7 @@ class ProjectImport extends Command
183 protected function get_code_type($file) 189 protected function get_code_type($file)
184 { 190 {
185 $list = array('GBK', 'UTF-8'); 191 $list = array('GBK', 'UTF-8');
186 - $str = curl_c($file,false); 192 + $str = curl_c($file, false);
187 foreach ($list as $item) { 193 foreach ($list as $item) {
188 $tmp = mb_convert_encoding($str, $item, $item); 194 $tmp = mb_convert_encoding($str, $item, $item);
189 if (md5($tmp) == md5($str)) { 195 if (md5($tmp) == md5($str)) {
@@ -268,6 +268,14 @@ class HtmlCollect extends Command @@ -268,6 +268,14 @@ class HtmlCollect extends Command
268 $check_vc_b && $source[] = $check_vc_b; 268 $check_vc_b && $source[] = $check_vc_b;
269 } 269 }
270 270
  271 + //a标签下载资源
  272 + preg_match_all('/<a\s+[^>]*?href\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $html, $result_a);
  273 + $down = $result_a[2] ?? [];
  274 + foreach ($down as $vd) {
  275 + $check_vd = $this->url_check($vd, $project_id, $domain, $web_url_domain, $home_url);
  276 + $check_vd && $source[] = $check_vd;
  277 + }
  278 +
271 return $source; 279 return $source;
272 } 280 }
273 281
@@ -286,7 +294,7 @@ class HtmlCollect extends Command @@ -286,7 +294,7 @@ class HtmlCollect extends Command
286 (empty($host) || $host == $web_url_domain || $host == $home_url) 294 (empty($host) || $host == $web_url_domain || $host == $home_url)
287 && $path 295 && $path
288 && (strpos($path, '.') !== false) 296 && (strpos($path, '.') !== false)
289 - && (end($path_arr) != 'html') 297 + && (!in_array(end($path_arr), ['html', 'com', 'xml']))
290 ) { 298 ) {
291 $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first(); 299 $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
292 if (!$source) { 300 if (!$source) {