作者 刘锟

html采集

@@ -213,11 +213,16 @@ class HtmlCollect extends Command @@ -213,11 +213,16 @@ class HtmlCollect extends Command
213 $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first(); 213 $source = CollectSource::where('project_id', $project_id)->where('origin', $url)->first();
214 if (!$source) { 214 if (!$source) {
215 return [ 215 return [
  216 + 'download' => true,
216 'url' => $url, 217 'url' => $url,
217 'url_complete' => ($scheme ?: 'https') . '://' . ($host ?: $domain) . $path 218 'url_complete' => ($scheme ?: 'https') . '://' . ($host ?: $domain) . $path
218 ]; 219 ];
219 } else { 220 } else {
220 - return false; 221 + return [
  222 + 'download' => false,
  223 + 'url' => $url,
  224 + 'url_complete' => $source['target']
  225 + ];
221 } 226 }
222 } else { 227 } else {
223 return false; 228 return false;
@@ -232,16 +237,20 @@ class HtmlCollect extends Command @@ -232,16 +237,20 @@ class HtmlCollect extends Command
232 { 237 {
233 foreach ($source as $vs) { 238 foreach ($source as $vs) {
234 239
235 - $new_source = CosService::uploadRemote($project_id, 'source', $vs['url_complete']);  
236 - if ($new_source) {  
237 - CollectSource::insert([  
238 - 'project_id' => $project_id,  
239 - 'origin' => $vs['url'],  
240 - 'target' => $new_source,  
241 - 'created_at' => date('Y-m-d H:i:s'),  
242 - 'updated_at' => date('Y-m-d H:i:s'),  
243 - ]);  
244 - $html = str_replace($vs['url'], $new_source, $html); 240 + if ($vs['download']) {
  241 + $new_source = CosService::uploadRemote($project_id, 'source', $vs['url_complete']);
  242 + if ($new_source) {
  243 + CollectSource::insert([
  244 + 'project_id' => $project_id,
  245 + 'origin' => $vs['url'],
  246 + 'target' => $new_source,
  247 + 'created_at' => date('Y-m-d H:i:s'),
  248 + 'updated_at' => date('Y-m-d H:i:s'),
  249 + ]);
  250 + $html = str_replace($vs['url'], $new_source, $html);
  251 + }
  252 + } else {
  253 + $html = str_replace($vs['url'], $vs['url_complete'], $html);
245 } 254 }
246 } 255 }
247 256