/**
 * Fetch several URLs in parallel using the cURL multi interface.
 *
 * Every request is sent with an MSIE 7 user-agent string, with response
 * headers excluded from the returned body and a 60-second connect timeout.
 * No overall transfer timeout is configured.
 *
 * Example:
 *   $pages = CurlMulti(array('http://www.sina.com.cn/', 'http://www.sohu.com/'));
 *
 * @param array $urls URLs to fetch. The keys are not preserved; only the
 *                    iteration order matters.
 * @return array Values returned by curl_multi_getcontent() for each URL,
 *               re-indexed from 0 in the iteration order of $urls.
 */
function CurlMulti($urls)
{
    $mh = curl_multi_init();

    // One easy handle per URL, kept under the caller's key so results can be
    // collected in the same order the URLs were supplied.
    $handles = array();
    foreach ($urls as $key => $url) {
        $ch = curl_init($url);
        curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)");
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 60);
        // Return the body as a string instead of writing it to output.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_multi_add_handle($mh, $ch);
        $handles[$key] = $ch;
    }

    // Drive all transfers to completion. Between rounds, block in
    // curl_multi_select() until a socket is ready rather than spinning on
    // curl_multi_exec(), which would burn a full CPU core.
    $active = 0;
    do {
        $status = curl_multi_exec($mh, $active);
        if ($status === CURLM_OK && $active && curl_multi_select($mh, 1.0) === -1) {
            // select() could not wait on anything; back off briefly so the
            // loop still does not degenerate into a busy-wait.
            usleep(1000);
        }
        // CURLM_CALL_MULTI_PERFORM (old libcurl) means "call again right away".
        // Any other non-OK status is an error on the multi stack: stop instead
        // of looping forever.
    } while ($status === CURLM_CALL_MULTI_PERFORM || ($status === CURLM_OK && $active));

    // Collect each body, then detach and release its handle.
    $arr_data = array();
    foreach ($handles as $ch) {
        $arr_data[] = curl_multi_getcontent($ch);
        curl_multi_remove_handle($mh, $ch);
        curl_close($ch);
    }
    curl_multi_close($mh);

    return $arr_data;
}