php使用curl代理实现抓取数据的方法
三月 22, 2019 | views
Comments 0
<?php
// Whether requests made by async_get_url() are routed through the HTTP proxy.
define('IS_PROXY', true);
/**
 * Fetch several URLs in parallel with curl_multi, optionally through an HTTP proxy.
 *
 * The proxy is applied only when the IS_PROXY constant is defined and truthy.
 *
 * @param array  $url_array URLs to fetch. Array keys are ignored; results are
 *                          indexed 0..n-1 in iteration order.
 * @param int    $wait_usec Microseconds to sleep after each initial
 *                          curl_multi_exec() call (e.g. 250000 = 0.25 s); 0 disables.
 * @param string $proxy     Proxy as "host:port". Public proxy list:
 *                          http://www.cnproxy.com/proxy1.html (the Hong Kong,
 *                          China entries tend to be the fastest).
 *
 * @return array|false false when $url_array is not an array; otherwise a list
 *                     whose entries are the response body (string) or false
 *                     when that transfer did not complete successfully.
 */
function async_get_url($url_array, $wait_usec = 0, $proxy = '110.4.12.170:80')
{
    if (!is_array($url_array)) {
        return false;
    }
    if (count($url_array) === 0) {
        // Nothing to fetch: avoid creating a multi handle at all.
        return array();
    }

    $wait_usec = intval($wait_usec);
    $use_proxy = defined('IS_PROXY') && IS_PROXY;

    $data    = array();
    $handle  = array();
    $running = 0;
    $mh      = curl_multi_init(); // multi handle that drives all transfers in parallel

    foreach ($url_array as $url) {
        $ch = curl_init();
        if ($use_proxy) {
            curl_setopt($ch, CURLOPT_PROXY, $proxy);
        }
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);       // per-transfer timeout, seconds
        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)');
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); // follow 3xx redirects
        curl_setopt($ch, CURLOPT_MAXREDIRS, 7);      // maximum redirect depth
        curl_multi_add_handle($mh, $ch);
        $handle[] = $ch;
    }

    // Kick off the transfers.
    do {
        $mrc = curl_multi_exec($mh, $running);
        if ($wait_usec > 0) {
            usleep($wait_usec);
        }
    } while ($mrc === CURLM_CALL_MULTI_PERFORM);

    // Drive the transfers until none are running or the multi stack errors.
    while ($running && $mrc === CURLM_OK) {
        if (curl_multi_select($mh) === -1) {
            // select() can fail immediately; back off briefly instead of
            // spinning, then still call curl_multi_exec() to make progress.
            usleep(100);
        }
        do {
            $mrc = curl_multi_exec($mh, $running);
        } while ($mrc === CURLM_CALL_MULTI_PERFORM);
    }

    // Per-transfer results are only available through the multi message queue.
    $succeeded = array();
    while (($info = curl_multi_info_read($mh)) !== false) {
        if ($info['msg'] === CURLMSG_DONE && $info['result'] === CURLE_OK) {
            $index = array_search($info['handle'], $handle, true);
            if ($index !== false) {
                $succeeded[$index] = true;
            }
        }
    }

    // Collect bodies and release every easy handle.
    foreach ($handle as $i => $ch) {
        $data[$i] = isset($succeeded[$i]) ? curl_multi_getcontent($ch) : false;
        curl_multi_remove_handle($mh, $ch);
        if (PHP_VERSION_ID < 80000) {
            // Handles are objects freed automatically from PHP 8.0 on;
            // earlier versions need an explicit close to release the resource.
            curl_close($ch);
        }
    }
    curl_multi_close($mh);

    return $data;
}
// Demo: fetch a single page and print its body (prints nothing if the fetch failed).
$urls = array('http://map.baidu.com');
$re = async_get_url($urls);
echo $re[0];
?>
zend