下文為各位介紹一個PHP定時任務通過CURL抓取圖片的例子,希望例子對大家有幫助.基本思路就是通過一個URL連接,將所有圖片的地址抓取下來,然後循環打開圖片,利用文件操作函數下載下來,保存到本地,並且把圖片的alt屬性也抓取下來,最後將數據保存到自己的數據庫.
廢話不多說,看程序就能明白了.其中需要用到PHP定時任務和PHP的一個第三方插件simple_html_dom.php,使用方法請參考simple_html_dom的下載和使用.
- <?php
/**
 * Fetch a listing page and collect the detail-page links it contains.
 *
 * The page at $url is downloaded with cURL and parsed with simple_html_dom.
 * Every <a> whose href looks like "/content_<digits>_1.html" is kept; the
 * href is made absolute by prefixing the site host, and the anchor's title
 * attribute is recorded alongside it.
 *
 * @param string $url Address of the listing page to scan.
 *
 * @return array array('links' => string[], 'title' => string[]). The two
 *               lists are parallel: title[i] belongs to links[i]. Both are
 *               empty when the page cannot be downloaded or has no matches.
 */
function getLink($url){
    include_once('simple_html_dom.php');

    $arr = array('links' => array(), 'title' => array());

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, false);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    $output = curl_exec($ch);
    curl_close($ch);

    // curl_exec() returns false on transport errors; there is nothing to parse then.
    if ($output === false || $output === '') {
        return $arr;
    }

    $html = new simple_html_dom();
    $html->load($output);

    // De-duplicate by link while keeping each title paired with its link.
    // (Running array_unique() on the two lists separately can drop a repeated
    // title and shift every later title onto the wrong link.)
    $seen = array();
    foreach ($html->find('a') as $element) {
        $href = $element->href;

        // NOTE(review): pattern reconstructed as "/content_<n>_1.html" with an
        // escaped dot; confirm against the hrefs the live site actually emits.
        if (!is_string($href) || !preg_match('#^/content_[0-9]+_1\.html$#i', $href)) {
            continue;
        }

        $link = 'http://www.survivalescaperooms.com' . $href;
        if (isset($seen[$link])) {
            continue;
        }
        $seen[$link] = true;

        $arr['links'][] = $link;
        // A missing title attribute becomes an empty string.
        $arr['title'][] = (string) $element->title;
    }

    // Release the parsed DOM; this function is called from a long-running loop.
    $html->clear();

    return $arr;
}
/**
 * Download the image shown on a detail page and store it under $dirname.
 *
 * The page at $url is fetched with cURL and parsed with simple_html_dom.
 * The src of the last element matching the 'img[w]' selector is downloaded,
 * checked to be a GIF, JPEG or PNG of at least 10x10 pixels, and written to
 * $dirname under a generated "<timestamp><random>.<ext>" name.
 *
 * @param string $url     Address of the detail page.
 * @param string $dirname Target directory; created if missing. A trailing
 *                        slash is optional.
 *
 * @return string|null Path of the saved file, or null when the page or the
 *                     image could not be fetched, the image is of an
 *                     unsupported type or too small, or it could not be
 *                     written.
 */
function loadimg($url,$dirname){
    include_once('simple_html_dom.php');

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, false);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    $output = curl_exec($ch);
    curl_close($ch);

    if ($output === false || $output === '') {
        return null;
    }

    $html = new simple_html_dom();
    $html->load($output);

    // When several elements match, the last one wins.
    // NOTE(review): selector kept exactly as published; confirm 'img[w]' is
    // the intended attribute filter for the target pages.
    $image = null;
    foreach ($html->find('img[w]') as $element) {
        $image = $element->src;
    }
    $html->clear();

    if (!is_string($image) || $image === '') {
        return null;
    }

    $data = file_get_contents($image);
    if ($data === false) {
        return null;
    }

    // Inspect the bytes already downloaded instead of fetching the URL again.
    // $info holds width [0], height [1] and the IMAGETYPE_* constant [2].
    $info = getimagesizefromstring($data);
    if ($info === false) {
        return null;
    }

    switch ($info[2]) {
        case IMAGETYPE_GIF:
            $str = 'gif';
            break;
        case IMAGETYPE_JPEG:
            $str = 'jpg';
            break;
        case IMAGETYPE_PNG:
            $str = 'png';
            break;
        default:
            // Unsupported format: nothing to save.
            return null;
    }

    // Too small to be a meaningful picture (icons, spacers): skip it.
    if ($info[0] < 10 || $info[1] < 10) {
        return null;
    }

    // Make sure the directory part ends with a separator before appending the name.
    $dir = $dirname;
    if ($dir !== '' && substr($dir, -1) !== '/' && substr($dir, -1) !== '\\') {
        $dir .= '/';
    }

    // The second is_dir() covers another process creating the directory
    // between the check and the mkdir() call.
    if ($dir !== '' && !is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
        return null;
    }

    $filename = time() . rand(1, 999999) . '.' . $str;
    if (file_put_contents($dir . $filename, $data) === false) {
        return null;
    }

    return $dir . $filename;
}
// Scheduled scrape: once every 24 hours, collect the detail links from the
// listing page, download one image per link and print the saved images with
// their titles.

// Run without a time limit and keep running after the client disconnects.
// ignore_user_abort() only changes the setting when given an argument.
set_time_limit(0);
ignore_user_abort(true);

$interval = 24 * 3600;

do {
    $img = getLink('http://www.survivalescaperooms.com/qutu_1.html');

    // Assemble title/url rows (a plain two-dimensional array that is
    // convenient to hand to a database layer). Rows are built per link so a
    // title is always paired with the image downloaded for the same index.
    $res = array();
    foreach ($img['links'] as $i => $link) {
        $path = loadimg($link, 'images/');
        if (!is_string($path) || $path === '') {
            // No image was saved for this link.
            continue;
        }
        $res[] = array(
            'title' => isset($img['title'][$i]) ? $img['title'][$i] : '',
            'url'   => $path,
        );
    }

    echo '<br/>';
    foreach ($res as $item) {
        // Titles come from a remote page: escape them before printing as HTML.
        echo '<img src="' . htmlspecialchars($item['url'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '">'
            . htmlspecialchars((string) $item['title'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
            . '<br />';
    }

    sleep($interval);
} while (true);
- ?>
新聞熱點
疑難解答