首页 > 学院 > 开发设计 > 正文

基于webmagic爬取并下载百度图片

2019-11-11 07:34:21
字体:
来源:转载
供稿:网友

最近想下载一些图片,在一次一次另存为之后实在受不了了,于是写了一个小程序直接把图片下载下来

现把代码贴出,以供参考。

import com.alibaba.fastjson.JSONArray;import com.alibaba.fastjson.JSONObject;import us.codecraft.webmagic.Page;import us.codecraft.webmagic.Spider;import java.io.DataInputStream;import java.io.File;import java.io.FileOutputStream;import java.io.IOException;import java.net.MalformedURLException;import java.net.URL;import java.util.ArrayList;import java.util.List;/** * 下载百度图片 * * @author bruce_q * @create 2017-02-03 11:12 **/public class DownloadPicture extends BasePRocessor{    static List<String> urls;    static List<String> names;    public void setUrls(List<String> urls) {        this.urls = urls;    }    public void setNames(List<String> names) {        this.names = names;    }    /**     * 下载图片     * author:bruce_q     * 2017-2-5 20:47     *     * @param urlList     * @param nameList     */    private void downloadPicture(ArrayList<String> urlList,ArrayList<String> nameList,String key) {        URL url = null;        for (int i=0;i<urlList.size();i++) {            try {                url = new URL(urlList.get(i));                DataInputStream dataInputStream = new DataInputStream(url.openStream());                String imageName = i + ".jpg";                File file=new File("d://pic//"+key);    //设置下载路径                if(!file.isDirectory()){                    file.mkdirs();                }                FileOutputStream fileOutputStream = new FileOutputStream(new File("d://pic//"+ key +"//"+ imageName.trim()));                byte[] buffer = new byte[1024];                int length;                while ((length = dataInputStream.read(buffer)) > 0) {                    fileOutputStream.write(buffer, 0, length);                }                dataInputStream.close();                fileOutputStream.close();            } catch (MalformedURLException e) {                e.printStackTrace();            } catch (IOException e) {                e.printStackTrace();            }        }    }    @Override    public void process(Page page) {        
List<String> url_list = new ArrayList<>();        List<String> name_list = new ArrayList<>();        JSONObject jsonObject = (JSONObject) JSONObject.parse(page.getRawText());        JSONArray data = (JSONArray) jsonObject.get("data");        for(int i=0;i<data.size();i++){            String url = (String) data.getJSONObject(i).get("thumbURL");            String name = (String) data.getJSONObject(i).get("fromPageTitleEnc");            if(url!=null){                url_list.add(url);                name_list.add(name);            }        }        setUrls(url_list);        setNames(name_list);    }    public static void main(String[] args) {        String key = "乘风破浪";    //百度图片 关键词        DownloadPicture downloadPicture = new DownloadPicture();        ArrayList<String> nameList = new ArrayList<>();        ArrayList<String> urlList = new ArrayList<>();        for(int i=0;i<2;i++){   //控制爬取页数,一页30张图片            String url = "https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&queryWord="+key+"&word="+key+"&pn="+i*3+"0";            Spider.create(new DownloadPicture())                .addUrl(url)                .run();            urlList.addAll(urls);            nameList.addAll(names);        }        downloadPicture.downloadPicture(urlList,nameList,key);    }}

图片是能直接下载下来的,谁有好的关键字(你懂的(*^__^*) ),记得留言。
发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表