Java 爬虫入门教程

Java 爬虫之爬取基金增长情况
先上pom文件

<dependencies>
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.12.1</version>
    </dependency>
    <dependency>
        <groupId>org.fusesource</groupId>
        <artifactId>sigar</artifactId>
        <version>1.6.4</version>
    </dependency>
    <dependency>
        <groupId>org.apache.httpcomponents</groupId>
        <artifactId>httpclient</artifactId>
        <version>4.5.2</version>
    </dependency>
    <dependency>
        <groupId>net.sourceforge.htmlunit</groupId>
        <artifactId>htmlunit</artifactId>
        <version>2.23</version>
        <exclusions>
            <exclusion>
                <artifactId>httpclient</artifactId>
                <groupId>org.apache.httpcomponents</groupId>
            </exclusion>
        </exclusions>
    </dependency>
</dependencies>

代码

package com.test;import java.util.ArrayList;
import java.util.List;
import org.apache.http.Consts;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;public class HelloWorld {public static void main(String[] args) throws Exception {// 需要爬取的基金编号List<String> codes = new ArrayList<String>();codes.add("003834");codes.add("002251");codes.add("161725");codes.add("003095");List<String>  result = new ArrayList<String>();for (int i = 0; i < codes.size(); i++) {String url = "http://fundgz.1234567.com.cn/js/"+codes.get(i)+".js?rt="+System.currentTimeMillis();HttpGet httpGet = new HttpGet(url);CloseableHttpClient httpclient = HttpClients.createDefault();httpGet.setHeader("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36");CloseableHttpResponse response = httpclient.execute(httpGet);int statusCode = response.getStatusLine().getStatusCode();try {HttpEntity entity = response.getEntity();if (statusCode == 200) {String html = EntityUtils.toString(entity, Consts.UTF_8);Document doc = null;doc = Jsoup.parse(html);Elements ulList = doc.select("body");String now = ulList.toString();now = now.substring(now.indexOf("{"), now.indexOf("}")+1);result.add(now);EntityUtils.consume(response.getEntity());}else {EntityUtils.consume(response.getEntity());}} finally {response.close();}}System.out.println(result);for (int i = 0; i < result.size(); i++) {System.out.println(result.get(i));}}}


本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!

相关文章

立即
投稿

微信公众账号

微信扫一扫加关注

返回
顶部