从统计局抓取2016年最新的全国区县数据!!

using System;
using System.Collections.Generic;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using HtmlAgilityPack;
using System.Text;
/// <summary>
/// Web Forms page that walks the administrative-division listing published on
/// stats.gov.cn and writes every division name together with its code
/// (right-padded with '0' to 12 characters) to the HTTP response.
/// </summary>
public partial class 抓取区县 : System.Web.UI.Page
{
    protected void Page_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Loads the index page, writes one line per linked entry and then descends
    /// into each entry's sub-page through <see cref="GetArea"/>.
    /// </summary>
    protected void Button1_Click(object sender, EventArgs e)
    {
        string Url = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2015/index.html";
        HtmlDocument doc = LoadDocument(Url);

        HtmlNodeCollection hrefList = doc.DocumentNode.SelectNodes(".//a[@href]");
        if (hrefList == null)
        {
            return;
        }

        foreach (HtmlNode href in hrefList)
        {
            int dep = 1; // depth of the current level
            HtmlAttribute att = href.Attributes["href"];
            if (att.Value.Contains("miibeian"))
            {
                // Skip the site registration ("beian") link; it is not a division entry.
                continue;
            }

            // The link target minus ".html" is the code prefix of the entry.
            string code = att.Value.Replace(".html", "").PadRight(12, '0');
            string depurl = Url.Replace("index.html", att.Value);

            // NOTE(review): the line-break markup inside this literal was lost when the
            // source was collapsed; "<br/>" is assumed - confirm against the original page.
            Response.Write(href.InnerText + "(" + code + ")<br/>");

            // NOTE(review): in the collapsed source this call sat behind a "//" marker.
            // It is assumed to be live because depurl/dep are otherwise unused and the
            // lower levels would never be read - confirm.
            GetArea(depurl, dep, code);
        }
    }

    /// <summary>
    /// Recursively reads one listing page and writes its rows to the response.
    /// </summary>
    /// <param name="Url">Address of the page to read.</param>
    /// <param name="dep">Depth of the parent level; incremented on entry.</param>
    /// <param name="parentCode">Padded code of the parent entry; its leading digits
    /// form the file name of <paramref name="Url"/> that is swapped for the child link.</param>
    void GetArea(string Url, int dep, string parentCode)
    {
        HtmlDocument doc = LoadDocument(Url);
        dep++;

        // Each level marks its rows with a different class; the last cell holds the name.
        HtmlNodeCollection hrefList = doc.DocumentNode.SelectNodes(
            ".//tr[@class='" + classname(dep) + "']/td[last()]");
        if (hrefList == null)
        {
            return;
        }

        foreach (HtmlNode href in hrefList)
        {
            // The cell before the name cell carries the code. Guard against a missing
            // sibling so a malformed row does not abort the whole run.
            HtmlNode codeCell = href.PreviousSibling;
            string codeNum = (codeCell != null ? codeCell.InnerText : "").PadRight(12, '0');

            HtmlNodeCollection hrefNode = href.SelectNodes(".//a[@href]");
            if (hrefNode == null)
            {
                // No link: this row has no sub-page, just print it.
                Response.Write(joinstr(dep) + href.InnerText + "(" + codeNum + ")<br/>");
                continue;
            }

            HtmlAttribute att = hrefNode[0].Attributes["href"];
            if (att.Value.Contains("miibeian"))
            {
                // Skip the site registration ("beian") link.
                continue;
            }

            Response.Write(joinstr(dep) + href.InnerText + "(" + codeNum + ")<br/>");

            // The child link is relative to the current page: replace the current
            // page's file name (first 2 * (dep - 1) digits of the parent code) with it.
            string depurl = Url.Replace(parentCode.Substring(0, 2 * (dep - 1)) + ".html", att.Value);
            if (dep < 4) // stop once the "towntr" level (dep 4) has been printed
            {
                GetArea(depurl, dep, codeNum);
            }
        }
    }

    /// <summary>
    /// Downloads a page with the encoding forced to GB2312 and returns the parsed document.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    private static HtmlDocument LoadDocument(string url)
    {
        HtmlWeb hw = new HtmlWeb();
        hw.OverrideEncoding = System.Text.Encoding.GetEncoding("GB2312");
        HtmlDocument doc = hw.Load(url);
        doc.OptionReadEncoding = true;
        return doc;
    }

    /// <summary>
    /// Builds the indentation prefix for a level: "-----|" repeated (dep - 1) times.
    /// </summary>
    /// <param name="dep">Depth of the level being printed.</param>
    /// <returns>The prefix, or an empty string when <paramref name="dep"/> is 1 or less.</returns>
    string joinstr(int dep)
    {
        string str = "";
        for (int i = 1; i < dep; i++)
        {
            str += "-----|";
        }
        return str;
    }

    /// <summary>
    /// Maps a depth to the class name used on the table rows of that level.
    /// </summary>
    /// <param name="dep">Depth of the level being read.</param>
    /// <returns>"citytr" for 2, "countytr" for 3, "towntr" for 4, otherwise an empty string.</returns>
    string classname(int dep)
    {
        switch (dep)
        {
            case 2:
                return "citytr";
            case 3:
                return "countytr";
            case 4:
                return "towntr";
            default:
                return "";
        }
    }
}

以上是我写的完整代码,使用开源的 HtmlAgilityPack 组件解析 HTML,该组件可以直接下载使用。关键数据都已经获取到,如:区划代码和名称。

你可以直接把这些数据写入自己的数据库。

效果如下:

转载于:https://www.cnblogs.com/efreer/p/6230624.html


本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!

相关文章

立即
投稿

微信公众账号

微信扫一扫加关注

返回
顶部