在前程无忧 目前年收入上目前收入这一栏不想显示,怎么办

2511人阅读
找工作神器的主要原理是:根据查询条件访问相应的网站,通过程序拿到该网站的HTML代码,再用正则表达式提取并截取其中的重要信息,最后将这些信息显示在表格里。这里使用了线程池异步的方式,同时去三个网站抓取数据,并且每抓取一条就解析一条并立即显示在表格里,这样就避免了等待太长时间还看不到结果的尴尬。程序发布后各位园友可以下载程序看看效果如何,还请您提出宝贵的意见。
& & & &整个程序显示的界面效果图:
& & & &图片上面显示的是查询条件,输入查询条件后点击查询,下面显示的查询出的数据,分别有三个页签(猎聘网、智联招聘和前程无忧),表格分别显示职位名称、公司名称、公司性质、公司规模、月薪/年薪、工作地点、工作经验、最低学历和发布时间等等信息,日后根据需要还可以继续扩充想要看到的信息,实现看到信息的基本上跟网站上的信息差不多。
解析:现在输入的条件有工作地点、薪水范围(上限、下限)、关键词、必须包含的关键词,暂时只支持以上几种条件,日后可能会继续加入更多的查询条件(公司名称、公司性质、工作经验、学历要求等),使查询更方便。
启动查询的代码如下:
& & & & 通过启动线程池异步的方式同时启动三个网站数据的加载,这样增强用户体验的效果,并且会拿到一条数据解析一条数据,并且及时显示在表格了,这样用户不需要等待太长的时间而看不到结果。
& & & & 程序启动首先会加载城市对应的ID的一个字典,数据加载如下:
1.前程无忧
&&&&&& 前程无忧我相信应该是很多园友找工作的首选,博主就是在这上面注册了简历,并且每次换工作都是在这上面取得了成功,感觉还挺不错,祝愿各位园友都能找到自己称心如意的工作,只要我们大家都一起努力应该都没有问题的。
&&&&&& 下面介绍实现逻辑:
#region * 前程无忧
/// <summary>
/// Thread-pool callback that starts a 51Job search: it resets the 51Job result
/// state, resolves the city ID for the entered work location, asks
/// <c>JobFactory</c> for the "51Job" source and runs its
/// <c>GetJobInfoList</c> on a background thread. Each parsed posting is
/// delivered to <c>job_GetJob2End</c>.
/// </summary>
/// <param name="obj">State object supplied by the thread pool; not used.</param>
private void Get51JobData(object obj)
{
    // NOTE(review): these controls are read from a thread-pool thread, not the
    // UI thread — confirm this is acceptable for the control types used.
    string workAddress = this.txtAddress.Text.Trim();   // work location
    string keyWord = this.txtKeyWord.Text.Trim();       // search keyword
    string upperSalary = this.txtSalary1.Text.Trim();   // salary range bound (txtSalary1)
    string lowerSalary = this.txtSalary2.Text.Trim();   // salary range bound (txtSalary2)
    string mustKey = string.Empty;                      // word every posting must contain

    // Reset the state left over from the previous search.
    jobInfoList2.Clear();
    curJobInfo2 = null;
    dt2.Rows.Clear();
    this.Invoke((MethodInvoker)delegate
    {
        this.gcJob2.DataSource = dt2;
    });

    // Look up the city ID whose name contains the entered work location.
    KeyValuePair<string, string> kv = dic2.FirstOrDefault(t => t.Value.Contains(workAddress));
    if (kv.Key == null)
    {
        XtraMessageBox.Show("无法搜索该工作地点", "警告", MessageBoxButtons.OK, MessageBoxIcon.Warning);
        // Without a city ID there is nothing to search for.
        return;
    }
    string workAddressId = kv.Key;

    // Only apply the "must contain" filter when its checkbox is ticked.
    if (this.chkMustKey.Checked)
    {
        mustKey = this.txtMustKey.Text.Trim();
    }

    JobFactory tws = new JobFactory("51Job", workAddress, workAddressId, keyWord, upperSalary, lowerSalary, mustKey);
    IJob job = tws.GetJob();
    if (job == null)
    {
        return;
    }

    // Unsubscribe first so the handler can never be attached twice.
    job.GetJobEnd -= new GetJobEndEventHandler(job_GetJob2End);
    job.GetJobEnd += new GetJobEndEventHandler(job_GetJob2End);

    // The original "abort the previous search thread" check tested a local that
    // had just been set to null, so it could never run; it has been removed.
    Thread th = new Thread(new ThreadStart(job.GetJobInfoList));
    th.IsBackground = true;
    th.Start();
}
/// <summary>
/// Handler for a job source's GetJobEnd event: records one parsed posting and
/// adds a row for it to the 51Job grid view.
/// </summary>
/// <param name="o">Event sender argument; not read by this handler.</param>
/// <param name="e">The parsed posting. The scraper classes in this file also raise the event with null after their page loop.</param>
// NOTE(review): this listing lost its braces and some tokens during extraction,
// so the exact nesting of the statements below cannot be confirmed from the text.
private void job_GetJob2End(object o, JobInfo e)
// The event is raised from the search thread, so the work is marshalled through Invoke.
this.Invoke((MethodInvoker)delegate
if (e != null)
jobInfoList2.Add(e);
// Right-hand side was truncated in extraction; presumably "e" — TODO confirm.
curJobInfo2 =
// Presumably raises the grid's InitNewRow event handled by gvJob2_InitNewRow — TODO confirm.
this.gvJob2.AddNewRow();
this.layoutControlGroup2.Enabled = true;
/// <summary>
/// Fills a newly added grid row with the fields of curJobInfo2.
/// </summary>
/// <param name="sender">Event source; not read by this handler.</param>
/// <param name="e">Supplies the handle of the row being initialised.</param>
// NOTE(review): braces, quotes and the tails of several identifiers were lost in
// extraction; the property names after "curJobInfo2." are truncated to one letter.
private void gvJob2_InitNewRow(object sender, DevExpress.XtraGrid.Views.Grid.InitNewRowEventArgs e)
// "as" yields null when the row is not a DataRowView; no null check is visible here.
DataRowView dr = this.gvJob2.GetRow(e.RowHandle) as DataRowV
dr[&Url&] = curJobInfo2.U//posting URL
dr[&Position&] = curJobInfo2.P//position title
dr[&Company&] =//company name
dr[&Nature&] = curJobInfo2.N//company type
dr[&Scale&] = curJobInfo2.S//company size
dr[&Salary&] = curJobInfo2.S//monthly / yearly salary
dr[&Address&] = curJobInfo2.A//work location
dr[&Experience&] = curJobInfo2.E//work experience
dr[&Education&] = curJobInfo2.E//minimum education
dr[&Time&] = curJobInfo2.T//publication date
// Commit the row, refresh the view and move to the last row.
this.gvJob2.UpdateCurrentRow();
this.gvJob2.RefreshData();
this.gvJob2.MoveLast();
// "Failed to add row" message; the try/catch that presumably guards it is not
// visible in this listing — TODO confirm.
XtraMessageBox.Show(&添加行失败&);
/// <summary>
/// Double-click handler for the 51Job grid: opens the posting link stored in
/// the "Url" column of the focused row.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void gcJob2_DoubleClick(object sender, EventArgs e)
{
    // No focused data row (for example an empty grid): nothing to open.
    var row = this.gvJob2.GetFocusedDataRow();
    if (row == null)
    {
        return;
    }

    string uri = Convert.ToString(row["Url"]);

    // The link was scraped from a web page, so only well-formed http/https
    // URLs are handed to Process.Start; anything else is ignored.
    Uri target;
    if (!Uri.TryCreate(uri, UriKind.Absolute, out target)
        || (target.Scheme != Uri.UriSchemeHttp && target.Scheme != Uri.UriSchemeHttps))
    {
        return;
    }

    System.Diagnostics.Process.Start(target.AbsoluteUri);
}
#endregion
以上四个方法的作用分别是:线程池启动调用的方法、表格增加一行数据、表格增加行和双击行打开当前行链接。实现这四个方法即可获取前程无忧的数据,而获取HTML内容和解析HTML则由另外一个类实现,该类如下:
/// <summary>
/// IJob implementation for 51Job: requests the search result pages, follows the
/// link of each result row that mentions the work location, extracts the posting
/// fields with regular expressions and raises GetJobEnd once per posting.
/// </summary>
// NOTE(review): this listing was damaged during extraction. Braces are missing,
// the characters ", < and > (and apparently &nbsp;) all appear as "&", and the
// tails of several identifiers are cut off. The code lines are left untouched.
public class JobFrom51Job : IJob
#region * 私有字段
// Search URL prefix. NOTE(review): it starts with "/", so the scheme/host
// appears to have been stripped from the listing — TODO confirm.
private string url = @&/jobsearch/search_result.php?&;
/// <summary>
/// Work location
/// </summary>
private string workA
/// <summary>
/// Work location ID
/// </summary>
private string workAddressId;
/// <summary>
/// Search keyword
/// </summary>
private string keyW
/// <summary>
/// Word that a posting must contain
/// </summary>
private string mustK
#endregion
/// <summary>
/// Stores the search criteria in the private fields.
/// </summary>
public JobFrom51Job(string workAddress, string workAddressId, string keyWord, string mustKey)
this.workAddress = workA
this.workAddressId = workAddressId;
this.keyWord = keyW
this.mustKey = mustK
/// <summary>
/// Raised with (page HTML, JobInfo) for every parsed posting and with
/// (null, null) after the page loop in GetJobInfoList.
/// </summary>
public event GetJobEndEventHandler GetJobE
/// <summary>
/// Builds the query string, downloads the first result page, derives the page
/// count from it and then downloads and processes pages 2..pageCount.
/// </summary>
public void GetJobInfoList()
StringBuilder condition = new StringBuilder();
condition.Append(&jobarea=& + workAddressId);
if (!string.IsNullOrEmpty(keyWord))
// NOTE(review): the encoded value is written back to the field, so a second
// call would encode an already-encoded keyword.
keyWord = System.Web.HttpUtility.UrlEncode(keyWord, Encoding.GetEncoding(&gb2312&));
condition.Append(&&keyword=& + keyWord);
condition.Append(&&keywordtype=2&);
// NOTE(review): the url field itself is extended, so a second call would append
// the query string again.
url = url + condition.ToString();
string html = GetHtmlCode.GetByget(url, &gb2312&);
GetJobInfoFromPage(html);
int pageCount = 0;
// Number of result pages
string pageCountRegexStr = &(?&=name=\&jobid_count\&\\s*?value=\&)\\d+(?=\&&)&;
Regex pageCountRegex = new Regex(pageCountRegexStr);
// Rounds the matched count up to whole pages of 30.
// NOTE(review): int.Parse throws FormatException when the pattern does not match
// (Match.Value is then empty).
pageCount = (int.Parse(pageCountRegex.Match(html).Value) + 29) / 30;
for (int i = 2; i &= pageC i++)
string url0 = url + string.Format(&&curr_page={0}&, i);
html = GetHtmlCode.GetByget(url0, &gb2312&);
GetJobInfoFromPage(html);
// (null, null) is raised after the page loop.
if (GetJobEnd != null)
GetJobEnd(null, null);
catch (Exception exMsg)
// NOTE(review): wrapping only the message drops the original exception type and
// stack trace.
throw new Exception(exMsg.Message);
/// <summary>
/// Finds the result rows of one search page and loads the detail page of every
/// row whose text contains the work location.
/// </summary>
private void GetJobInfoFromPage(string pageStr)
// Remove all whitespace so the patterns below can match tags without spaces.
pageStr = Regex.Replace(pageStr, &\\s&, &&);
// Full markup of one result row
string jobInfoRegexStr = &(?&=&trclass=\&tr0\&).+?(?=&/tr&)&;
Regex jobInfoRegex = new Regex(jobInfoRegexStr);
MatchCollection jobInfoMC = jobInfoRegex.Matches(pageStr);
foreach (Match m in jobInfoMC)
if (m.Value.Contains(workAddress))
string urlRegexStr = &(?&=&aadid=\&\&href=\&).+?(?=\&)&;
string url0 = Regex.Match(m.Value, urlRegexStr).V
GetJobInfoFromUrl(url0);
catch (Exception exMsg)
throw new Exception(exMsg.Message);
// Regex filters: each entry is { pattern, replacement text }
private static readonly string[][] Filters =
new[] { @&(?is)&script.*?&.*?&/script&&, && },
new[] { @&(?is)&style.*?&.*?&/style&&, && },
new[] { @&(?is)&!--.*?--&&, && },
// (entry above) removes HTML comments
new[] { @&(?is)&footer.*?&.*?&/footer&&,&&},
new[] { &(?is) &div style=\&width:470 padding-left:5\&&.*?&/div&&,&&},
new[] { &(?is)&div id=\&top\&&.*?&/iframe&
&/div&&/div&&,&&},
new[] { &(?is)&div class=\&grayline\& id=\&announcementbody\&&.*?&/li&&/ul&
&/div&&,&&}
/// <summary>
/// Downloads one posting page, strips unrelated markup, applies the mustKey
/// test and extracts the posting fields into a JobInfo that is passed to GetJobEnd.
/// </summary>
private void GetJobInfoFromUrl(string url)
JobInfo info = new JobInfo();
string pageStr = GetHtmlCode.GetByget(url, &gb2312&);
// NOTE(review): the statement guarded by this check was lost in extraction;
// presumably an early return — TODO confirm.
if (string.IsNullOrEmpty(pageStr))
pageStr = pageStr.Replace(&\r\n&, &&);//remove line breaks
// Extract the content of the html body tag
string body = string.E
string bodyFilter = @&(?is)&body.*?&/body&&;
Match m = Regex.Match(pageStr, bodyFilter);
if (m.Success)
body = m.ToString().Replace(&&tr &&, &&tr&&).Replace(&\r\n&, &&);
// Remove styles, scripts and other unrelated tags
foreach (var filter in Filters)
body = Regex.Replace(body, filter[0], filter[1]);
// NOTE(review): the statement guarded by this check was lost in extraction;
// presumably the posting is skipped when it lacks the required word — TODO confirm.
if (!string.IsNullOrEmpty(mustKey) && !body.Contains(mustKey))
body = Regex.Replace(body, &\\s&, &&);
// Right-hand side truncated in extraction; presumably the url parameter — TODO confirm.
info.Url =
string basicInfoRegexStr0 = &&tdclass=\&sr_bt\&colspan=\&2\&&(.*?)&/td&&; //position title
string position = Regex.Match(body, basicInfoRegexStr0).V
// Falls back to the colspan=3 variant of the title cell.
if (string.IsNullOrEmpty(position))
basicInfoRegexStr0 = &&tdclass=\&sr_bt\&colspan=\&3\&&(.*?)&/td&&;
position = Regex.Match(body, basicInfoRegexStr0).V
info.Position = string.IsNullOrEmpty(position) ? && : position.Substring(position.IndexOf(&&&) + 1, position.IndexOf(&&/&) - position.IndexOf(&&&) - 1);
string basicInfoRegexStr1 = &.html\&&(.*?)&/a&&;//company name
string company = Regex.Match(body, basicInfoRegexStr1).V
// NOTE(review): the assignment target was garbled to "pany"; presumably
// info.Company — TODO confirm.
pany = string.IsNullOrEmpty(company) ? && : company.Substring(company.IndexOf(&&&) + 1, company.IndexOf(&&/a&&) - company.IndexOf(&&&) - 1);
string basicInfoRegexStr2 = &工作地点:&/td&&tdclass=\&txt_2\&&(.*?)&/td&&;//work location
string address = Regex.Match(body, basicInfoRegexStr2).V
info.Address = string.IsNullOrEmpty(address) ? && : address.Substring(address.IndexOf(&\&&&) + 2, address.LastIndexOf(&&/td&&) - address.IndexOf(&\&&&) - 2);
string basicInfoRegexStr3 = &公司性质:&/strong&&&(.*?)&br&&br&&strong&&;//company type
string nature = Regex.Match(body, basicInfoRegexStr3).V
// Falls back to the "company industry" label when the "company type" label is absent.
if (string.IsNullOrEmpty(nature))
basicInfoRegexStr3 = &公司行业:&/strong&&&(.*?)&br&&br&&strong&&;
nature = Regex.Match(body, basicInfoRegexStr3).V
// The hard-coded 26 skips a fixed-length prefix of the match; it must stay in
// step with the pattern text above.
info.Nature = string.IsNullOrEmpty(nature) ? && : nature.Substring(26, nature.IndexOf(&&br&&) - 26);//company type
string basicInfoRegexStr4 = &公司规模:&/strong&&&(.*?)&/td&&;//company size
string scale = Regex.Match(body, basicInfoRegexStr4).V
info.Scale = string.IsNullOrEmpty(scale) ? && : scale.Substring(26, scale.IndexOf(&&/td&&) - 26);
string basicInfoRegexStr5 = &工作年限:&/td&&tdclass=\&txt_2\&&(.*?)&/td&&;//work experience
string experience = Regex.Match(body, basicInfoRegexStr5).V
info.Experience = string.IsNullOrEmpty(experience) ? && : experience.Substring(experience.IndexOf(&\&&&) + 2, experience.LastIndexOf(&&/td&&) - experience.IndexOf(&\&&&) - 2);
string basicInfoRegexStr6 = &学&&&&历:&/td&&tdclass=\&txt_2\&&(.*?)&/td&&;//education
string education = Regex.Match(body, basicInfoRegexStr6).V
info.Education = string.IsNullOrEmpty(education) ? && : education.Substring(education.IndexOf(&\&&&) + 2, education.LastIndexOf(&&/td&&) - education.IndexOf(&\&&&) - 2);
string basicInfoRegexStr7 = &薪水范围:&/td&&tdclass=\&txt_2\&&(.*?)&/td&&;//monthly salary
string salary = Regex.Match(body, basicInfoRegexStr7).V
info.Salary = string.IsNullOrEmpty(salary) ? && : salary.Substring(salary.IndexOf(&\&&&) + 2, salary.LastIndexOf(&&/td&&) - salary.IndexOf(&\&&&) - 2);
string basicInfoRegexStr8 = &发布日期:&/td&&tdclass=\&txt_2\&&(.*?)&/td&&;//publication date
string time = Regex.Match(body, basicInfoRegexStr8).V
info.Time = string.IsNullOrEmpty(time) ? && : time.Substring(time.IndexOf(&\&&&) + 2, time.LastIndexOf(&&/td&&) - time.IndexOf(&\&&&) - 2); ;
// Hand the parsed posting (with the page HTML as sender) to the subscribers.
if (GetJobEnd != null)
GetJobEnd(pageStr, info);
catch (Exception exMsg)
throw new Exception(exMsg.Message);
以上这个类的作用是根据网址获取HTML内容,再用正则表达式获取招聘相关信息,并通过字符串截取得到各个字段,最后组装到前台界面,实现数据的显示。这里面有一个逻辑:先从搜索结果页动态取得每一条招聘信息的链接,再根据链接去获取详情页的HTML,相当于这中间有两层解析HTML的过程。
2.智联招聘
&&&&&& 智联招聘是我自己每次找工作的备选项,每次把前程无忧上的所有招聘信息全部看完后,就会在智联招聘上浏览下,感觉还挺不错的,不知各位园友有没有试下,不过会有很多与前程无忧是重复的招聘信息,所以还得靠自己去区分。
&&&&&& 下面介绍实现逻辑:
/// <summary>
/// IJob implementation for Zhilian Zhaopin: builds the search query, downloads
/// the result pages and raises GetJobEnd. The accompanying article states that
/// the regular expressions of this class are still unfinished.
/// </summary>
// NOTE(review): this listing was damaged during extraction. Braces are missing,
// the characters ", < and > all appear as "&", and the tails of several
// identifiers are cut off. The code lines are left untouched.
public class JobFromZhiLian : IJob
#region 私有字段
// Search URL prefix. NOTE(review): it starts with "/", so the scheme/host
// appears to have been stripped from the listing — TODO confirm.
private string url = @&/Jobs/SearchResult.ashx?&;
/// <summary>
/// Work location
/// </summary>
private string workA
/// <summary>
/// Search keyword
/// </summary>
private string keyW
/// <summary>
/// Salary range bound sent as the "sf" query parameter
/// </summary>
private string upperS
/// <summary>
/// Salary range bound sent as the "st" query parameter
/// </summary>
private string lowerS
/// <summary>
/// Word that a posting must contain
/// </summary>
private string mustK
#endregion
/// <summary>
/// Stores the search criteria in the private fields.
/// </summary>
public JobFromZhiLian(string workAddress, string keyWord, string upperSalary, string lowerSalary, string mustKey)
this.workAddress = workA
this.keyWord = keyW
this.upperSalary = upperS
this.lowerSalary = lowerS
this.mustKey = mustK
/// <summary>
/// Raised with (page HTML, JobInfo) from GetJobInfoFromPage and with
/// (null, null) after the page loop in GetJobInfoList.
/// </summary>
public event GetJobEndEventHandler GetJobE
/// <summary>
/// Builds the query string, downloads the first result page, reads the page
/// count from it and then downloads and processes pages 2..pageCount.
/// </summary>
public void GetJobInfoList()
StringBuilder condition = new StringBuilder();
// NOTE(review): the encoded values are written back to the fields, so a second
// call would encode already-encoded text.
workAddress = HttpUtility.UrlEncode(workAddress, Encoding.GetEncoding(&utf-8&));
condition.Append(&jl=& + workAddress);
if (!string.IsNullOrEmpty(keyWord))
keyWord = HttpUtility.UrlEncode(keyWord, Encoding.GetEncoding(&utf-8&));
condition.Append(&&kw=& + keyWord);
condition.Append(&&sm=1&);
if (!string.IsNullOrEmpty(upperSalary))
condition.Append(&&sf=& + upperSalary);
if (!string.IsNullOrEmpty(lowerSalary))
condition.Append(&&st=& + lowerSalary);
// NOTE(review): the url field itself is extended, so a second call would append
// the query string again.
url = url + condition.ToString();
string html = GetHtmlCode.GetByget(url, &utf-8&);
GetJobInfoFromPage(html);
// Number of result pages
string pageCountRegexStr = &(?&=onkeypress=\&zlapply.searchjob.enter2Page\\(this,event,)\\d+&;
Regex pageCountRegex = new Regex(pageCountRegexStr);
string pageCountStr = pageCountRegex.Match(html).Groups[0].V
int pageCount = 0;
// TryParse leaves pageCount at 0 when no number was matched, so the loop below is skipped.
int.TryParse(pageCountStr, out pageCount);
for (int i = 2; i &= pageC i++)
string url0 = url + string.Format(&&p={0}&, i);
html = GetHtmlCode.GetByget(url0, &utf-8&);
GetJobInfoFromPage(html);
// (null, null) is raised after the page loop.
if (GetJobEnd != null)
GetJobEnd(null, null);
catch (Exception exMsg)
// NOTE(review): wrapping only the message drops the original exception type and
// stack trace.
throw new Exception(exMsg.Message);
// Regex filters: each entry is { pattern, replacement text }
private static readonly string[][] Filters =
new[] { @&(?is)&script.*?&.*?&/script&&, && },
new[] { @&(?is)&style.*?&.*?&/style&&, && },
new[] { @&(?is)&!--.*?--&&, && }
// (entry above) removes HTML comments
/// <summary>
/// Processes one search result page: strips unrelated markup, narrows the body
/// to the result-list container and raises GetJobEnd.
/// </summary>
// NOTE(review): as listed, info.Url is commented out, so the first GetJobEnd call
// passes a JobInfo whose fields were never filled. The statements after the
// commented-out foreach redeclare "info" and read m.Value as if they were still
// inside that loop; they look like leftovers of the unfinished parser — TODO confirm.
private void GetJobInfoFromPage( string pageStr)
JobInfo info = new JobInfo();
// NOTE(review): the statement guarded by this check was lost in extraction;
// presumably an early return — TODO confirm.
if (string.IsNullOrEmpty(pageStr))
pageStr = pageStr.Replace(&\r\n&, &&);//remove line breaks
// Extract the content of the html body tag
string body = string.E
string bodyFilter = @&(?is)&body.*?&/body&&;
Match m = Regex.Match(pageStr, bodyFilter);
if (m.Success)
body = m.ToString().Replace(&&tr &&, &&tr&&).Replace(&\r\n&, &&);
// Remove styles, scripts and other unrelated tags
foreach (var filter in Filters)
body = Regex.Replace(body, filter[0], filter[1]);
//if (!string.IsNullOrEmpty(mustKey) && !body.Contains(mustKey))
body = Regex.Replace(body, &\\s&, &&);
// Narrow the body to the result-list container.
bodyFilter = &(?is)&divclass=\&newlist_list_content\&id=\&newlist_list_content_table\&&.*?&/dd&&/dl&&/div&&/div&&/div&&;
Match m1 = Regex.Match(body, bodyFilter);
if (m1.Success)
body = m1.ToString();
//info.Url =
if (GetJobEnd != null)
GetJobEnd(pageStr, info);
//pageStr = Regex.Replace(pageStr, &\\s|&|&br&|&strong&|&/strong&|&b&|&/b&&, &&);
////Full markup of one posting
//string jobInfoRegexStr = &(?&=&tableclass=\&search-result-tab\&&)[\\S\\s]+?(?=&/table&)&;
//Regex jobInfoRegex = new Regex(jobInfoRegexStr);
//MatchCollection jobInfoMC = jobInfoRegex.Matches(pageStr);
//foreach (Match m in jobInfoMC)
if (!string.IsNullOrEmpty(mustKey) && !m.Value.Contains(mustKey))
JobInfo info = new JobInfo();
// Position title, URL and company name
string basicInfoRegexStr = &(?&=&ahref=\&)([\\w.:+?()/%=#&]+)\&target=\&_blank\&.*?&([\\s\\S]+?)(?=&/a&)&;
// Location, company type, company size, experience, education, monthly salary
string basicInfoRegexStr0 = &(?&=地点:)[-/\\w]+(?=&/span&)&;
string basicInfoRegexStr1 = &(?&=公司性质:)[-/\\w]+(?=&/span&)&;
string basicInfoRegexStr2 = &(?&=公司规模:)[-/\\w]+(?=&/span&)&;
string basicInfoRegexStr3 = &(?&=经验:)[-/\\w]+(?=&/span&)&;
string basicInfoRegexStr4 = &(?&=学历:)[-/\\w]+(?=&/span&)&;
string basicInfoRegexStr5 = &(?&=职位月薪:)[-/\\w]+(?=&/span&)&;
// Publication date
string timeInfoRegexStr = &(?&=releasetime\&&)\\d{1,2}-\\d{1,2}-\\d{1,2}&;
Regex basicInfoRegex = new Regex(basicInfoRegexStr);
MatchCollection basicInfoMC = basicInfoRegex.Matches(m.Value);
// The first anchor supplies URL and title, the second the company name; indexing
// throws when fewer matches are found.
info.Url = basicInfoMC[0].Groups[1].V
info.Position = basicInfoMC[0].Groups[2].V
// NOTE(review): the assignment target was garbled to "pany"; presumably
// info.Company — TODO confirm.
pany = basicInfoMC[1].Groups[2].V
Regex basicInfoRegex0 = new Regex(basicInfoRegexStr0);
info.Address = new Regex(basicInfoRegexStr0).Match(m.Value).V
info.Nature = new Regex(basicInfoRegexStr1).Match(m.Value).V
info.Scale = new Regex(basicInfoRegexStr2).Match(m.Value).V
info.Experience = new Regex(basicInfoRegexStr3).Match(m.Value).V
info.Education = new Regex(basicInfoRegexStr4).Match(m.Value).V
info.Salary = new Regex(basicInfoRegexStr5).Match(m.Value).V
Regex timeInfoRegex = new Regex(timeInfoRegexStr);
info.Time = timeInfoRegex.Match(m.Value).V
if (GetJobEnd != null)
GetJobEnd(pageStr, info);
catch (Exception exMsg)
throw new Exception(exMsg.Message);
& & & &以上为智联招聘解析HTML相关类,以上逻辑中正则表达式还在完善中,还未完全实现成功,正则表达式还有问题。
3.猎聘网
猎聘网也是最近一两年才兴起的,这个网站上基本上都是猎头发布的信息,开出的工资大多是十多二十万年薪的岗位,只要你具备这个实力就可以去这个网站看看,应该会有所收获。不过这个网站也有部分企业自己发布的招聘信息,如果前面两个网站都没有看到自己满意的求职信息,那么这个网站也可以作为求职的一个补充,不知各位博友是否支持我这种观点。
&&&&&& 下面介绍实现逻辑:
/// <summary>
/// <see cref="IJob"/> implementation for Liepin. It requests the search result
/// pages, follows the link of every listing that mentions the requested work
/// location, extracts the posting fields from the detail page and raises
/// <see cref="GetJobEnd"/> once per posting. When paging stops it raises
/// <see cref="GetJobEnd"/> once with (null, null), matching the other job sources.
/// </summary>
public class JobFromLiePin : IJob
{
    #region * Private fields

    /// <summary>Maximum number of result pages requested for one search.</summary>
    private const int MaxPages = 100;

    // NOTE(review): the value starts with "/", so the scheme/host prefix appears
    // to have been lost from the published listing — confirm the full search URL.
    private string url = @"/zhaopin/?";

    /// <summary>Matches one listing anchor on a search result page.</summary>
    private string basicInfoRegexStr = "<a title=[\\s\\S]+?</a>";

    /// <summary>Work location.</summary>
    private string workAddress;

    /// <summary>Work location ID, sent as the "dqs" query parameter.</summary>
    private string workAddressId;

    /// <summary>Search keyword, sent as the "key" query parameter.</summary>
    private string keyWord;

    /// <summary>Word that a posting must contain; empty disables the check.</summary>
    private string mustKey;

    #endregion

    // Regex filters applied to the page body: each entry is { pattern, replacement }.
    private static readonly string[][] Filters =
    {
        new[] { @"(?is)<script.*?>.*?</script>", "" },
        new[] { @"(?is)<style.*?>.*?</style>", "" },
        new[] { @"(?is)<!--.*?-->", "" },            // HTML comments
        new[] { @"(?is)<footer.*?>.*?</footer>", "" },
        new[] { @"(?is)<h3>常用链接:.*?</ul>", "" }
    };

    /// <summary>Stores the search criteria.</summary>
    /// <param name="workAddress">Work location text a listing must mention.</param>
    /// <param name="workAddressId">Site-specific ID of the work location.</param>
    /// <param name="keyWord">Search keyword; may be empty.</param>
    /// <param name="mustKey">Word a posting must contain; may be empty.</param>
    public JobFromLiePin(string workAddress, string workAddressId, string keyWord, string mustKey)
    {
        this.workAddress = workAddress;
        this.workAddressId = workAddressId;
        this.keyWord = keyWord;
        this.mustKey = mustKey;
    }

    /// <summary>
    /// Raised with (page HTML, <c>JobInfo</c>) for every parsed posting and with
    /// (null, null) once the search has finished.
    /// </summary>
    public event GetJobEndEventHandler GetJobEnd;

    /// <summary>
    /// Runs the search: requests up to <see cref="MaxPages"/> result pages and
    /// processes each one, stopping at the first empty response.
    /// </summary>
    /// <remarks>
    /// Exceptions from downloading or parsing propagate unchanged; the previous
    /// "throw new Exception(ex.Message)" wrapper discarded their type and stack trace.
    /// </remarks>
    public void GetJobInfoList()
    {
        StringBuilder condition = new StringBuilder();
        condition.AppendFormat("dqs={0}", workAddressId);
        condition.Append("&searchField=3");
        if (!string.IsNullOrEmpty(keyWord))
        {
            // Encode into a local: writing the encoded text back to the field
            // would double-encode the keyword on a second call.
            string encodedKeyWord = HttpUtility.UrlEncode(keyWord, Encoding.GetEncoding("utf-8"));
            condition.Append("&key=" + encodedKeyWord);
        }
        condition.Append("&pubTime=30");

        string firstPageUrl = url + condition.ToString();
        for (int i = 0; i < MaxPages; i++)
        {
            // The first page carries no page parameter; later pages add curPage.
            string xurl = i > 0 ? firstPageUrl + "&curPage=" + i : firstPageUrl;
            string html = GetHtmlCode.GetByget(xurl, "utf-8");
            if (string.IsNullOrEmpty(html))
            {
                break;
            }
            GetJobInfoFromPage(html);
        }

        // Signal the end of the search exactly once, even when no page came back.
        RaiseGetJobEnd(null, null);
    }

    /// <summary>
    /// Finds the listing anchors of one result page and loads the detail page of
    /// every listing whose markup contains the work location.
    /// </summary>
    private void GetJobInfoFromPage(string pageStr)
    {
        const string urlRegex = "(?<=href=\")([\\w.:+?()/%=#&]+)";
        foreach (Match listing in Regex.Matches(pageStr, basicInfoRegexStr))
        {
            if (!listing.Value.Contains(workAddress))
            {
                continue;
            }

            string url0 = Regex.Match(listing.Value, urlRegex).Value;
            if (string.IsNullOrEmpty(url0))
            {
                // No usable link in this anchor; nothing to download.
                continue;
            }
            GetJobInfoFromUrl(url0);
        }
    }

    /// <summary>
    /// Downloads one posting page, strips unrelated markup, applies the
    /// <c>mustKey</c> test and raises <see cref="GetJobEnd"/> with the parsed posting.
    /// </summary>
    private void GetJobInfoFromUrl(string url)
    {
        string pageStr = GetHtmlCode.GetByget(url, "utf-8");
        if (string.IsNullOrEmpty(pageStr))
        {
            return;
        }
        pageStr = pageStr.Replace("\r\n", "");

        // Keep only the body element, then drop scripts, styles, comments, etc.
        string body = string.Empty;
        Match bodyMatch = Regex.Match(pageStr, @"(?is)<body.*?</body>");
        if (bodyMatch.Success)
        {
            body = bodyMatch.Value.Replace("<tr >", "<tr>");
        }
        foreach (var filter in Filters)
        {
            body = Regex.Replace(body, filter[0], filter[1]);
        }

        if (!string.IsNullOrEmpty(mustKey) && !body.Contains(mustKey))
        {
            return;
        }

        // All whitespace is removed, so the patterns below contain no spaces.
        body = Regex.Replace(body, "\\s", "");

        JobInfo info = new JobInfo();
        info.Url = url;
        info.Position = Between(Regex.Match(body, "<h1title=([\\s\\S]+?)>(.*?)</h1>").Value, ">", "</", false);
        info.Company = Between(Regex.Match(body, "</h1><h3>(.*?)</h3>").Value, "<h3>", "</h3>", false);
        info.Address = Between(Regex.Match(body, "<divclass=\"resumeclearfix\"><span>(.*?)</span>").Value, "<span>", "</", false);

        // Company type, with the alternative page layout as fallback.
        info.Nature = Between(Regex.Match(body, "<li><span>企业性质:</span>(.*?)</li>").Value, "</span>", "</li>", false);
        if (string.IsNullOrEmpty(info.Nature))
        {
            info.Nature = Between(Regex.Match(body, "<br><span>性质:</span>(.*?)<br>").Value, "</span>", "<br>", true);
        }

        // Company size, with the alternative page layout as fallback.
        info.Scale = Between(Regex.Match(body, "<li><span>企业规模:</span>(.*?)</li>").Value, "</span>", "</li>", false);
        if (string.IsNullOrEmpty(info.Scale))
        {
            // Fix: the original chained "info.Scale = info.Nature = ...", which
            // overwrote the company type with the company size.
            info.Scale = Between(Regex.Match(body, "<br><span>规模:</span>(.*?)<br>").Value, "</span>", "<br>", true);
        }

        info.Experience = Between(Regex.Match(body, "<spanclass=\"noborder\">(.*?)</span>").Value, ">", "</", false);
        info.Education = Between(Regex.Match(body, "</span><span>(.*?)</span><spanclass=\"noborder\">").Value, "<span>", "</span><spanclass=", false);
        info.Salary = Between(Regex.Match(body, "<pclass=\"job-main-title\">(.*?)<").Value, ">", "<", true);
        info.Time = Between(Regex.Match(body, "<pclass=\"release-time\">发布时间:<em>(.*?)</em></p>").Value, "<em>", "</em>", false);

        RaiseGetJobEnd(pageStr, info);
    }

    /// <summary>
    /// Returns the text between the first <paramref name="startMarker"/> and the
    /// first (or, when <paramref name="fromLastEnd"/> is true, the last)
    /// <paramref name="endMarker"/>. Returns "" when the text is empty or the
    /// markers are missing or out of order, instead of throwing as the earlier
    /// Substring/IndexOf arithmetic did.
    /// </summary>
    private static string Between(string text, string startMarker, string endMarker, bool fromLastEnd)
    {
        if (string.IsNullOrEmpty(text))
        {
            return "";
        }

        int start = text.IndexOf(startMarker, StringComparison.Ordinal);
        if (start < 0)
        {
            return "";
        }
        start += startMarker.Length;

        int end = fromLastEnd
            ? text.LastIndexOf(endMarker, StringComparison.Ordinal)
            : text.IndexOf(endMarker, StringComparison.Ordinal);
        return end >= start ? text.Substring(start, end - start) : "";
    }

    /// <summary>Raises <see cref="GetJobEnd"/> if anyone is subscribed.</summary>
    private void RaiseGetJobEnd(object sender, JobInfo info)
    {
        // Copy to a local so an unsubscribe on another thread cannot null the
        // delegate between the check and the call.
        GetJobEndEventHandler handler = GetJobEnd;
        if (handler != null)
        {
            handler(sender, info);
        }
    }
}
以上为解析猎聘网招聘信息的类。以下为猎聘网解析出的数据:
&&相关文章推荐
* 以上用户言论只代表其个人观点,不代表CSDN网站的观点或立场
访问:48771次
排名:千里之外
原创:26篇
(2)(3)(27)

我要回帖

更多关于 前程无忧显示筛选中 的文章

 

随机推荐