按顺序在浏览器中访问下面三个地址可以得到正确的页面,但在 spider 中使用时却得不到:
1 http://200.96.97.90/grid2008/request/search.aspx?PageName=ASP.brief_libraryresult_aspx&DBViewType=FullText&DbPrefix=SCDB
2 http://200.96.97.90/grid2008/DataCenter/DoGridTable.aspx?action=gridtablepage&turnpage=1&dbcode=&isselectorder=&prio=true&viewtype=&itype=0&showtype=&returnid=&pagemode=3&recordsperpage=1&dbcatalog=%E4%B8%AD%E5%9B%BD%E5%AD%A6%E6%9C%AF%E6%96%87%E7%8C%AE%E7%BD%91%E7%BB%9C%E5%87%BA%E7%89%88%E6%80%BB%E5%BA%93&dbprefix=SCDB&pagename=ASP.brief_libraryresult_aspx&queryid=0
3 http://200.96.97.90/kcms/detail/detail.aspx?filename=JCHX201111001019&dbname=CPFD2012&dbcode=CPFD
在 spider 中运行时,详情页请求会出现如下跳转(注意第二次跳转的地址里,p 参数中的 %2f、%3f、%3d、%26 变成了 /2f、/3f、/3d、/26,最后被转到“文件不存在”的错误页):
[DEBUG] ctx@2 execute http request: GET http://200.96.97.90/grid2008/brief/detailj.aspx?filename=ZGDN201109001107&dbname=CPFD2012&dbcode=CPFD&uid=aDNXbjV5WHRVd25SbXIwNEorbDVpOW9lNUROanFxN1ZMYmxvdzN5b2UzOU5mZjZB <--[usearch worker 1] HttpStepExecutor.java(initRequestProducer:189)-->
[DEBUG] Redirect requested to location '/kns55/loginid.aspx?p=detail%2fdetail.aspx%3ffilename%3dZGDN201109001107%26dbname%3dCPFD2012%26dbcode%3dCPFD&uid=aUM1a0dBNlQzcTRUVFgxakpDeGwwY003YUkxdURaYmdhTGZhWThFWTFrMmxoZTFM' <--[I/O dispatcher 3] MyRedirectStrategy.java(getLocationURI:87)-->
[DEBUG] Redirect requested to location '/kns55/detail/2fdetail.aspx/3ffilename/3dZGDN201109001107/26dbname/3dCPFD2012/26dbcode/3dCPFD' <--[I/O dispatcher 3] MyRedirectStrategy.java(getLocationURI:87)-->
[DEBUG] Redirect requested to location '/Kns55/error.aspx?err=Error+Message%3a%e6%96%87%e4%bb%b6%e2%80%9c%2fkns55%2fdetail%2f2fdetail.aspx%e2%80%9d%e4%b8%8d%e5%ad%98%e5%9c%a8%e3%80%82' <--[I/O dispatcher 3] MyRedirectStrategy.java(getLocationURI:87)-->
附:知网 spider 配置文件
{
adapterId: 20,
adapterName: '知网。会议论文',
adapterType: "http",
recordPerpage:10,
proxy:{
host:'115.239.234.44',
port: "808",
user:'',
password:''
},
fieldMaps:[{
name: 'chtitle',
description:'中文标题',
searchable: true,
fieldId: 1
},{
name: 'entitle',
description:'英文标题',
searchable: true,
fieldId: 2
},{
name: 'cajurl',
description:'caj下载',
searchable: true,
fieldId: 3
},{
name: 'pdfurl',
description:'pdf下载',
searchable: true,
fieldId: 4
},{
name: 'chauthors',
description:'中文作者',
searchable: true,
fieldId: 5
},{
name: 'enauthors',
description:'英文作者',
searchable: true,
fieldId: 6
},{
name: 'institution',
description:'机构',
searchable: true,
fieldId: 7
},{
name: 'chabstract',
description:'中文摘要',
searchable: true,
fieldId: 8
},{
name: 'enabstract',
description:'英文摘要',
searchable: false,
fieldId: 9
},{
name: 'chkeyword',
description:'中文关键词',
searchable: false,
fieldId: 10
},{
name: 'enkeyword',
description:'英文关键词',
searchable: false,
fieldId: 11
},{
name: 'jname',
description:'会议录名称',
searchable: false,
fieldId: 12
},{
name: 'meetingname',
description:'会议名称',
searchable: false,
fieldId: 13
},{
name: 'meetingtime',
description:'会议时间',
searchable: false,
fieldId: 14
},{
name: 'meetingplace',
description:'会议地址',
searchable: false,
fieldId: 15
},{
name: 'classnumber',
description:'分类号',
searchable: false,
fieldId: 16
},{
name: 'hostunit',
description:'主办单位',
searchable: false,
fieldId: 17
},{
name: 'url',
description:'详情地址',
searchable: false,
fieldId: 18
},{
name: 'detail',
description:'详情地址',
searchable: false,
fieldId: 10001
}],
prepareSteps:[{
url: 'http://200.96.97.90/grid2008/request/search.aspx?PageName=ASP.brief_libraryresult_aspx&DBViewType=FullText&DbPrefix=SCDB',
method: 'GET',
reqCharset: 'utf-8',
respCharset: 'utf-8',
containingRecords: false,
}
],
fetchSteps:[
{
url: 'http://200.96.97.90/grid2008/DataCenter/DoGridTable.aspx?action=gridtablepage&turnpage=1&dbcode=&isselectorder=&prio=true&viewtype=&itype=0&showtype=&returnid=&pagemode=3&recordsperpage=1&dbcatalog=%E4%B8%AD%E5%9B%BD%E5%AD%A6%E6%9C%AF%E6%96%87%E7%8C%AE%E7%BD%91%E7%BB%9C%E5%87%BA%E7%89%88%E6%80%BB%E5%BA%93&dbprefix=SCDB&pagename=ASP.brief_libraryresult_aspx&queryid=0',
method: 'GET',
reqCharset: 'UTF-8',
respCharset: 'UTF-8',
containingRecords: true,
reqParameters:[
{
type: 'page',
paramKey: 'curpage',
startIndex: '1',
saveTo: 'GET',
encode:false
}]
}],
rsFormat:{
hitExpr:{
startTag: '',
endTag: '$end',
includeStartTag: false,
includeEndTag: false,
script:'retVal="51504882"',
maxValueLength: 64,
escapeHtml: true
},
rsAreaExpr: {
startTag: '下载频次',
endTag: '',
includeStartTag: false,
includeEndTag: false,
maxValueLength: 100000
},
recAreaExpr:{
loopType: 'tag',
startTag: '',
endTag: 'onclick="checkMark(this)"',
includeStartTag: false,
includeEndTag: false
},
fieldExprs:[{
fieldId: 'detail',
startTag: 'value="',
endTag:'"',
includeStartTag: false,
includeEndTag: false,
script:'retVal="http://200.96.97.90/grid2008/brief/detailj.aspx?filename="+(retVal.split("!"))[1]+"&dbname="+(retVal.split("!"))[0]+"&dbcode=CPFD&uid=aDNXbjV5WHRVd25SbXIwNEorbDVpOW9lNUROanFxN1ZMYmxvdzN5b2UzOU5mZjZB"',
escapeHtml: true,
maxValueLength: 10000
},{
fieldId: 'url',
startTag: 'value="',
endTag:'"',
includeStartTag: false,
includeEndTag: false,
script:'retVal="http://200.96.97.90/grid2008/brief/detailj.aspx?filename="+(retVal.split("!"))[1]+"&dbname="+(retVal.split("!"))[0]+"&dbcode=CPFD&uid=aDNXbjV5WHRVd25SbXIwNEorbDVpOW9lNUROanFxN1ZMYmxvdzN5b2UzOU5mZjZB"',
escapeHtml: true,
maxValueLength: 10000
}]
},
detailSteps:[{
url: '$URL',
method: 'GET',
reqCharset: 'utf-8',
respCharset: 'utf-8',
respParameters:[{
paramKey: 'dt_chtitle',
startTag: ' id="chTitle">',
endTag: '',
escapeHtml: true
},{
paramKey: 'dt_entitle',
startTag: 'id="enTitle">',
escapeHtml: true,
endTag: ''
},{
paramKey: 'dt_cajurl',
startTag: '',
escapeHtml: true,
maxValueLength: 4000
},{
paramKey: 'dt_pdfurl',
startTag: '',
escapeHtml: true,
maxValueLength: 4000
},{
paramKey: 'dt_chauthors',
startTag: '【作者】',
endTag: '