-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtemplate.list.py
More file actions
58 lines (44 loc) · 1.88 KB
/
Copy pathtemplate.list.py
File metadata and controls
58 lines (44 loc) · 1.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import sys
sys.path.append("modules")
from core.framework import *
class XXXXXXListAnalyst(PythonContentAnalyst):
    """List-page analyst built on ``PythonContentAnalyst``.

    The content type is set to ``ContentType.index`` and every selector
    list is left empty in this file.
    """

    def setConfig(self, url):
        """Fill ``self.config`` for *url* and return it.

        :param url: address stored in ``cfgUrl``.
        :returns: the populated ``self.config`` object.
        """
        cfg = self.config
        enums = self.enum

        # Page-level settings.
        cfg.cfgUrl = url
        cfg.cfgCharset = enums.Charset.gb2312
        cfg.cfgSelector = enums.Selector.csquery

        # Metadata selectors; all empty here.
        cfg.cfgTitle = []
        cfg.cfgAuthor = []
        cfg.cfgSummary = []
        cfg.cfgSource = []
        cfg.cfgIssuedate = []

        # Content settings.
        body = cfg.cfgContent
        opts = body.Options
        body.Type = enums.ContentType.index
        body.Path = []
        opts.Excludes = []
        # Link block determined by the client: return the "href" attribute
        # of the first positional argument.
        opts.Lamda = lambda *blocks: blocks[0].Attr("href")

        # Pagination settings.
        body.Pages = []
        # Return the second positional argument unchanged.
        opts.PageLamda = lambda *args: args[1]
        opts.PageNum = 0
        opts.PageSimilarity = 0.95
        return cfg

    def csqueryPagination(self, csdom, pagesPath):
        """Delegate pagination to the parent class without changes."""
        parent = super(XXXXXXListAnalyst, self)
        return parent.csqueryPagination(csdom, pagesPath)
def getresult(url=''):
    """Run the list analyst on *url* and return its serialized result.

    The analyst defined in this file is ``XXXXXXListAnalyst``; the previous
    reference to ``XXXXXXContentAnalyst`` pointed at a class this file never
    defines.

    :param url: address passed to the analyst's ``execute`` method.
    :returns: the value of ``result.toJsonString()`` after execution.
    """
    analyst = XXXXXXListAnalyst()
    analyst.execute(url)
    # Print the extracted content before returning the serialized result.
    echo(analyst.result.content)
    return analyst.result.toJsonString()
#getresult(URL)
####################### test case ######################
class XXXXXXTC(unittest.TestCase):
    """Test case that runs ``getresult`` against a live URL."""

    def __init__(self, methodName='runTest'):
        """Initialise the test case and create a list analyst instance."""
        super(XXXXXXTC, self).__init__(methodName)
        self.contentAnalyst = XXXXXXListAnalyst()

    def testConfig(self):
        """Check that the result content has more than 50 "|"-separated parts."""
        serialized = getresult("http://epaper.bjnews.com.cn/")
        parsed = FunctionHelper.string2object(serialized)
        parts = parsed.content.split("|")
        self.assertGreater(len(parts), 50)
# Run the test suite only when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()