forked from flyyuan/GetBlockchainArticle
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMain.java
More file actions
84 lines (79 loc) · 2.97 KB
/
Copy pathMain.java
File metadata and controls
84 lines (79 loc) · 2.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import com.github.kevinsawicki.http.HttpRequest;
import com.google.gson.Gson;
import entity.DataList;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.util.List;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
/**
 * Scrapes blockchain news articles and appends their paragraph text to a local file.
 *
 * <p>The article index is read page by page from the BlockMeta news-list endpoint. Each listed
 * article is then downloaded from 8btc.com, and the text of every non-empty {@code <p>} element
 * inside the first {@code bbt-html} element is appended to the output file with
 * {@code println}.
 *
 * @author Benny Shi
 * @date 2018/10/20
 */
public class Main {
    /** News-list endpoint; the page index is appended to this prefix. */
    private static final String LIST_URL = "https://app.blockmeta.com/w1/news/list?num=100&page=";

    /** Article page prefix; the article id is appended to it. */
    private static final String ARTICLE_URL = "https://www.8btc.com/article/";

    /** CSS class of the element whose paragraphs are extracted. */
    private static final String ARTICLE_BODY_CLASS = "bbt-html";

    /** Number of list pages requested (page indices 0 .. PAGE_COUNT - 1). */
    private static final int PAGE_COUNT = 230;

    /**
     * Runs the scraper, appending to {@code ./bcdataSentence1.txt}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        scrapeArticles("./bcdataSentence1.txt");
    }

    /**
     * Downloads one page of the news list and deserializes it with Gson.
     *
     * @param page index of the list page to request
     * @return the deserialized list; may be {@code null} if the response body is empty
     */
    private static DataList fetchArticleList(int page) {
        String listBody = HttpRequest.get(LIST_URL + page).body();
        return new Gson().fromJson(listBody, DataList.class);
    }

    /**
     * Walks every list page and appends the text of each listed article to {@code pathName}.
     *
     * <p>The file is opened in append mode and written as UTF-8. A list page or an article that
     * cannot be fetched is reported on {@code System.err} and skipped. The run stops early only
     * if the output file cannot be opened or written.
     *
     * @param pathName path of the output file; created if it does not exist
     */
    private static void scrapeArticles(String pathName) {
        // try-with-resources closes the writer on every exit path. The charset is explicit
        // because the scraped text is Chinese and the platform default may not encode it.
        try (PrintWriter out = new PrintWriter(
                new OutputStreamWriter(new FileOutputStream(pathName, true), StandardCharsets.UTF_8))) {
            for (int page = 0; page < PAGE_COUNT; page++) {
                DataList dataList;
                try {
                    dataList = fetchArticleList(page);
                } catch (RuntimeException e) {
                    // The HTTP client and Gson report failures as unchecked exceptions;
                    // one bad page must not abort the remaining pages.
                    System.err.println("Failed to fetch list page " + page + ": " + e);
                    continue;
                }
                if (dataList == null || dataList.getList() == null) {
                    System.err.println("List page " + page + " returned no articles; skipped");
                    continue;
                }

                List<DataList.ListBean> articles = dataList.getList();
                for (DataList.ListBean article : articles) {
                    if (article == null) {
                        continue;
                    }
                    System.out.println(page + "页---" + article.getId() + "----" + article.getTitle());
                    appendArticleText(ARTICLE_URL + article.getId(), out);

                    // PrintWriter never throws on write failure; checkError() flushes and
                    // reports it. Continuing to scrape with a broken output file is pointless.
                    if (out.checkError()) {
                        System.err.println("Writing to " + pathName + " failed; aborting");
                        return;
                    }
                }
            }
        } catch (IOException e) {
            System.err.println("Cannot open " + pathName + " for appending: " + e);
        }
    }

    /**
     * Downloads one article page and writes each non-empty paragraph of its body to {@code out}.
     *
     * <p>Paragraph text is also echoed to {@code System.out}. Download failures and pages
     * without a {@code bbt-html} element are reported on {@code System.err} and skipped.
     *
     * @param url address of the article page
     * @param out destination for the paragraph text
     */
    private static void appendArticleText(String url, PrintWriter out) {
        // Request headers.
        Connection con = Jsoup.connect(url)
                .header("Accept", "text/html, application/xhtml+xml, */*")
                .header("Content-Type", "application/x-www-form-urlencoded")
                .header("User-Agent", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0))");

        // Fetch and parse the response.
        Document document;
        try {
            document = con.get();
        } catch (IOException e) {
            System.err.println("Failed to download " + url + ": " + e);
            return;
        }

        Elements containers = document.getElementsByClass(ARTICLE_BODY_CLASS);
        if (containers.isEmpty()) {
            System.err.println("No \"" + ARTICLE_BODY_CLASS + "\" element in " + url + "; skipped");
            return;
        }

        Elements paragraphs = containers.first().getElementsByTag("p");
        for (int i = 0; i < paragraphs.size(); i++) {
            String text = paragraphs.get(i).text();
            if (!text.isEmpty()) {
                System.out.println(text);
                out.println(text);
            }
        }
    }
}