Java爬虫一般使用的是Jsoup和HttpClient这两个jar包(推荐使用Jsoup)
关于HttpClient的使用:HttpClient
效果图什么的我就懒得展示了,因为没啥可看的,代码里注释写的也很明白,看不懂的就copy代码,直接整一遍

正文

Server

首先得有个网站让你爬取,此处就自己用SpringBoot写几个接口来调用演示(Server)
具体创建步骤就不再赘述,直接上源码
注意:本文仅演示如何使用,服务端并没有编写拦截器;对于带有登录拦截器的网站,先登录,再在后续请求中携带登录返回的Cookie,请求就不会被拦截了
ServerController.java

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
package top.lete114.testserver.Controller;

import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.*;

/**
 * Minimal demo server used as the scraping target for the Jsoup client examples.
 *
 * <p>Serves the login page, the post-login info page, and two POST endpoints that
 * return a small JSON string. This controller performs no authentication check of
 * its own on {@code /info}, {@code /QianDao} or {@code /Param}.
 *
 * @author Lete乐特
 * @createDate 2021-03-19 18:26
 */
@Controller
public class ServerController {

    /**
     * Serves the login page.
     *
     * @return the {@code index} view name
     */
    @RequestMapping({"/","index","index.html"})
    public String index() {
        return "index";
    }

    /**
     * Checks the submitted credentials against the hard-coded demo account.
     *
     * @param username submitted user name; may be {@code null} when the field is absent
     * @param password submitted password; may be {@code null} when the field is absent
     * @return the {@code info} view on success, otherwise a redirect back to {@code /index}
     */
    @PostMapping("/login")
    public String login(String username, String password) {
        // Constant-first equals keeps a missing (null) parameter from throwing.
        if ("admin".equals(username) && "111111".equals(password)) {
            System.out.println("登陆成功!");
            return "info";
        }
        System.out.println("登陆失败!");
        return "redirect:/index";
    }

    /**
     * Serves the info page.
     *
     * @return the {@code info} view name
     */
    @GetMapping("/info")
    public String info() {
        return "info";
    }

    /**
     * Check-in endpoint; logs the call and answers with a fixed JSON string.
     *
     * @return a JSON object with a single {@code return} message
     */
    @PostMapping("/QianDao")
    @ResponseBody
    public String QianDao() {
        System.out.println("签到成功!");
        return "{\"return\":\"签到成功!\"}";
    }

    /**
     * Endpoint demonstrating a request with parameters; echoes them back in a JSON string.
     *
     * @param name caller-supplied name, echoed back (JSON-escaped) in the response
     * @param age  caller-supplied age, echoed back in the response
     * @return a JSON object with a single {@code return} message containing both values
     */
    @PostMapping("/Param")
    @ResponseBody
    public String Param(String name, int age) {
        System.out.println("带参请求成功!name=" + name + ",age=" + age);
        // name is caller-controlled: escape it so quotes or backslashes cannot break the JSON.
        return "{\"return\":\"带参请求成功!name=" + escapeJson(name) + ",age=" + age + "\"}";
    }

    /**
     * Escapes a value for embedding inside a JSON string literal.
     *
     * @param value raw text; {@code null} is rendered as the text {@code null},
     *              matching plain string concatenation
     * @return the text with quotes, backslashes and control characters escaped
     */
    private static String escapeJson(String value) {
        if (value == null) {
            return "null";
        }
        StringBuilder escaped = new StringBuilder(value.length() + 8);
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '"':
                    escaped.append("\\\"");
                    break;
                case '\\':
                    escaped.append("\\\\");
                    break;
                case '\n':
                    escaped.append("\\n");
                    break;
                case '\r':
                    escaped.append("\\r");
                    break;
                case '\t':
                    escaped.append("\\t");
                    break;
                default:
                    if (c < 0x20) {
                        // Remaining control characters must use the \\uXXXX form in JSON.
                        escaped.append(String.format("\\u%04x", (int) c));
                    } else {
                        escaped.append(c);
                    }
                    break;
            }
        }
        return escaped.toString();
    }
}

index.html

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <title>Index</title>
    <!-- Styles belong in <head>; a <style> element is not permitted inside <body>. -->
    <style>
        .main {
            height: 300px;
            width: 300px;
        }
        input[type="submit"] {
            display: flex;
        }
    </style>
</head>
<body>
<!-- Login form: posts the "username" and "password" fields to /login. -->
<div class="main">
    <form action="/login" method="post">
        <input type="text" placeholder="请输入用户名" name="username">
        <input type="password" placeholder="请输入密码" name="password">
        <input type="submit" value="登陆" >
    </form>
</div>

</body>
</html>

info.html

1
2
3
4
5
6
7
8
9
10
11
12
13
<!DOCTYPE html>
<!-- Page for the "info" view: shows a user name and a check-in form. -->
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<title>登陆成功</title>
</head>
<body>
<!-- The user name is static markup; the client example reads it with the selector "body h1 span". -->
<h1>用户名:<span>Lete乐特</span></h1>
<!-- Check-in form: posts to /QianDao; the submit button has no name, so no fields are sent. -->
<form action="/QianDao" method="post">
<input type="submit" value="签到">
</form>
</body>
</html>

Client

新建一个maven项目,并导入Jsoup依赖

1
2
3
4
5
6
<!-- jsoup: the HTTP/HTML library the client example uses (org.jsoup.Jsoup, Connection, Document). -->
<!-- NOTE(review): the version is pinned to 1.13.1; check whether a newer release should be used. -->
<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>

直接不废话,新建main.java
登陆并获取用户昵称,并执行签到操作
main.java

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
package top.lete114.testclient;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import javax.annotation.PostConstruct;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.*;

/**
 * Demo client: logs in to the local test server with Jsoup, then reads the user
 * name from the info page and calls the check-in endpoint.
 *
 * @author Lete乐特
 * @createDate 2021-03-19 19:17
 */
public class main {

    /** Base address of the demo server every request is sent to. */
    private static final String BASE_URL = "http://127.0.0.1:4000";

    /**
     * Runs the login, info and check-in sequence and prints each result.
     *
     * @param args unused
     * @throws IOException if a request fails or a response cannot be read
     */
    public static void main(String[] args) throws IOException {
        // Login form fields (user name / password).
        Map<String, String> data = new HashMap<>();
        data.put("username", "admin");
        data.put("password", "111111");

        // Log in with browser-like headers.
        Connection.Response login = Jsoup.connect(BASE_URL + "/login")
                .ignoreContentType(true)
                .followRedirects(false)
                .postDataCharset("utf-8")
                .header("Upgrade-Insecure-Requests", "1")
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
                .header("Content-Type", "application/x-www-form-urlencoded")
                .header("X-Requested-With", "XMLHttpRequest")
                .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36")
                .data(data)
                .method(Connection.Method.POST)
                .execute();
        login.charset("UTF-8"); // decode the response body as UTF-8

        // Redirects are not followed, so only a direct 200 response counts as a successful login.
        if (login.statusCode() != 200) {
            System.out.println("登陆失败,用户名或密码错误");
            return;
        }

        // Fetch the info page with the login cookies and print the user name.
        Document info = Jsoup.connect(BASE_URL + "/info")
                .cookies(login.cookies())
                .get();
        String username = info.select("body h1 span").text();
        System.out.println(username);

        // Call the check-in endpoint with the same cookies and print its response body.
        Document checkIn = Jsoup.connect(BASE_URL + "/QianDao")
                .cookies(login.cookies())
                .post();
        String checkInBody = checkIn.select("body").text();
        System.out.println(checkInBody);
    }
}

在一些特殊情况下,某些请求需要携带参数,示例代码如下。此处只演示POST请求,因为GET请求可以直接把参数写在URL后面
main.java

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
package top.lete114.testclient;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import javax.annotation.PostConstruct;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.*;

/**
 * Demo client: logs in to the local test server with Jsoup, reads the user name
 * from the info page, calls the check-in endpoint, and finally sends a POST
 * request that carries parameters.
 *
 * @author Lete乐特
 * @createDate 2021-03-19 19:17
 */
public class main {

    /** Base address of the demo server every request is sent to. */
    private static final String BASE_URL = "http://127.0.0.1:4000";

    /**
     * Runs the login, info, check-in and parameterised-request sequence and prints each result.
     *
     * @param args unused
     * @throws IOException if a request fails or a response cannot be read
     * @throws URISyntaxException declared by the original signature and kept for
     *                            compatibility; nothing in this method raises it
     */
    public static void main(String[] args) throws IOException, URISyntaxException {
        // Login form fields (user name / password).
        Map<String, String> data = new HashMap<>();
        data.put("username", "admin");
        data.put("password", "111111");

        // Log in with browser-like headers.
        Connection.Response login = Jsoup.connect(BASE_URL + "/login")
                .ignoreContentType(true)
                .followRedirects(false)
                .postDataCharset("utf-8")
                .header("Upgrade-Insecure-Requests", "1")
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
                .header("Content-Type", "application/x-www-form-urlencoded")
                .header("X-Requested-With", "XMLHttpRequest")
                .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36")
                .data(data)
                .method(Connection.Method.POST)
                .execute();
        login.charset("UTF-8"); // decode the response body as UTF-8

        // Redirects are not followed, so only a direct 200 response counts as a successful login.
        if (login.statusCode() != 200) {
            System.out.println("登陆失败,用户名或密码错误");
            return;
        }

        // Fetch the info page with the login cookies and print the user name.
        Document info = Jsoup.connect(BASE_URL + "/info")
                .cookies(login.cookies())
                .get();
        String username = info.select("body h1 span").text();
        System.out.println(username);

        // Call the check-in endpoint with the same cookies and print its response body.
        Document checkIn = Jsoup.connect(BASE_URL + "/QianDao")
                .cookies(login.cookies())
                .post();
        String checkInBody = checkIn.select("body").text();
        System.out.println(checkInBody);

        // POST request carrying form parameters; the content type is ignored so a
        // non-HTML response can still be parsed and printed.
        Connection.Response paramResponse = Jsoup.connect(BASE_URL + "/Param")
                .data("name", "Lete114")
                .data("age", "18")
                .method(Connection.Method.POST)
                .ignoreContentType(true)
                .execute();
        Document paramDocument = paramResponse.parse();
        String body = paramDocument.select("body").text();
        System.out.println(body);
    }
}

此篇内容就到此为止,你可以发挥你的脑洞进行更深入的使用,不仅仅可以爬取内容这么简单
如果你配合cron定时任务,可以实现自动签到、自动打卡等效果