@@ -25,7 +25,7 @@ public class CrawlerBootController {
@PostMapping("/search")
@ResponseBody
public void dispatchSearchTask(@RequestBody String wrapperJson) {
-//创建爬虫线程并且获取爬取页面(由taskWrapper封装)的各种信息的URI
+//创建爬虫线程并且获取爬取页面(由taskWrapper封装)的各种信息的URL
accomplishTask(wrapperJson, (taskWrapper) ->
Spider.create(new JDSearchProcessor())
.addUrl(taskWrapper.getMetaData())
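For context, the handler above hands the task's URL to a WebMagic Spider. The chain is truncated in this hunk, so the thread count and the run() call below are assumptions, and DemoSearchProcessor is a hypothetical stand-in for the project's JDSearchProcessor; a minimal, self-contained sketch of the same pattern:

    import us.codecraft.webmagic.Page;
    import us.codecraft.webmagic.Site;
    import us.codecraft.webmagic.Spider;
    import us.codecraft.webmagic.processor.PageProcessor;

    // Hypothetical stand-in for JDSearchProcessor; the real processor extracts JD search results.
    class DemoSearchProcessor implements PageProcessor {
        private final Site site = Site.me().setRetryTimes(3).setSleepTime(1000);

        @Override
        public void process(Page page) {
            // The real processor pulls SKU/SPU fields; this sketch just grabs the page title.
            page.putField("title", page.getHtml().xpath("//title/text()").get());
        }

        @Override
        public Site getSite() {
            return site;
        }
    }

    public class SpiderSketch {
        public static void main(String[] args) {
            // In the diff, taskWrapper.getMetaData() carries the target URL; hard-coded here.
            Spider.create(new DemoSearchProcessor())
                  .addUrl("https://search.jd.com/Search?keyword=phone")
                  .thread(1)   // assumed; not shown in the truncated chain
                  .run();      // assumed; not shown in the truncated chain
        }
    }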
@@ -11,8 +11,8 @@

import java.io.IOException;

-import static com.crawler.hbase.HBaseDDL.*;
-import static com.crawler.hbase.HBaseDML.*;
+import static com.crawler.spider.entity.hbase.HBaseDDL.*;
+import static com.crawler.spider.entity.hbase.HBaseDML.*;


public class JDItemClient {
55 changes: 55 additions & 0 deletions crawler-node/src/main/java/com/crawler/spider/entity/Item.java
@@ -48,4 +48,59 @@ public Item(SearchItemInfo info) {
createTime = new Date();
}

public String getSpu() {
return spu;
}

public void setSpu(String spu) {
this.spu = spu;
}

public String getSku() {
return sku;
}

public void setSku(String sku) {
this.sku = sku;
}

public String getTitle() {
return title;
}

public void setTitle(String title) {
this.title = title;
}

public Double getPrice() {
return price;
}

public void setPrice(Double price) {
this.price = price;
}

public String getPic() {
return pic;
}

public void setPic(String pic) {
this.pic = pic;
}

public String getUrl() {
return url;
}

public void setUrl(String url) {
this.url = url;
}

public Date getCreateTime() {
return createTime;
}

public void setCreateTime(Date createTime) {
this.createTime = createTime;
}
}
@@ -27,4 +27,36 @@ public class SearchItemInfo implements Serializable {
商品详细地址
*/
private String url;

public String getSpu() {
return spu;
}

public void setSpu(String spu) {
this.spu = spu;
}

public String getSku() {
return sku;
}

public void setSku(String sku) {
this.sku = sku;
}

public String getPic() {
return pic;
}

public void setPic(String pic) {
this.pic = pic;
}

public String getUrl() {
return url;
}

public void setUrl(String url) {
this.url = url;
}
}
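The accessors added to Item above and to SearchItemInfo here are plain hand-written getters and setters. Lombok is not part of this PR, so purely as an illustrative alternative, the same surface could be generated from annotated fields, e.g.:

    import lombok.Data;
    import java.util.Date;

    // Hypothetical Lombok-based equivalent of Item's hand-written accessors;
    // field names mirror the diff, the annotation is not used by the project.
    @Data
    public class Item {
        private String spu;
        private String sku;
        private String title;
        private Double price;
        private String pic;
        private String url;
        private Date createTime;
    }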
@@ -1,4 +1,4 @@
-package com.crawler.hbase;
+package com.crawler.spider.entity.hbase;

import org.apache.hadoop.hbase.client.Connection;

@@ -1,4 +1,4 @@
-package com.crawler.hbase;
+package com.crawler.spider.entity.hbase;

import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
@@ -1,4 +1,4 @@
-package com.crawler.hbase;
+package com.crawler.spider.entity.hbase;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
@@ -1,4 +1,4 @@
-package com.crawler.hbase;
+package com.crawler.spider.entity.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
@@ -41,7 +41,11 @@ public void process(Page page) {
if (page.getResultItems().get("skuName") == null) {
// skip this page
page.setSkip(true);
-throw new Exception("获取商品详细信息失败!");
+try {
+    throw new Exception("获取商品详细信息失败!");
+} catch (Exception e) {
+    e.printStackTrace();
+}
} else {
this.skuName = page.getResultItems().get("skuName").toString().trim();
}
@@ -51,4 +55,12 @@ public void process(Page page) {
public Site getSite() {
return site;
}

public String getSkuName() {
return skuName;
}

public void setSkuName(String skuName) {
this.skuName = skuName;
}
}
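The replacement block above throws the exception only to catch it on the next line, so its net effect is to print the stack trace and let the skipped page fall through rather than abort processing. A tiny sketch of that control flow outside WebMagic (only the skip flag and the message text come from the diff; the rest is illustrative):

    public class SkipFlowSketch {
        public static void main(String[] args) {
            Object skuName = null;   // stands in for page.getResultItems().get("skuName")
            boolean skipped = false; // stands in for page.setSkip(true)

            if (skuName == null) {
                skipped = true;
                // Same observable behavior as the try/throw/catch in the diff:
                // report the failure ("failed to fetch item details") and continue.
                new Exception("获取商品详细信息失败!").printStackTrace();
            }
            System.out.println("page skipped = " + skipped);
        }
    }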
@@ -38,7 +38,8 @@ public void process(Page page) {
throw new Exception("获取商品价格失败!");
} else {
String priceJson = page.getResultItems().get("JD_productPrice").toString().trim();
-List<JDPriceInfo> jdPriceInfos = GsonHolder.G.fromJson(priceJson, new TypeToken<List<JDPriceInfo>>() {}.getType());
+List<JDPriceInfo> jdPriceInfos = GsonHolder.G.fromJson(priceJson, new TypeToken<List<JDPriceInfo>>() {
+}.getType());
priceInfo = jdPriceInfos.get(0);
}
}
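The reformatted call above parses the JD price JSON into a List<JDPriceInfo> through Gson's TypeToken, with GsonHolder.G presumably a shared Gson instance. A self-contained sketch of the same pattern with a plain Gson object (the PriceInfo fields id and p are assumptions modeled on JD's price endpoint, not the project's JDPriceInfo):

    import com.google.gson.Gson;
    import com.google.gson.reflect.TypeToken;
    import java.util.List;

    public class PriceParseSketch {
        // Hypothetical stand-in for JDPriceInfo.
        static class PriceInfo {
            String id; // SKU identifier, e.g. "J_100012043978"
            String p;  // current price
        }

        public static void main(String[] args) {
            String priceJson = "[{\"id\":\"J_100012043978\",\"p\":\"5999.00\"}]";
            // A TypeToken captures the generic List type so Gson keeps the element class.
            List<PriceInfo> infos = new Gson().fromJson(
                    priceJson, new TypeToken<List<PriceInfo>>() {}.getType());
            System.out.println(infos.get(0).id + " -> " + infos.get(0).p);
        }
    }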
4 changes: 3 additions & 1 deletion 在云服务器上部署的命令.txt
@@ -1,5 +1,7 @@
--- 部署命令 ---
-nohup java -jar crawler-node.jar \
+nohup
+java - jar
+crawler - node.jar \
--spring.cloud.nacos.discovery.ip=你的ip \
--server.port=你的端口 \
> 你随意.txt &
19 changes: 11 additions & 8 deletions 测试流程.txt
@@ -1,15 +1,18 @@
本项目3个模块:
-crawler-dispatcher: 调度器
-crawler-service: 爬虫节点
-feign-api: 以上两个模块都依赖的模块
+crawler - dispatcher:调度器
+crawler - service:爬虫节点
+feign - api:以上两个模块都依赖的模块

测试流程

-1. 启动crawler-service模块下的CrawlerApplication应用(爬虫节点)
-2. 启动crawler-dispatcher模块下的CrawlerDispatchApplication应用(调度器)
-3. 向CrawlerDispatchApplication应用的Controller发送http请求,使用postman或者别的,Controller的api如下:
-@PostMapping("/search")
-public void dispatchSearchTask(String url)
+1.
+启动crawler - service模块下的CrawlerApplication应用(爬虫节点)
+2.
+启动crawler - dispatcher模块下的CrawlerDispatchApplication应用(调度器)
+3.
+向CrawlerDispatchApplication应用的Controller发送http请求,使用postman或者别的,Controller的api如下:
+@PostMapping("/search")
+public void dispatchSearchTask(String url)

你需要往http://localhost:8070/search发送post请求,携带参数为京东的url

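The dispatcher endpoint above is exercised with Postman in the notes; for a scripted test, a Java 11+ HttpClient call works as well. How the url parameter is bound is not shown in the diff, so sending it as a form field is an assumption:

    import java.net.URI;
    import java.net.URLEncoder;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;
    import java.nio.charset.StandardCharsets;

    public class DispatchRequestSketch {
        public static void main(String[] args) throws Exception {
            // Sample JD search URL; any JD URL from the test notes would do.
            String jdUrl = "https://search.jd.com/Search?keyword=phone";
            String form = "url=" + URLEncoder.encode(jdUrl, StandardCharsets.UTF_8);

            HttpRequest request = HttpRequest.newBuilder()
                    .uri(URI.create("http://localhost:8070/search"))
                    .header("Content-Type", "application/x-www-form-urlencoded") // assumed binding
                    .POST(HttpRequest.BodyPublishers.ofString(form))
                    .build();

            HttpResponse<String> response = HttpClient.newHttpClient()
                    .send(request, HttpResponse.BodyHandlers.ofString());
            System.out.println("HTTP " + response.statusCode());
        }
    }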