feat(dify):
- 新增爬虫结果缓存 - 添加邮件通知功能,当爬虫完成时发送邮件给用户 - 实现爬虫启动事件和监听器,用于启动轮询服务 - 优化 application-dev.yml,移除不必要的配置项 -移除原本的爬虫3
This commit is contained in:
parent
aeb978ef11
commit
d0953a1af2
@ -0,0 +1,25 @@
|
||||
package com.zsc.edu.dify.framework.event;
|
||||
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.data.redis.core.StringRedisTemplate;
|
||||
import org.springframework.stereotype.Component;
|
||||
/**
|
||||
* @description 事件监听者
|
||||
* @author vivid
|
||||
*
|
||||
* */
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
public class EventListener {
|
||||
|
||||
private final SpiderPollingService spiderPollingService;
|
||||
|
||||
/**
|
||||
* 监听爬虫启动事件
|
||||
* */
|
||||
@org.springframework.context.event.EventListener
|
||||
public void handleStartPollingEvent(SpiderStartPollingEvent event) {
|
||||
spiderPollingService.startPolling(event.getSpiderId(), event.getEmail());
|
||||
}
|
||||
}
|
@ -0,0 +1,73 @@
|
||||
package com.zsc.edu.dify.framework.event;
|
||||
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.zsc.edu.dify.common.util.RedisUtils;
|
||||
import com.zsc.edu.dify.framework.message.email.EmailSender;
|
||||
import com.zsc.edu.dify.framework.spider.SpiderConfig;
|
||||
import com.zsc.edu.dify.framework.spider.SpiderProperty;
|
||||
import com.zsc.edu.dify.modules.dify.service.SpiderService;
|
||||
import com.zsc.edu.dify.modules.message.entity.Notice;
|
||||
import com.zsc.edu.dify.modules.message.entity.NoticeType;
|
||||
import jakarta.annotation.PostConstruct;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
/**
|
||||
* @description 轮询查看爬虫结果
|
||||
* @author vivid
|
||||
* */
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class SpiderPollingService {
|
||||
|
||||
private ScheduledExecutorService scheduler;
|
||||
|
||||
private final RedisUtils redisUtils;
|
||||
|
||||
private final SpiderService spiderService;
|
||||
|
||||
private final EmailSender emailSender;
|
||||
|
||||
private final SpiderConfig spiderConfig;
|
||||
|
||||
private static final HashMap<String, SpiderProperty> PROPERTY_MAP = new HashMap<>();
|
||||
|
||||
@PostConstruct
|
||||
public void init() {
|
||||
for (SpiderProperty property : spiderConfig.getConfigs()) {
|
||||
PROPERTY_MAP.put(property.getId(), property);
|
||||
}
|
||||
}
|
||||
|
||||
public void startPolling(String spiderId, String email) {
|
||||
scheduler = Executors.newSingleThreadScheduledExecutor();
|
||||
scheduler.scheduleAtFixedRate(() -> {
|
||||
executeBusinessLogic(spiderId, email);
|
||||
}, 400, 3000, TimeUnit.MILLISECONDS);
|
||||
}
|
||||
|
||||
private void executeBusinessLogic(String spiderId, String email) {
|
||||
JSONObject jsonObject = spiderService.status(spiderId);
|
||||
JSONObject data = jsonObject.getJSONObject("data");
|
||||
if (!data.getBoolean("is_running")) {
|
||||
//获取文件
|
||||
String url = data.getString("download_url");
|
||||
//添加爬虫请求头,并且将前缀去掉
|
||||
String path = PROPERTY_MAP.get(spiderId).getUrl().replace("/api/v1", "") + url;
|
||||
//发送邮件给用户
|
||||
emailSender.send(email, new Notice(NoticeType.MESSAGE, false, true, false, false, "文件已到达", path, 0L));
|
||||
//将整个对象写入redis
|
||||
redisUtils.set(spiderId + ":result", jsonObject, 60 * 60 * 24);
|
||||
//停止任务
|
||||
scheduler.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
public void stopPolling() {
|
||||
scheduler.shutdown();
|
||||
}
|
||||
}
|
@ -0,0 +1,23 @@
|
||||
package com.zsc.edu.dify.framework.event;
|
||||
|
||||
import lombok.Getter;
|
||||
import org.springframework.context.ApplicationEvent;
|
||||
/**
|
||||
*
|
||||
* @description 定义爬虫启动的事件
|
||||
* @author vivid
|
||||
*
|
||||
* */
|
||||
@Getter
|
||||
public class SpiderStartPollingEvent extends ApplicationEvent {
|
||||
|
||||
private String spiderId;
|
||||
|
||||
private String email;
|
||||
|
||||
public SpiderStartPollingEvent(Object source, String spiderId, String email) {
|
||||
super(source);
|
||||
this.spiderId = spiderId;
|
||||
this.email = email;
|
||||
}
|
||||
}
|
@ -1,62 +0,0 @@
|
||||
package com.zsc.edu.dify.modules.dify.controller;
|
||||
|
||||
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.zsc.edu.dify.modules.dify.dto.SpiderDto;
|
||||
import jakarta.annotation.Resource;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
import org.springframework.web.reactive.function.client.WebClient;
|
||||
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/spider3")
|
||||
public class Spider3Controller {
|
||||
@Resource
|
||||
private ObjectMapper objectMapper;
|
||||
|
||||
@Value("${spider3.url}")
|
||||
private String SPIDER_URL;
|
||||
|
||||
@Value("${spider3.api-key}")
|
||||
private String API_KEY;
|
||||
|
||||
@PostMapping("/run")
|
||||
public JSONObject run() throws JsonProcessingException {
|
||||
return WebClient.create(SPIDER_URL).post().uri("/start_crawl")
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.retrieve()
|
||||
.bodyToMono(JSONObject.class)
|
||||
.block();
|
||||
}
|
||||
|
||||
@PostMapping("/status")
|
||||
public JSONObject status() {
|
||||
return WebClient.create(SPIDER_URL).post().uri("/crawl_status")
|
||||
.retrieve()
|
||||
.bodyToMono(JSONObject.class)
|
||||
.block();
|
||||
}
|
||||
|
||||
@PostMapping("/logs")
|
||||
public JSONObject logs() {
|
||||
return WebClient.create(SPIDER_URL).post().uri("/logs")
|
||||
.retrieve()
|
||||
.bodyToMono(JSONObject.class)
|
||||
.block();
|
||||
}
|
||||
|
||||
@PostMapping("/stop")
|
||||
public JSONObject stop() {
|
||||
return WebClient.create(SPIDER_URL).post().uri("/stop_crawl")
|
||||
.retrieve()
|
||||
.bodyToMono(JSONObject.class)
|
||||
.block();
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
@ -2,10 +2,15 @@ package com.zsc.edu.dify.modules.dify.controller;
|
||||
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.zsc.edu.dify.framework.event.SpiderPollingService;
|
||||
import com.zsc.edu.dify.framework.event.SpiderStartPollingEvent;
|
||||
import com.zsc.edu.dify.modules.dify.dto.SpiderDto;
|
||||
import com.zsc.edu.dify.modules.dify.service.SpiderService;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.context.ApplicationEventPublisher;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
|
||||
|
||||
/**
|
||||
* @description: 自定义爬虫
|
||||
* @author: yao
|
||||
@ -17,9 +22,23 @@ public class SpiderController {
|
||||
@Autowired
|
||||
private SpiderService spiderService;
|
||||
|
||||
@Autowired
|
||||
private ApplicationEventPublisher applicationEventPublisher;
|
||||
|
||||
@Autowired
|
||||
private SpiderPollingService spiderPollingService;
|
||||
|
||||
@PostMapping("/run/{spiderId}")
|
||||
public JSONObject run(@RequestBody(required = false) SpiderDto dto, @PathVariable String spiderId) throws JsonProcessingException {
|
||||
return spiderService.run(dto, spiderId);
|
||||
JSONObject data = spiderService.isExistCache(spiderId, dto.getEmail());
|
||||
if (data != null) {
|
||||
// 如果缓存存在,直接返回,不执行 spiderService.run()
|
||||
return data;
|
||||
} else {
|
||||
// 如果缓存不存在,正常执行,并且发布事件
|
||||
applicationEventPublisher.publishEvent(new SpiderStartPollingEvent(this, spiderId, dto.getEmail()));
|
||||
return spiderService.run(dto, spiderId);
|
||||
}
|
||||
}
|
||||
|
||||
@PostMapping("/status/{spiderId}")
|
||||
@ -34,6 +53,7 @@ public class SpiderController {
|
||||
|
||||
@PostMapping("/stop/{spiderId}")
|
||||
public JSONObject stop(@PathVariable String spiderId) {
|
||||
spiderPollingService.stopPolling();
|
||||
return spiderService.stop(spiderId);
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
package com.zsc.edu.dify.modules.dify.dto;
|
||||
|
||||
import jakarta.validation.constraints.Email;
|
||||
import jakarta.validation.constraints.NotBlank;
|
||||
import lombok.Data;
|
||||
|
||||
@ -12,4 +13,6 @@ public class SpiderDto {
|
||||
private String keyword;
|
||||
private Integer[] site_codes;
|
||||
private String llm_api_key;
|
||||
@Email
|
||||
private String email;
|
||||
}
|
||||
|
@ -3,10 +3,15 @@ package com.zsc.edu.dify.modules.dify.service.Impl;
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.zsc.edu.dify.common.util.RedisUtils;
|
||||
import com.zsc.edu.dify.framework.event.SpiderPollingService;
|
||||
import com.zsc.edu.dify.framework.message.email.EmailSender;
|
||||
import com.zsc.edu.dify.framework.spider.SpiderConfig;
|
||||
import com.zsc.edu.dify.framework.spider.SpiderProperty;
|
||||
import com.zsc.edu.dify.modules.dify.dto.SpiderDto;
|
||||
import com.zsc.edu.dify.modules.dify.service.SpiderService;
|
||||
import com.zsc.edu.dify.modules.message.entity.Notice;
|
||||
import com.zsc.edu.dify.modules.message.entity.NoticeType;
|
||||
import jakarta.annotation.PostConstruct;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
@ -26,6 +31,12 @@ public class SpiderServiceImpl implements SpiderService {
|
||||
|
||||
private static final HashMap<String, SpiderProperty> PROPERTY_MAP = new HashMap<>();
|
||||
|
||||
private final RedisUtils redisUtils;
|
||||
|
||||
private final EmailSender emailSender;
|
||||
|
||||
|
||||
|
||||
@PostConstruct
|
||||
public void init() {
|
||||
for (SpiderProperty property : spiderConfig.getConfigs()) {
|
||||
@ -74,4 +85,23 @@ public class SpiderServiceImpl implements SpiderService {
|
||||
.bodyToMono(JSONObject.class)
|
||||
.block();
|
||||
}
|
||||
|
||||
@Override
|
||||
public JSONObject isExistCache(String spiderId, String email) {
|
||||
JSONObject result = (JSONObject) redisUtils.get(spiderId + ":result");
|
||||
if (result != null) {
|
||||
//获取文件路径
|
||||
String url = result.getJSONObject("data").getString("download_url");
|
||||
//拼接爬虫路径
|
||||
String path = PROPERTY_MAP.get(spiderId).getUrl().replace("/api/v1", "") + url;
|
||||
//发送邮件给用户
|
||||
emailSender.send(email, new Notice(NoticeType.MESSAGE, false, true, false, false, "文件已到达", path, 0L));
|
||||
//构造响应体
|
||||
JSONObject response = new JSONObject();
|
||||
response.put("msg", "爬虫任务已接受");
|
||||
response.put("code", "0");
|
||||
return response;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
@ -13,4 +13,7 @@ public interface SpiderService {
|
||||
JSONObject logs(String spiderId);
|
||||
|
||||
JSONObject stop(String spiderId);
|
||||
|
||||
|
||||
JSONObject isExistCache(String spiderId, String email);
|
||||
}
|
||||
|
@ -83,16 +83,6 @@ dify:
|
||||
dataset:
|
||||
api-key: dataset-kN5WTJ8jR877YfN1A34JceVg # 请替换为实际的知识库api-key, 若不需要调用知识库可不填
|
||||
|
||||
quanguo: &quanguo
|
||||
spider-id: ${QUANGUO_ID:77c068fd-d5b6-4c33-97d8-db5511a09b26}
|
||||
url: http://${QUANGUO_HOST:47.112.173.8:6806/api/v1}
|
||||
api-key: ${QUANGUO_API_KEY:77c068fd-d5b6-4c33-97d8-db5511a09b26}
|
||||
|
||||
spider3: &spider3
|
||||
spider-id: ${SPIDER3_ID:f3a7b9c2-5d6e-4b8f-9c1a-2d3e4f5a6b7c}
|
||||
url: http://${SPIDER3_HOST:47.112.173.8:6257/api/v1}
|
||||
api-key:
|
||||
|
||||
spider:
|
||||
configs:
|
||||
# 全国爬虫
|
||||
@ -103,5 +93,10 @@ spider:
|
||||
- id: ${SPIDER3_ID:f3a7b9c2-5d6e-4b8f-9c1a-2d3e4f5a6b7c}
|
||||
url: http://${SPIDER3_HOST:47.112.173.8:6257/api/v1}
|
||||
api-key:
|
||||
# 爬虫D
|
||||
- id: ${SPIDERD_ID:e1f90005-8c42-4736-a598-9Bebe49c8e12}
|
||||
url: http://${SPIDER3_HOST:47.112.173.8:6258/api/v1}
|
||||
api-key:
|
||||
# - *quanguo
|
||||
# - *spider3
|
||||
# - *spiderD
|
||||
|
Loading…
Reference in New Issue
Block a user