diff --git a/src/main/java/com/zsc/edu/dify/framework/event/EventListener.java b/src/main/java/com/zsc/edu/dify/framework/event/EventListener.java new file mode 100644 index 0000000..bef7e73 --- /dev/null +++ b/src/main/java/com/zsc/edu/dify/framework/event/EventListener.java @@ -0,0 +1,25 @@ +package com.zsc.edu.dify.framework.event; + + +import lombok.RequiredArgsConstructor; +import org.springframework.data.redis.core.StringRedisTemplate; +import org.springframework.stereotype.Component; +/** + * @description 事件监听者 + * @author vivid + * + * */ +@Component +@RequiredArgsConstructor +public class EventListener { + + private final SpiderPollingService spiderPollingService; + + /** + * 监听爬虫启动事件 + * */ + @org.springframework.context.event.EventListener + public void handleStartPollingEvent(SpiderStartPollingEvent event) { + spiderPollingService.startPolling(event.getSpiderId(), event.getEmail()); + } +} diff --git a/src/main/java/com/zsc/edu/dify/framework/event/SpiderPollingService.java b/src/main/java/com/zsc/edu/dify/framework/event/SpiderPollingService.java new file mode 100644 index 0000000..1b83106 --- /dev/null +++ b/src/main/java/com/zsc/edu/dify/framework/event/SpiderPollingService.java @@ -0,0 +1,73 @@ +package com.zsc.edu.dify.framework.event; + +import com.alibaba.fastjson.JSONObject; +import com.zsc.edu.dify.common.util.RedisUtils; +import com.zsc.edu.dify.framework.message.email.EmailSender; +import com.zsc.edu.dify.framework.spider.SpiderConfig; +import com.zsc.edu.dify.framework.spider.SpiderProperty; +import com.zsc.edu.dify.modules.dify.service.SpiderService; +import com.zsc.edu.dify.modules.message.entity.Notice; +import com.zsc.edu.dify.modules.message.entity.NoticeType; +import jakarta.annotation.PostConstruct; +import lombok.RequiredArgsConstructor; +import org.springframework.stereotype.Service; + +import java.util.HashMap; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +/** + * @description 轮询查看爬虫结果 + * @author vivid + * */ +@Service +@RequiredArgsConstructor +public class SpiderPollingService { + + private ScheduledExecutorService scheduler; + + private final RedisUtils redisUtils; + + private final SpiderService spiderService; + + private final EmailSender emailSender; + + private final SpiderConfig spiderConfig; + + private static final HashMap PROPERTY_MAP = new HashMap<>(); + + @PostConstruct + public void init() { + for (SpiderProperty property : spiderConfig.getConfigs()) { + PROPERTY_MAP.put(property.getId(), property); + } + } + + public void startPolling(String spiderId, String email) { + scheduler = Executors.newSingleThreadScheduledExecutor(); + scheduler.scheduleAtFixedRate(() -> { + executeBusinessLogic(spiderId, email); + }, 400, 3000, TimeUnit.MILLISECONDS); + } + + private void executeBusinessLogic(String spiderId, String email) { + JSONObject jsonObject = spiderService.status(spiderId); + JSONObject data = jsonObject.getJSONObject("data"); + if (!data.getBoolean("is_running")) { + //获取文件 + String url = data.getString("download_url"); + //添加爬虫请求头,并且将前缀去掉 + String path = PROPERTY_MAP.get(spiderId).getUrl().replace("/api/v1", "") + url; + //发送邮件给用户 + emailSender.send(email, new Notice(NoticeType.MESSAGE, false, true, false, false, "文件已到达", path, 0L)); + //将整个对象写入redis + redisUtils.set(spiderId + ":result", jsonObject, 60 * 60 * 24); + //停止任务 + scheduler.shutdown(); + } + } + + public void stopPolling() { + scheduler.shutdown(); + } +} diff --git a/src/main/java/com/zsc/edu/dify/framework/event/SpiderStartPollingEvent.java b/src/main/java/com/zsc/edu/dify/framework/event/SpiderStartPollingEvent.java new file mode 100644 index 0000000..ca91469 --- /dev/null +++ b/src/main/java/com/zsc/edu/dify/framework/event/SpiderStartPollingEvent.java @@ -0,0 +1,23 @@ +package com.zsc.edu.dify.framework.event; + +import lombok.Getter; +import org.springframework.context.ApplicationEvent; +/** + * + * @description 定义爬虫启动的事件 + * @author vivid + * + * */ +@Getter +public class SpiderStartPollingEvent extends ApplicationEvent { + + private String spiderId; + + private String email; + + public SpiderStartPollingEvent(Object source, String spiderId, String email) { + super(source); + this.spiderId = spiderId; + this.email = email; + } +} diff --git a/src/main/java/com/zsc/edu/dify/modules/dify/controller/Spider3Controller.java b/src/main/java/com/zsc/edu/dify/modules/dify/controller/Spider3Controller.java deleted file mode 100644 index 1a707e5..0000000 --- a/src/main/java/com/zsc/edu/dify/modules/dify/controller/Spider3Controller.java +++ /dev/null @@ -1,62 +0,0 @@ -package com.zsc.edu.dify.modules.dify.controller; - - -import com.alibaba.fastjson.JSONObject; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.zsc.edu.dify.modules.dify.dto.SpiderDto; -import jakarta.annotation.Resource; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.http.MediaType; -import org.springframework.web.bind.annotation.*; -import org.springframework.web.reactive.function.client.WebClient; - - -@RestController -@RequestMapping("/api/spider3") -public class Spider3Controller { - @Resource - private ObjectMapper objectMapper; - - @Value("${spider3.url}") - private String SPIDER_URL; - - @Value("${spider3.api-key}") - private String API_KEY; - - @PostMapping("/run") - public JSONObject run() throws JsonProcessingException { - return WebClient.create(SPIDER_URL).post().uri("/start_crawl") - .contentType(MediaType.APPLICATION_JSON) - .retrieve() - .bodyToMono(JSONObject.class) - .block(); - } - - @PostMapping("/status") - public JSONObject status() { - return WebClient.create(SPIDER_URL).post().uri("/crawl_status") - .retrieve() - .bodyToMono(JSONObject.class) - .block(); - } - - @PostMapping("/logs") - public JSONObject logs() { - return WebClient.create(SPIDER_URL).post().uri("/logs") - .retrieve() - .bodyToMono(JSONObject.class) - .block(); - } - - @PostMapping("/stop") - public JSONObject stop() { - return WebClient.create(SPIDER_URL).post().uri("/stop_crawl") - .retrieve() - .bodyToMono(JSONObject.class) - .block(); - } - - - -} diff --git a/src/main/java/com/zsc/edu/dify/modules/dify/controller/SpiderController.java b/src/main/java/com/zsc/edu/dify/modules/dify/controller/SpiderController.java index 1dde14e..7be8d2b 100644 --- a/src/main/java/com/zsc/edu/dify/modules/dify/controller/SpiderController.java +++ b/src/main/java/com/zsc/edu/dify/modules/dify/controller/SpiderController.java @@ -2,10 +2,15 @@ package com.zsc.edu.dify.modules.dify.controller; import com.alibaba.fastjson.JSONObject; import com.fasterxml.jackson.core.JsonProcessingException; +import com.zsc.edu.dify.framework.event.SpiderPollingService; +import com.zsc.edu.dify.framework.event.SpiderStartPollingEvent; import com.zsc.edu.dify.modules.dify.dto.SpiderDto; import com.zsc.edu.dify.modules.dify.service.SpiderService; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.ApplicationEventPublisher; import org.springframework.web.bind.annotation.*; + + /** * @description: 自定义爬虫 * @author: yao @@ -17,9 +22,23 @@ public class SpiderController { @Autowired private SpiderService spiderService; + @Autowired + private ApplicationEventPublisher applicationEventPublisher; + + @Autowired + private SpiderPollingService spiderPollingService; + @PostMapping("/run/{spiderId}") public JSONObject run(@RequestBody(required = false) SpiderDto dto, @PathVariable String spiderId) throws JsonProcessingException { - return spiderService.run(dto, spiderId); + JSONObject data = spiderService.isExistCache(spiderId, dto.getEmail()); + if (data != null) { + // 如果缓存存在,直接返回,不执行 spiderService.run() + return data; + } else { + // 如果缓存不存在,正常执行,并且发布事件 + applicationEventPublisher.publishEvent(new SpiderStartPollingEvent(this, spiderId, dto.getEmail())); + return spiderService.run(dto, spiderId); + } } @PostMapping("/status/{spiderId}") @@ -34,6 +53,7 @@ public class SpiderController { @PostMapping("/stop/{spiderId}") public JSONObject stop(@PathVariable String spiderId) { + spiderPollingService.stopPolling(); return spiderService.stop(spiderId); } diff --git a/src/main/java/com/zsc/edu/dify/modules/dify/dto/SpiderDto.java b/src/main/java/com/zsc/edu/dify/modules/dify/dto/SpiderDto.java index 7d45260..9d8817e 100644 --- a/src/main/java/com/zsc/edu/dify/modules/dify/dto/SpiderDto.java +++ b/src/main/java/com/zsc/edu/dify/modules/dify/dto/SpiderDto.java @@ -1,5 +1,6 @@ package com.zsc.edu.dify.modules.dify.dto; +import jakarta.validation.constraints.Email; import jakarta.validation.constraints.NotBlank; import lombok.Data; @@ -12,4 +13,6 @@ public class SpiderDto { private String keyword; private Integer[] site_codes; private String llm_api_key; + @Email + private String email; } diff --git a/src/main/java/com/zsc/edu/dify/modules/dify/service/Impl/SpiderServiceImpl.java b/src/main/java/com/zsc/edu/dify/modules/dify/service/Impl/SpiderServiceImpl.java index 582804b..abef61a 100644 --- a/src/main/java/com/zsc/edu/dify/modules/dify/service/Impl/SpiderServiceImpl.java +++ b/src/main/java/com/zsc/edu/dify/modules/dify/service/Impl/SpiderServiceImpl.java @@ -3,10 +3,15 @@ package com.zsc.edu.dify.modules.dify.service.Impl; import com.alibaba.fastjson.JSONObject; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; +import com.zsc.edu.dify.common.util.RedisUtils; +import com.zsc.edu.dify.framework.event.SpiderPollingService; +import com.zsc.edu.dify.framework.message.email.EmailSender; import com.zsc.edu.dify.framework.spider.SpiderConfig; import com.zsc.edu.dify.framework.spider.SpiderProperty; import com.zsc.edu.dify.modules.dify.dto.SpiderDto; import com.zsc.edu.dify.modules.dify.service.SpiderService; +import com.zsc.edu.dify.modules.message.entity.Notice; +import com.zsc.edu.dify.modules.message.entity.NoticeType; import jakarta.annotation.PostConstruct; import lombok.RequiredArgsConstructor; import org.apache.commons.lang3.StringUtils; @@ -26,6 +31,12 @@ public class SpiderServiceImpl implements SpiderService { private static final HashMap PROPERTY_MAP = new HashMap<>(); + private final RedisUtils redisUtils; + + private final EmailSender emailSender; + + + @PostConstruct public void init() { for (SpiderProperty property : spiderConfig.getConfigs()) { @@ -74,4 +85,23 @@ public class SpiderServiceImpl implements SpiderService { .bodyToMono(JSONObject.class) .block(); } + + @Override + public JSONObject isExistCache(String spiderId, String email) { + JSONObject result = (JSONObject) redisUtils.get(spiderId + ":result"); + if (result != null) { + //获取文件路径 + String url = result.getJSONObject("data").getString("download_url"); + //拼接爬虫路径 + String path = PROPERTY_MAP.get(spiderId).getUrl().replace("/api/v1", "") + url; + //发送邮件给用户 + emailSender.send(email, new Notice(NoticeType.MESSAGE, false, true, false, false, "文件已到达", path, 0L)); + //构造响应体 + JSONObject response = new JSONObject(); + response.put("msg", "爬虫任务已接受"); + response.put("code", "0"); + return response; + } + return null; + } } diff --git a/src/main/java/com/zsc/edu/dify/modules/dify/service/SpiderService.java b/src/main/java/com/zsc/edu/dify/modules/dify/service/SpiderService.java index 12dd827..660fde9 100644 --- a/src/main/java/com/zsc/edu/dify/modules/dify/service/SpiderService.java +++ b/src/main/java/com/zsc/edu/dify/modules/dify/service/SpiderService.java @@ -13,4 +13,7 @@ public interface SpiderService { JSONObject logs(String spiderId); JSONObject stop(String spiderId); + + + JSONObject isExistCache(String spiderId, String email); } diff --git a/src/main/resources/application-dev.yml b/src/main/resources/application-dev.yml index 0fca2cd..f2358a5 100644 --- a/src/main/resources/application-dev.yml +++ b/src/main/resources/application-dev.yml @@ -83,16 +83,6 @@ dify: dataset: api-key: dataset-kN5WTJ8jR877YfN1A34JceVg # 请替换为实际的知识库api-key, 若不需要调用知识库可不填 -quanguo: &quanguo - spider-id: ${QUANGUO_ID:77c068fd-d5b6-4c33-97d8-db5511a09b26} - url: http://${QUANGUO_HOST:47.112.173.8:6806/api/v1} - api-key: ${QUANGUO_API_KEY:77c068fd-d5b6-4c33-97d8-db5511a09b26} - -spider3: &spider3 - spider-id: ${SPIDER3_ID:f3a7b9c2-5d6e-4b8f-9c1a-2d3e4f5a6b7c} - url: http://${SPIDER3_HOST:47.112.173.8:6257/api/v1} - api-key: - spider: configs: # 全国爬虫 @@ -103,5 +93,10 @@ spider: - id: ${SPIDER3_ID:f3a7b9c2-5d6e-4b8f-9c1a-2d3e4f5a6b7c} url: http://${SPIDER3_HOST:47.112.173.8:6257/api/v1} api-key: + # 爬虫D + - id: ${SPIDERD_ID:e1f90005-8c42-4736-a598-9Bebe49c8e12} + url: http://${SPIDER3_HOST:47.112.173.8:6258/api/v1} + api-key: # - *quanguo # - *spider3 +# - *spiderD