feat(dify):

- 新增爬虫结果缓存
- 添加邮件通知功能,当爬虫完成时发送邮件给用户
- 实现爬虫启动事件和监听器,用于启动轮询服务
- 优化 application-dev.yml,移除不必要的配置项
-移除原本的爬虫3
This commit is contained in:
vivid 2025-07-24 18:24:02 +08:00
parent aeb978ef11
commit d0953a1af2
9 changed files with 183 additions and 73 deletions

View File

@ -0,0 +1,25 @@
package com.zsc.edu.dify.framework.event;
import lombok.RequiredArgsConstructor;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.stereotype.Component;
/**
* @description 事件监听者
* @author vivid
*
* */
@Component
@RequiredArgsConstructor
public class EventListener {
private final SpiderPollingService spiderPollingService;
/**
* 监听爬虫启动事件
* */
@org.springframework.context.event.EventListener
public void handleStartPollingEvent(SpiderStartPollingEvent event) {
spiderPollingService.startPolling(event.getSpiderId(), event.getEmail());
}
}

View File

@ -0,0 +1,73 @@
package com.zsc.edu.dify.framework.event;
import com.alibaba.fastjson.JSONObject;
import com.zsc.edu.dify.common.util.RedisUtils;
import com.zsc.edu.dify.framework.message.email.EmailSender;
import com.zsc.edu.dify.framework.spider.SpiderConfig;
import com.zsc.edu.dify.framework.spider.SpiderProperty;
import com.zsc.edu.dify.modules.dify.service.SpiderService;
import com.zsc.edu.dify.modules.message.entity.Notice;
import com.zsc.edu.dify.modules.message.entity.NoticeType;
import jakarta.annotation.PostConstruct;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Service;
import java.util.HashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
/**
* @description 轮询查看爬虫结果
* @author vivid
* */
@Service
@RequiredArgsConstructor
public class SpiderPollingService {
private ScheduledExecutorService scheduler;
private final RedisUtils redisUtils;
private final SpiderService spiderService;
private final EmailSender emailSender;
private final SpiderConfig spiderConfig;
private static final HashMap<String, SpiderProperty> PROPERTY_MAP = new HashMap<>();
@PostConstruct
public void init() {
for (SpiderProperty property : spiderConfig.getConfigs()) {
PROPERTY_MAP.put(property.getId(), property);
}
}
public void startPolling(String spiderId, String email) {
scheduler = Executors.newSingleThreadScheduledExecutor();
scheduler.scheduleAtFixedRate(() -> {
executeBusinessLogic(spiderId, email);
}, 400, 3000, TimeUnit.MILLISECONDS);
}
private void executeBusinessLogic(String spiderId, String email) {
JSONObject jsonObject = spiderService.status(spiderId);
JSONObject data = jsonObject.getJSONObject("data");
if (!data.getBoolean("is_running")) {
//获取文件
String url = data.getString("download_url");
//添加爬虫请求头并且将前缀去掉
String path = PROPERTY_MAP.get(spiderId).getUrl().replace("/api/v1", "") + url;
//发送邮件给用户
emailSender.send(email, new Notice(NoticeType.MESSAGE, false, true, false, false, "文件已到达", path, 0L));
//将整个对象写入redis
redisUtils.set(spiderId + ":result", jsonObject, 60 * 60 * 24);
//停止任务
scheduler.shutdown();
}
}
public void stopPolling() {
scheduler.shutdown();
}
}

View File

@ -0,0 +1,23 @@
package com.zsc.edu.dify.framework.event;
import lombok.Getter;
import org.springframework.context.ApplicationEvent;
/**
*
* @description 定义爬虫启动的事件
* @author vivid
*
* */
@Getter
public class SpiderStartPollingEvent extends ApplicationEvent {
private String spiderId;
private String email;
public SpiderStartPollingEvent(Object source, String spiderId, String email) {
super(source);
this.spiderId = spiderId;
this.email = email;
}
}

View File

@ -1,62 +0,0 @@
package com.zsc.edu.dify.modules.dify.controller;
import com.alibaba.fastjson.JSONObject;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.zsc.edu.dify.modules.dify.dto.SpiderDto;
import jakarta.annotation.Resource;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.reactive.function.client.WebClient;
@RestController
@RequestMapping("/api/spider3")
public class Spider3Controller {
@Resource
private ObjectMapper objectMapper;
@Value("${spider3.url}")
private String SPIDER_URL;
@Value("${spider3.api-key}")
private String API_KEY;
@PostMapping("/run")
public JSONObject run() throws JsonProcessingException {
return WebClient.create(SPIDER_URL).post().uri("/start_crawl")
.contentType(MediaType.APPLICATION_JSON)
.retrieve()
.bodyToMono(JSONObject.class)
.block();
}
@PostMapping("/status")
public JSONObject status() {
return WebClient.create(SPIDER_URL).post().uri("/crawl_status")
.retrieve()
.bodyToMono(JSONObject.class)
.block();
}
@PostMapping("/logs")
public JSONObject logs() {
return WebClient.create(SPIDER_URL).post().uri("/logs")
.retrieve()
.bodyToMono(JSONObject.class)
.block();
}
@PostMapping("/stop")
public JSONObject stop() {
return WebClient.create(SPIDER_URL).post().uri("/stop_crawl")
.retrieve()
.bodyToMono(JSONObject.class)
.block();
}
}

View File

@ -2,10 +2,15 @@ package com.zsc.edu.dify.modules.dify.controller;
import com.alibaba.fastjson.JSONObject;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.zsc.edu.dify.framework.event.SpiderPollingService;
import com.zsc.edu.dify.framework.event.SpiderStartPollingEvent;
import com.zsc.edu.dify.modules.dify.dto.SpiderDto;
import com.zsc.edu.dify.modules.dify.service.SpiderService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationEventPublisher;
import org.springframework.web.bind.annotation.*;
/**
* @description: 自定义爬虫
* @author: yao
@ -17,9 +22,23 @@ public class SpiderController {
@Autowired
private SpiderService spiderService;
@Autowired
private ApplicationEventPublisher applicationEventPublisher;
@Autowired
private SpiderPollingService spiderPollingService;
@PostMapping("/run/{spiderId}")
public JSONObject run(@RequestBody(required = false) SpiderDto dto, @PathVariable String spiderId) throws JsonProcessingException {
return spiderService.run(dto, spiderId);
JSONObject data = spiderService.isExistCache(spiderId, dto.getEmail());
if (data != null) {
// 如果缓存存在直接返回不执行 spiderService.run()
return data;
} else {
// 如果缓存不存在正常执行并且发布事件
applicationEventPublisher.publishEvent(new SpiderStartPollingEvent(this, spiderId, dto.getEmail()));
return spiderService.run(dto, spiderId);
}
}
@PostMapping("/status/{spiderId}")
@ -34,6 +53,7 @@ public class SpiderController {
@PostMapping("/stop/{spiderId}")
public JSONObject stop(@PathVariable String spiderId) {
spiderPollingService.stopPolling();
return spiderService.stop(spiderId);
}

View File

@ -1,5 +1,6 @@
package com.zsc.edu.dify.modules.dify.dto;
import jakarta.validation.constraints.Email;
import jakarta.validation.constraints.NotBlank;
import lombok.Data;
@ -12,4 +13,6 @@ public class SpiderDto {
private String keyword;
private Integer[] site_codes;
private String llm_api_key;
@Email
private String email;
}

View File

@ -3,10 +3,15 @@ package com.zsc.edu.dify.modules.dify.service.Impl;
import com.alibaba.fastjson.JSONObject;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.zsc.edu.dify.common.util.RedisUtils;
import com.zsc.edu.dify.framework.event.SpiderPollingService;
import com.zsc.edu.dify.framework.message.email.EmailSender;
import com.zsc.edu.dify.framework.spider.SpiderConfig;
import com.zsc.edu.dify.framework.spider.SpiderProperty;
import com.zsc.edu.dify.modules.dify.dto.SpiderDto;
import com.zsc.edu.dify.modules.dify.service.SpiderService;
import com.zsc.edu.dify.modules.message.entity.Notice;
import com.zsc.edu.dify.modules.message.entity.NoticeType;
import jakarta.annotation.PostConstruct;
import lombok.RequiredArgsConstructor;
import org.apache.commons.lang3.StringUtils;
@ -26,6 +31,12 @@ public class SpiderServiceImpl implements SpiderService {
private static final HashMap<String, SpiderProperty> PROPERTY_MAP = new HashMap<>();
private final RedisUtils redisUtils;
private final EmailSender emailSender;
@PostConstruct
public void init() {
for (SpiderProperty property : spiderConfig.getConfigs()) {
@ -74,4 +85,23 @@ public class SpiderServiceImpl implements SpiderService {
.bodyToMono(JSONObject.class)
.block();
}
@Override
public JSONObject isExistCache(String spiderId, String email) {
JSONObject result = (JSONObject) redisUtils.get(spiderId + ":result");
if (result != null) {
//获取文件路径
String url = result.getJSONObject("data").getString("download_url");
//拼接爬虫路径
String path = PROPERTY_MAP.get(spiderId).getUrl().replace("/api/v1", "") + url;
//发送邮件给用户
emailSender.send(email, new Notice(NoticeType.MESSAGE, false, true, false, false, "文件已到达", path, 0L));
//构造响应体
JSONObject response = new JSONObject();
response.put("msg", "爬虫任务已接受");
response.put("code", "0");
return response;
}
return null;
}
}

View File

@ -13,4 +13,7 @@ public interface SpiderService {
JSONObject logs(String spiderId);
JSONObject stop(String spiderId);
JSONObject isExistCache(String spiderId, String email);
}

View File

@ -83,16 +83,6 @@ dify:
dataset:
api-key: dataset-kN5WTJ8jR877YfN1A34JceVg # 请替换为实际的知识库api-key, 若不需要调用知识库可不填
quanguo: &quanguo
spider-id: ${QUANGUO_ID:77c068fd-d5b6-4c33-97d8-db5511a09b26}
url: http://${QUANGUO_HOST:47.112.173.8:6806/api/v1}
api-key: ${QUANGUO_API_KEY:77c068fd-d5b6-4c33-97d8-db5511a09b26}
spider3: &spider3
spider-id: ${SPIDER3_ID:f3a7b9c2-5d6e-4b8f-9c1a-2d3e4f5a6b7c}
url: http://${SPIDER3_HOST:47.112.173.8:6257/api/v1}
api-key:
spider:
configs:
# 全国爬虫
@ -103,5 +93,10 @@ spider:
- id: ${SPIDER3_ID:f3a7b9c2-5d6e-4b8f-9c1a-2d3e4f5a6b7c}
url: http://${SPIDER3_HOST:47.112.173.8:6257/api/v1}
api-key:
# 爬虫D
- id: ${SPIDERD_ID:e1f90005-8c42-4736-a598-9Bebe49c8e12}
url: http://${SPIDER3_HOST:47.112.173.8:6258/api/v1}
api-key:
# - *quanguo
# - *spider3
# - *spiderD