feat(spider): 重构爬虫模块

- 新增 SpiderConfig 和 SpiderProperty 类来管理爬虫配置
- 新增 SpiderService 接口和 SpiderServiceImpl 实现类来处理爬虫相关操作
- 重构了爬虫运行、状态、日志和停止等功能
- 删除了旧的 SpiderStrategy、SpiderStrategyFactory 等类
- 更新了 application-dev.yml 中的爬虫配置
This commit is contained in:
vertoryao 2025-07-20 20:46:53 +08:00
parent 736673ede8
commit aeb978ef11
13 changed files with 161 additions and 274 deletions

View File

@ -1,12 +0,0 @@
package com.zsc.edu.dify.common.strategy;
import com.alibaba.fastjson.JSONObject;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.zsc.edu.dify.modules.dify.dto.SpiderDto;
/**
 * Strategy abstraction for talking to one spider (crawler) service.
 */
public interface SpiderStrategy {

    /**
     * Sends a run request to the spider service.
     *
     * @param dto run request parameters
     * @return the JSON response returned by the spider service
     * @throws JsonProcessingException if the request cannot be serialized to JSON
     */
    JSONObject run(SpiderDto dto) throws JsonProcessingException;

    /**
     * Returns the URL of the spider service used by this strategy.
     *
     * @return the spider service URL
     */
    String getUrl();
}

View File

@ -1,35 +0,0 @@
package com.zsc.edu.dify.common.strategy.factory;
import com.zsc.edu.dify.common.strategy.SpiderStrategy;
import com.zsc.edu.dify.exception.NotExistException;
import jakarta.annotation.PostConstruct;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Resolves the {@link SpiderStrategy} registered for an external spider id.
 *
 * <p>Strategies are resolved through their Spring bean names rather than through their position
 * in an injected list, so the id-to-strategy table does not depend on bean registration order.
 */
@Component
public class SpiderStrategyFactory {

    /** Every {@link SpiderStrategy} bean, keyed by its bean name (filled in by Spring). */
    @Autowired
    private Map<String, SpiderStrategy> spiderStrategyBeans;

    /** Lookup table from external spider id to the strategy serving it. */
    private final Map<String, SpiderStrategy> spiderUrlStrategyMap = new HashMap<>();

    /**
     * Builds the id-to-strategy table once dependency injection has finished.
     *
     * @throws IllegalStateException if one of the expected strategy beans is missing
     */
    @PostConstruct
    public void init() {
        register("1", "spider1");
        register("3", "spider3");
    }

    /**
     * Returns the strategy registered for the given spider id.
     *
     * @param strategyName external spider id (currently {@code "1"} or {@code "3"})
     * @return the matching strategy, never {@code null}
     * @throws NotExistException if no strategy is registered for the id
     */
    public SpiderStrategy getSpiderStrategy(String strategyName) {
        SpiderStrategy spiderUrlStrategy = spiderUrlStrategyMap.get(strategyName);
        if (spiderUrlStrategy == null) {
            throw new NotExistException(SpiderStrategy.class);
        }
        return spiderUrlStrategy;
    }

    /** Binds one spider id to the strategy bean with the given name, failing fast if absent. */
    private void register(String spiderId, String beanName) {
        SpiderStrategy strategy = spiderStrategyBeans.get(beanName);
        if (strategy == null) {
            throw new IllegalStateException(
                    "No SpiderStrategy bean named '" + beanName + "' for spider id " + spiderId);
        }
        spiderUrlStrategyMap.put(spiderId, strategy);
    }
}

View File

@ -1,43 +0,0 @@
package com.zsc.edu.dify.common.strategy.impl;
import com.alibaba.fastjson.JSONObject;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.zsc.edu.dify.common.strategy.SpiderStrategy;
import com.zsc.edu.dify.modules.dify.dto.SpiderDto;
import jakarta.annotation.Resource;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.http.MediaType;
import org.springframework.stereotype.Component;
import org.springframework.web.reactive.function.client.WebClient;
@Component("spider1")
public class Spider1StrategyImpl implements SpiderStrategy {
@Value("${quanguo.url}")
private String SPIDER_URL;
@Value("${quanguo.api-key}")
private String API_KEY;
@Resource
private ObjectMapper objectMapper;
@Override
public JSONObject run(SpiderDto dto) throws JsonProcessingException {
dto.setLlm_api_key(API_KEY);
String body = objectMapper.writeValueAsString(dto);
return WebClient.create(SPIDER_URL).post().uri("/start_crawl")
.contentType(MediaType.APPLICATION_JSON)
.bodyValue(body)
.retrieve()
.bodyToMono(JSONObject.class)
.block();
}
@Override
public String getUrl() {
return SPIDER_URL;
}
}

View File

@ -1,29 +0,0 @@
package com.zsc.edu.dify.common.strategy.impl;
import com.alibaba.fastjson.JSONObject;
import com.zsc.edu.dify.common.strategy.SpiderStrategy;
import com.zsc.edu.dify.modules.dify.dto.SpiderDto;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.http.MediaType;
import org.springframework.stereotype.Component;
import org.springframework.web.reactive.function.client.WebClient;
@Component("spider3")
public class Spider3StrategyImpl implements SpiderStrategy {
@Value("${spider3.url}")
private String SPIDER_URL;
@Override
public JSONObject run(SpiderDto dto) {
return WebClient.create(SPIDER_URL).post().uri("/start_crawl")
.contentType(MediaType.APPLICATION_JSON)
.retrieve()
.bodyToMono(JSONObject.class)
.block();
}
@Override
public String getUrl() {
return SPIDER_URL;
}
}

View File

@ -0,0 +1,20 @@
package com.zsc.edu.dify.framework.spider;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Configuration;
import java.util.List;
/**
 * Spider settings bound from the {@code spider.*} configuration properties.
 *
 * <p>The class is registered as a bean once, through {@code @Configuration}. It deliberately does
 * not list itself in {@code @EnableConfigurationProperties}: doing so registers a second bean of
 * the same type next to the scanned one, which makes injection by type ambiguous.
 *
 * @author Yao
 */
@Data
@Configuration
@ConfigurationProperties(prefix = "spider")
public class SpiderConfig {

    /**
     * Entries bound from {@code spider.configs}. Defaults to an empty list so that callers can
     * iterate safely when the property is not configured at all.
     */
    private List<SpiderProperty> configs = List.of();
}

View File

@ -0,0 +1,10 @@
package com.zsc.edu.dify.framework.spider;
import lombok.Data;
/**
 * Settings of a single spider entry; accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class SpiderProperty {
/** Identifier of the spider. */
private String id;
/** URL of the spider. */
private String url;
/** API key of the spider. */
private String apiKey;
}

View File

@ -10,20 +10,16 @@ import com.zsc.edu.dify.modules.operationLog.entity.OperationLogAnnotation;
import io.github.guoshiqiufeng.dify.workflow.dto.request.WorkflowRunRequest;
import io.github.guoshiqiufeng.dify.workflow.dto.response.WorkflowRunResponse;
import jakarta.annotation.Resource;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.web.bind.annotation.*;
import java.util.List;
@RestController
@RequestMapping("/api/spider2")
public class Spider2Controller {
@RequestMapping("/api/spider/dify")
public class DifySpiderController {
@Resource
private DifyWorkflowService difyWorkflowService;
@Resource
private AppEntityService appEntityService;
/**
* 运行广州公共资源交易中心 招标小助手
*
@ -36,14 +32,4 @@ public class Spider2Controller {
request.setUserId(SecurityUtil.getUserInfo().id.toString());
return ExceptionUtil.difyException(() -> difyWorkflowService.run(request, appId));
}
/**
 * Lists the applications whose app type is {@code SCRAPER}.
 *
 * @return the applications selected by the app entity service for that type
 */
@GetMapping("/apps")
@DataPermission
public List<AppEntity> getAppsByAppType() {
    final var scraperType = AppEntity.AppType.SCRAPER.getValue();
    return appEntityService.selectByAppType(scraperType);
}
}

View File

@ -1,51 +0,0 @@
package com.zsc.edu.dify.modules.dify.controller;
import com.zsc.edu.dify.exception.ExceptionUtil;
import com.zsc.edu.dify.framework.mybatisplus.DataPermission;
import com.zsc.edu.dify.framework.security.SecurityUtil;
import com.zsc.edu.dify.modules.dify.entity.AppEntity;
import com.zsc.edu.dify.modules.dify.service.AppEntityService;
import com.zsc.edu.dify.modules.dify.service.DifyWorkflowService;
import com.zsc.edu.dify.modules.operationLog.entity.OperationLogAnnotation;
import io.github.guoshiqiufeng.dify.workflow.dto.request.WorkflowRunRequest;
import io.github.guoshiqiufeng.dify.workflow.dto.response.WorkflowRunResponse;
import jakarta.annotation.Resource;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.web.bind.annotation.*;
import java.util.List;
/**
 * REST endpoints served under {@code /api/ppt}.
 */
@RestController
@RequestMapping("/api/ppt")
public class PPTController {

    @Resource
    private DifyWorkflowService difyWorkflowService;

    @Resource
    private AppEntityService appEntityService;

    /**
     * Runs the workflow that generates a science-and-technology project PPT from a feasibility
     * study application.
     *
     * @param request workflow run request; its user id is overwritten with the current user's id
     * @param appId   id of the application to run
     * @return the workflow run response
     */
    @PostMapping("/run/{appId}")
    @OperationLogAnnotation(content = "'dify工作流'", operationType = "运行")
    public WorkflowRunResponse runWorkflow(@RequestBody WorkflowRunRequest request, @PathVariable String appId) {
        final String currentUserId = SecurityUtil.getUserInfo().id.toString();
        request.setUserId(currentUserId);
        return ExceptionUtil.difyException(() -> difyWorkflowService.run(request, appId));
    }

    /**
     * Lists the applications whose app type is {@code PPT}.
     *
     * @return the applications selected by the app entity service for that type
     */
    @GetMapping("/apps")
    @DataPermission
    public List<AppEntity> getAppsByAppType() {
        final var pptType = AppEntity.AppType.PPT.getValue();
        return appEntityService.selectByAppType(pptType);
    }
}

View File

@ -2,54 +2,39 @@ package com.zsc.edu.dify.modules.dify.controller;
import com.alibaba.fastjson.JSONObject;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.zsc.edu.dify.common.strategy.SpiderStrategy;
import com.zsc.edu.dify.common.strategy.factory.SpiderStrategyFactory;
import com.zsc.edu.dify.modules.dify.dto.SpiderDto;
import com.zsc.edu.dify.modules.dify.service.SpiderService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.reactive.function.client.WebClient;
/**
* @description: Custom spider (crawler) endpoints
* @author: yao
*/
// NOTE(review): each handler below appears twice in this span, once taking spiderId as a
// request parameter and once as a path variable; the two forms are interleaved line by line.
@RestController
@RequestMapping("/api/spider")
public class SpiderController {
@Autowired
private SpiderStrategyFactory spiderStrategyFactory;
private SpiderService spiderService;
// Run: passes the optional request body and the spider id on and returns the JSON result.
@PostMapping("/run")
public JSONObject run(@RequestBody(required = false) SpiderDto dto, @RequestParam String spiderId) throws JsonProcessingException {
SpiderStrategy spiderStrategy = spiderStrategyFactory.getSpiderStrategy(spiderId);
return spiderStrategy.run(dto);
@PostMapping("/run/{spiderId}")
public JSONObject run(@RequestBody(required = false) SpiderDto dto, @PathVariable String spiderId) throws JsonProcessingException {
return spiderService.run(dto, spiderId);
}
// Status: returns the JSON obtained for the given spider id from the status operation.
@PostMapping("/status")
public JSONObject status(@RequestParam String spiderId) {
SpiderStrategy spiderStrategy = spiderStrategyFactory.getSpiderStrategy(spiderId);
String url = spiderStrategy.getUrl();
return WebClient.create(url).post().uri("/crawl_status")
.retrieve()
.bodyToMono(JSONObject.class)
.block();
@PostMapping("/status/{spiderId}")
public JSONObject status(@PathVariable String spiderId) {
return spiderService.status(spiderId);
}
// Logs: returns the JSON obtained for the given spider id from the logs operation.
@PostMapping("/logs")
public JSONObject logs(@RequestParam String spiderId) {
SpiderStrategy spiderStrategy = spiderStrategyFactory.getSpiderStrategy(spiderId);
String url = spiderStrategy.getUrl();
return WebClient.create(url).post().uri("/logs")
.retrieve()
.bodyToMono(JSONObject.class)
.block();
@PostMapping("/logs/{spiderId}")
public JSONObject logs(@PathVariable String spiderId) {
return spiderService.logs(spiderId);
}
// Stop: returns the JSON obtained for the given spider id from the stop operation.
@PostMapping("/stop")
public JSONObject stop(@RequestParam String spiderId) {
SpiderStrategy spiderStrategy = spiderStrategyFactory.getSpiderStrategy(spiderId);
String url = spiderStrategy.getUrl();
return WebClient.create(url).post().uri("/stop_crawl")
.retrieve()
.bodyToMono(JSONObject.class)
.block();
@PostMapping("/stop/{spiderId}")
public JSONObject stop(@PathVariable String spiderId) {
return spiderService.stop(spiderId);
}
}

View File

@ -1,51 +0,0 @@
package com.zsc.edu.dify.modules.dify.controller;
import com.zsc.edu.dify.exception.ExceptionUtil;
import com.zsc.edu.dify.framework.mybatisplus.DataPermission;
import com.zsc.edu.dify.framework.security.SecurityUtil;
import com.zsc.edu.dify.modules.dify.entity.AppEntity;
import com.zsc.edu.dify.modules.dify.service.AppEntityService;
import com.zsc.edu.dify.modules.operationLog.entity.OperationLogAnnotation;
import io.github.guoshiqiufeng.dify.chat.DifyChat;
import io.github.guoshiqiufeng.dify.chat.dto.request.ChatMessageSendRequest;
import io.github.guoshiqiufeng.dify.chat.dto.response.ChatMessageSendResponse;
import jakarta.annotation.Resource;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.web.bind.annotation.*;
import java.util.List;
/**
 * REST endpoints served under {@code /api/word}.
 */
@RestController
@RequestMapping("/api/word")
public class WordController {

    @Resource
    private DifyChat difyChat;

    @Resource
    private AppEntityService appEntityService;

    /**
     * Sends a chat message on the "vocational innovation application" chat-flow route.
     *
     * @param sendRequest message parameters; its API key is replaced with the one looked up for
     *                    {@code appId} and its user id with the current user's id before sending
     * @param appId       id of the application whose API key is used
     * @return the chat response
     */
    @PostMapping("/completions/{appId}")
    @OperationLogAnnotation(content = "'dify对话'", operationType = "发送")
    public ChatMessageSendResponse sendChatMessage(@RequestBody ChatMessageSendRequest sendRequest, @PathVariable String appId) {
        final var appApiKey = appEntityService.getApikey(appId);
        sendRequest.setApiKey(appApiKey);

        final String currentUserId = SecurityUtil.getUserInfo().id.toString();
        sendRequest.setUserId(currentUserId);

        return ExceptionUtil.difyException(() -> difyChat.send(sendRequest));
    }

    /**
     * Lists the applications whose app type is {@code WORD}.
     *
     * @return the applications selected by the app entity service for that type
     */
    @GetMapping("/apps")
    @DataPermission
    public List<AppEntity> getAppsByAppType() {
        final var wordType = AppEntity.AppType.WORD.getValue();
        return appEntityService.selectByAppType(wordType);
    }
}

View File

@ -0,0 +1,77 @@
package com.zsc.edu.dify.modules.dify.service.Impl;
import com.alibaba.fastjson.JSONObject;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.zsc.edu.dify.framework.spider.SpiderConfig;
import com.zsc.edu.dify.framework.spider.SpiderProperty;
import com.zsc.edu.dify.modules.dify.dto.SpiderDto;
import com.zsc.edu.dify.modules.dify.service.SpiderService;
import jakarta.annotation.PostConstruct;
import lombok.RequiredArgsConstructor;
import org.apache.commons.lang3.StringUtils;
import org.springframework.http.MediaType;
import org.springframework.stereotype.Service;
import org.springframework.web.reactive.function.client.WebClient;
import java.util.HashMap;
/**
 * {@link SpiderService} implementation that forwards every operation as an HTTP POST to the
 * spider service configured for the requested spider id.
 */
@RequiredArgsConstructor
@Service
public class SpiderServiceImpl implements SpiderService {

    private final ObjectMapper objectMapper;
    private final SpiderConfig spiderConfig;

    /**
     * Spider settings keyed by spider id. Kept as instance state (not static) so the table
     * belongs to the bean that fills it; it is written only in {@link #init()}.
     */
    private final HashMap<String, SpiderProperty> propertyMap = new HashMap<>();

    /**
     * Indexes the configured spiders by id once dependency injection has finished.
     */
    @PostConstruct
    public void init() {
        if (spiderConfig.getConfigs() == null) {
            // Nothing configured: leave the table empty instead of failing at startup.
            return;
        }
        for (SpiderProperty property : spiderConfig.getConfigs()) {
            propertyMap.put(property.getId(), property);
        }
    }

    /**
     * Posts the request, serialized as JSON, to {@code /start_crawl} of the selected spider.
     *
     * @param dto      run request; may be {@code null} only for spiders without a configured
     *                 API key, because the key is written into the request
     * @param spiderId id of a configured spider
     * @return the JSON response body
     * @throws JsonProcessingException  if {@code dto} cannot be serialized
     * @throws IllegalArgumentException if {@code spiderId} is not configured, or if the spider
     *                                  needs an API key and {@code dto} is {@code null}
     */
    @Override
    public JSONObject run(SpiderDto dto, String spiderId) throws JsonProcessingException {
        SpiderProperty property = requireProperty(spiderId);
        if (StringUtils.isNotBlank(property.getApiKey())) {
            if (dto == null) {
                throw new IllegalArgumentException(
                        "A request body is required for spider " + spiderId);
            }
            dto.setLlm_api_key(property.getApiKey());
        }
        String body = objectMapper.writeValueAsString(dto);
        return WebClient.create(property.getUrl()).post().uri("/start_crawl")
                .contentType(MediaType.APPLICATION_JSON)
                .bodyValue(body)
                .retrieve()
                .bodyToMono(JSONObject.class)
                .block();
    }

    /**
     * Posts to {@code /crawl_status} of the selected spider.
     *
     * @param spiderId id of a configured spider
     * @return the JSON response body
     * @throws IllegalArgumentException if {@code spiderId} is not configured
     */
    @Override
    public JSONObject status(String spiderId) {
        return post(spiderId, "/crawl_status");
    }

    /**
     * Posts to {@code /logs} of the selected spider.
     *
     * @param spiderId id of a configured spider
     * @return the JSON response body
     * @throws IllegalArgumentException if {@code spiderId} is not configured
     */
    @Override
    public JSONObject logs(String spiderId) {
        return post(spiderId, "/logs");
    }

    /**
     * Posts to {@code /stop_crawl} of the selected spider.
     *
     * @param spiderId id of a configured spider
     * @return the JSON response body
     * @throws IllegalArgumentException if {@code spiderId} is not configured
     */
    @Override
    public JSONObject stop(String spiderId) {
        return post(spiderId, "/stop_crawl");
    }

    /** Sends a body-less POST to {@code path} on the spider's URL and waits for the JSON answer. */
    private JSONObject post(String spiderId, String path) {
        SpiderProperty property = requireProperty(spiderId);
        return WebClient.create(property.getUrl()).post().uri(path)
                .retrieve()
                .bodyToMono(JSONObject.class)
                .block();
    }

    /** Looks up the settings for a spider id, rejecting ids that are not configured. */
    private SpiderProperty requireProperty(String spiderId) {
        SpiderProperty property = propertyMap.get(spiderId);
        if (property == null) {
            throw new IllegalArgumentException("Unknown spider id: " + spiderId);
        }
        return property;
    }
}

View File

@ -0,0 +1,16 @@
package com.zsc.edu.dify.modules.dify.service;
import com.alibaba.fastjson.JSONObject;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.zsc.edu.dify.modules.dify.dto.SpiderDto;
/**
 * Operations that can be performed on a spider (crawler) selected by its id.
 */
public interface SpiderService {

    /**
     * Runs the spider with the given request.
     *
     * @param dto      run request parameters
     * @param spiderId id of the spider to run
     * @return the JSON result of the operation
     * @throws JsonProcessingException if the request cannot be serialized to JSON
     */
    JSONObject run(SpiderDto dto, String spiderId) throws JsonProcessingException;

    /**
     * Queries the status of the spider.
     *
     * @param spiderId id of the spider
     * @return the JSON result of the operation
     */
    JSONObject status(String spiderId);

    /**
     * Fetches the logs of the spider.
     *
     * @param spiderId id of the spider
     * @return the JSON result of the operation
     */
    JSONObject logs(String spiderId);

    /**
     * Stops the spider.
     *
     * @param spiderId id of the spider
     * @return the JSON result of the operation
     */
    JSONObject stop(String spiderId);
}

View File

@ -83,11 +83,25 @@ dify:
dataset:
api-key: dataset-kN5WTJ8jR877YfN1A34JceVg # 请替换为实际的知识库api-key, 若不需要调用知识库可不填
quanguo:
quanguo: &quanguo
spider-id: ${QUANGUO_ID:77c068fd-d5b6-4c33-97d8-db5511a09b26}
url: http://${QUANGUO_HOST:47.112.173.8:6806/api/v1}
api-key: ${QUANGUO_API_KEY:77c068fd-d5b6-4c33-97d8-db5511a09b26}
spider3:
url: http://${QUANGUO_HOST:47.112.173.8:6257/api/v1}
api-key: ${QUANGUO_API_KEY:77c068fd-d5b6-4c33-97d8-db5511a09b26}
spider3: &spider3
spider-id: ${SPIDER3_ID:f3a7b9c2-5d6e-4b8f-9c1a-2d3e4f5a6b7c}
url: http://${SPIDER3_HOST:47.112.173.8:6257/api/v1}
api-key:
spider:
configs:
# 全国爬虫
- id: ${QUANGUO_ID:77c068fd-d5b6-4c33-97d8-db5511a09b26}
url: http://${QUANGUO_HOST:47.112.173.8:6806/api/v1}
api-key: ${QUANGUO_API_KEY:77c068fd-d5b6-4c33-97d8-db5511a09b26}
# 爬虫3
- id: ${SPIDER3_ID:f3a7b9c2-5d6e-4b8f-9c1a-2d3e4f5a6b7c}
url: http://${SPIDER3_HOST:47.112.173.8:6257/api/v1}
api-key:
# - *quanguo
# - *spider3