feat(spider): 重构爬虫模块
- 新增 SpiderConfig 和 SpiderProperty 类来管理爬虫配置 - 新增 SpiderService 接口和 SpiderServiceImpl 实现类来处理爬虫相关操作 - 重构了爬虫运行、状态、日志和停止等功能 - 删除了旧的 SpiderStrategy、SpiderStrategyFactory 等类 - 更新了 application-dev.yml 中的爬虫配置
This commit is contained in:
parent
736673ede8
commit
aeb978ef11
@ -1,12 +0,0 @@
|
||||
package com.zsc.edu.dify.common.strategy;
|
||||
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.zsc.edu.dify.modules.dify.dto.SpiderDto;
|
||||
|
||||
public interface SpiderStrategy {
|
||||
|
||||
JSONObject run(SpiderDto dto) throws JsonProcessingException;
|
||||
|
||||
String getUrl();
|
||||
}
|
@ -1,35 +0,0 @@
|
||||
package com.zsc.edu.dify.common.strategy.factory;
|
||||
|
||||
import com.zsc.edu.dify.common.strategy.SpiderStrategy;
|
||||
import com.zsc.edu.dify.exception.NotExistException;
|
||||
import jakarta.annotation.PostConstruct;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@Component
|
||||
public class SpiderStrategyFactory {
|
||||
|
||||
@Autowired
|
||||
private List<SpiderStrategy> spiderUrlStrategyList;
|
||||
|
||||
private final Map<String, SpiderStrategy> spiderUrlStrategyMap = new HashMap<>();
|
||||
|
||||
@PostConstruct
|
||||
public void init() {
|
||||
spiderUrlStrategyMap.put("1", spiderUrlStrategyList.get(0));
|
||||
spiderUrlStrategyMap.put("3", spiderUrlStrategyList.get(1));
|
||||
}
|
||||
|
||||
public SpiderStrategy getSpiderStrategy(String strategyName) {
|
||||
SpiderStrategy spiderUrlStrategy = spiderUrlStrategyMap.get(strategyName);
|
||||
if (spiderUrlStrategy == null) {
|
||||
throw new NotExistException(SpiderStrategy.class);
|
||||
}
|
||||
return spiderUrlStrategy;
|
||||
}
|
||||
|
||||
}
|
@ -1,43 +0,0 @@
|
||||
package com.zsc.edu.dify.common.strategy.impl;
|
||||
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.zsc.edu.dify.common.strategy.SpiderStrategy;
|
||||
import com.zsc.edu.dify.modules.dify.dto.SpiderDto;
|
||||
import jakarta.annotation.Resource;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.web.reactive.function.client.WebClient;
|
||||
|
||||
@Component("spider1")
|
||||
public class Spider1StrategyImpl implements SpiderStrategy {
|
||||
|
||||
@Value("${quanguo.url}")
|
||||
private String SPIDER_URL;
|
||||
|
||||
@Value("${quanguo.api-key}")
|
||||
private String API_KEY;
|
||||
|
||||
@Resource
|
||||
private ObjectMapper objectMapper;
|
||||
|
||||
|
||||
@Override
|
||||
public JSONObject run(SpiderDto dto) throws JsonProcessingException {
|
||||
dto.setLlm_api_key(API_KEY);
|
||||
String body = objectMapper.writeValueAsString(dto);
|
||||
return WebClient.create(SPIDER_URL).post().uri("/start_crawl")
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.bodyValue(body)
|
||||
.retrieve()
|
||||
.bodyToMono(JSONObject.class)
|
||||
.block();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUrl() {
|
||||
return SPIDER_URL;
|
||||
}
|
||||
}
|
@ -1,29 +0,0 @@
|
||||
package com.zsc.edu.dify.common.strategy.impl;
|
||||
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.zsc.edu.dify.common.strategy.SpiderStrategy;
|
||||
import com.zsc.edu.dify.modules.dify.dto.SpiderDto;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.web.reactive.function.client.WebClient;
|
||||
|
||||
@Component("spider3")
|
||||
public class Spider3StrategyImpl implements SpiderStrategy {
|
||||
@Value("${spider3.url}")
|
||||
private String SPIDER_URL;
|
||||
|
||||
@Override
|
||||
public JSONObject run(SpiderDto dto) {
|
||||
return WebClient.create(SPIDER_URL).post().uri("/start_crawl")
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.retrieve()
|
||||
.bodyToMono(JSONObject.class)
|
||||
.block();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUrl() {
|
||||
return SPIDER_URL;
|
||||
}
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
package com.zsc.edu.dify.framework.spider;
|
||||
|
||||
import lombok.Data;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author Yao
|
||||
*/
|
||||
@Data
|
||||
@Configuration
|
||||
@EnableConfigurationProperties(SpiderConfig.class)
|
||||
@ConfigurationProperties(prefix = "spider")
|
||||
public class SpiderConfig {
|
||||
private List<SpiderProperty> configs;
|
||||
}
|
||||
|
@ -0,0 +1,10 @@
|
||||
package com.zsc.edu.dify.framework.spider;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class SpiderProperty {
|
||||
private String id;
|
||||
private String url;
|
||||
private String apiKey;
|
||||
}
|
@ -10,20 +10,16 @@ import com.zsc.edu.dify.modules.operationLog.entity.OperationLogAnnotation;
|
||||
import io.github.guoshiqiufeng.dify.workflow.dto.request.WorkflowRunRequest;
|
||||
import io.github.guoshiqiufeng.dify.workflow.dto.response.WorkflowRunResponse;
|
||||
import jakarta.annotation.Resource;
|
||||
import org.springframework.security.access.prepost.PreAuthorize;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/spider2")
|
||||
public class Spider2Controller {
|
||||
@RequestMapping("/api/spider/dify")
|
||||
public class DifySpiderController {
|
||||
@Resource
|
||||
private DifyWorkflowService difyWorkflowService;
|
||||
|
||||
@Resource
|
||||
private AppEntityService appEntityService;
|
||||
|
||||
/**
|
||||
* 运行广州公共资源交易中心 招标小助手
|
||||
*
|
||||
@ -36,14 +32,4 @@ public class Spider2Controller {
|
||||
request.setUserId(SecurityUtil.getUserInfo().id.toString());
|
||||
return ExceptionUtil.difyException(() -> difyWorkflowService.run(request, appId));
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据appType获取应用列表
|
||||
* @return
|
||||
*/
|
||||
@GetMapping("/apps")
|
||||
@DataPermission
|
||||
public List<AppEntity> getAppsByAppType(){
|
||||
return appEntityService.selectByAppType(AppEntity.AppType.SCRAPER.getValue());
|
||||
}
|
||||
}
|
@ -1,51 +0,0 @@
|
||||
package com.zsc.edu.dify.modules.dify.controller;
|
||||
|
||||
import com.zsc.edu.dify.exception.ExceptionUtil;
|
||||
import com.zsc.edu.dify.framework.mybatisplus.DataPermission;
|
||||
import com.zsc.edu.dify.framework.security.SecurityUtil;
|
||||
import com.zsc.edu.dify.modules.dify.entity.AppEntity;
|
||||
import com.zsc.edu.dify.modules.dify.service.AppEntityService;
|
||||
import com.zsc.edu.dify.modules.dify.service.DifyWorkflowService;
|
||||
import com.zsc.edu.dify.modules.operationLog.entity.OperationLogAnnotation;
|
||||
import io.github.guoshiqiufeng.dify.workflow.dto.request.WorkflowRunRequest;
|
||||
import io.github.guoshiqiufeng.dify.workflow.dto.response.WorkflowRunResponse;
|
||||
import jakarta.annotation.Resource;
|
||||
import org.springframework.security.access.prepost.PreAuthorize;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/ppt")
|
||||
public class PPTController {
|
||||
|
||||
@Resource
|
||||
private DifyWorkflowService difyWorkflowService;
|
||||
|
||||
@Resource
|
||||
private AppEntityService appEntityService;
|
||||
|
||||
/**
|
||||
* 运行从可研申报书生成科技项目PPT_工作流
|
||||
*
|
||||
* @param request
|
||||
* @return
|
||||
*/
|
||||
@PostMapping("/run/{appId}")
|
||||
@OperationLogAnnotation(content = "'dify工作流'", operationType = "运行")
|
||||
public WorkflowRunResponse runWorkflow(@RequestBody WorkflowRunRequest request, @PathVariable String appId) {
|
||||
request.setUserId(SecurityUtil.getUserInfo().id.toString());
|
||||
return ExceptionUtil.difyException(() -> difyWorkflowService.run(request, appId));
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据appType获取应用列表
|
||||
* @return
|
||||
*/
|
||||
@GetMapping("/apps")
|
||||
@DataPermission
|
||||
public List<AppEntity> getAppsByAppType(){
|
||||
return appEntityService.selectByAppType(AppEntity.AppType.PPT.getValue());
|
||||
}
|
||||
|
||||
}
|
@ -2,54 +2,39 @@ package com.zsc.edu.dify.modules.dify.controller;
|
||||
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.zsc.edu.dify.common.strategy.SpiderStrategy;
|
||||
import com.zsc.edu.dify.common.strategy.factory.SpiderStrategyFactory;
|
||||
import com.zsc.edu.dify.modules.dify.dto.SpiderDto;
|
||||
import com.zsc.edu.dify.modules.dify.service.SpiderService;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
import org.springframework.web.reactive.function.client.WebClient;
|
||||
|
||||
/**
|
||||
* @description: 自定义爬虫
|
||||
* @author: yao
|
||||
*/
|
||||
@RestController
|
||||
@RequestMapping("/api/spider")
|
||||
public class SpiderController {
|
||||
|
||||
@Autowired
|
||||
private SpiderStrategyFactory spiderStrategyFactory;
|
||||
private SpiderService spiderService;
|
||||
|
||||
@PostMapping("/run")
|
||||
public JSONObject run(@RequestBody(required = false) SpiderDto dto, @RequestParam String spiderId) throws JsonProcessingException {
|
||||
SpiderStrategy spiderStrategy = spiderStrategyFactory.getSpiderStrategy(spiderId);
|
||||
return spiderStrategy.run(dto);
|
||||
@PostMapping("/run/{spiderId}")
|
||||
public JSONObject run(@RequestBody(required = false) SpiderDto dto, @PathVariable String spiderId) throws JsonProcessingException {
|
||||
return spiderService.run(dto, spiderId);
|
||||
}
|
||||
|
||||
@PostMapping("/status")
|
||||
public JSONObject status(@RequestParam String spiderId) {
|
||||
SpiderStrategy spiderStrategy = spiderStrategyFactory.getSpiderStrategy(spiderId);
|
||||
String url = spiderStrategy.getUrl();
|
||||
return WebClient.create(url).post().uri("/crawl_status")
|
||||
.retrieve()
|
||||
.bodyToMono(JSONObject.class)
|
||||
.block();
|
||||
@PostMapping("/status/{spiderId}")
|
||||
public JSONObject status(@PathVariable String spiderId) {
|
||||
return spiderService.status(spiderId);
|
||||
}
|
||||
|
||||
@PostMapping("/logs")
|
||||
public JSONObject logs(@RequestParam String spiderId) {
|
||||
SpiderStrategy spiderStrategy = spiderStrategyFactory.getSpiderStrategy(spiderId);
|
||||
String url = spiderStrategy.getUrl();
|
||||
return WebClient.create(url).post().uri("/logs")
|
||||
.retrieve()
|
||||
.bodyToMono(JSONObject.class)
|
||||
.block();
|
||||
@PostMapping("/logs/{spiderId}")
|
||||
public JSONObject logs(@PathVariable String spiderId) {
|
||||
return spiderService.logs(spiderId);
|
||||
}
|
||||
|
||||
@PostMapping("/stop")
|
||||
public JSONObject stop(@RequestParam String spiderId) {
|
||||
SpiderStrategy spiderStrategy = spiderStrategyFactory.getSpiderStrategy(spiderId);
|
||||
String url = spiderStrategy.getUrl();
|
||||
return WebClient.create(url).post().uri("/stop_crawl")
|
||||
.retrieve()
|
||||
.bodyToMono(JSONObject.class)
|
||||
.block();
|
||||
@PostMapping("/stop/{spiderId}")
|
||||
public JSONObject stop(@PathVariable String spiderId) {
|
||||
return spiderService.stop(spiderId);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,51 +0,0 @@
|
||||
package com.zsc.edu.dify.modules.dify.controller;
|
||||
|
||||
import com.zsc.edu.dify.exception.ExceptionUtil;
|
||||
import com.zsc.edu.dify.framework.mybatisplus.DataPermission;
|
||||
import com.zsc.edu.dify.framework.security.SecurityUtil;
|
||||
import com.zsc.edu.dify.modules.dify.entity.AppEntity;
|
||||
import com.zsc.edu.dify.modules.dify.service.AppEntityService;
|
||||
import com.zsc.edu.dify.modules.operationLog.entity.OperationLogAnnotation;
|
||||
import io.github.guoshiqiufeng.dify.chat.DifyChat;
|
||||
import io.github.guoshiqiufeng.dify.chat.dto.request.ChatMessageSendRequest;
|
||||
import io.github.guoshiqiufeng.dify.chat.dto.response.ChatMessageSendResponse;
|
||||
import jakarta.annotation.Resource;
|
||||
import org.springframework.security.access.prepost.PreAuthorize;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/word")
|
||||
public class WordController {
|
||||
|
||||
@Resource
|
||||
private DifyChat difyChat;
|
||||
@Resource
|
||||
private AppEntityService appEntityService;
|
||||
|
||||
/**
|
||||
* 发送职业创新申报书_对话流路线
|
||||
*
|
||||
* @param sendRequest 消息参数 (可以自定义参数,调用 difyChat 实例时重新组装即可),
|
||||
* 用户 id可以改为从上下文(token)获取,
|
||||
* apikey 建议在数据库进行存储,前端调用时传智能体 id,从数据库查询
|
||||
*/
|
||||
@PostMapping("/completions/{appId}")
|
||||
@OperationLogAnnotation(content = "'dify对话'", operationType = "发送")
|
||||
public ChatMessageSendResponse sendChatMessage(@RequestBody ChatMessageSendRequest sendRequest, @PathVariable String appId){
|
||||
sendRequest.setApiKey(appEntityService.getApikey(appId));
|
||||
sendRequest.setUserId(SecurityUtil.getUserInfo().id.toString());
|
||||
return ExceptionUtil.difyException(()->difyChat.send(sendRequest));
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据appType获取应用列表
|
||||
* @return
|
||||
*/
|
||||
@GetMapping("/apps")
|
||||
@DataPermission
|
||||
public List<AppEntity> getAppsByAppType(){
|
||||
return appEntityService.selectByAppType(AppEntity.AppType.WORD.getValue());
|
||||
}
|
||||
}
|
@ -0,0 +1,77 @@
|
||||
package com.zsc.edu.dify.modules.dify.service.Impl;
|
||||
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.zsc.edu.dify.framework.spider.SpiderConfig;
|
||||
import com.zsc.edu.dify.framework.spider.SpiderProperty;
|
||||
import com.zsc.edu.dify.modules.dify.dto.SpiderDto;
|
||||
import com.zsc.edu.dify.modules.dify.service.SpiderService;
|
||||
import jakarta.annotation.PostConstruct;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.reactive.function.client.WebClient;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
@RequiredArgsConstructor
|
||||
@Service
|
||||
public class SpiderServiceImpl implements SpiderService {
|
||||
|
||||
private final ObjectMapper objectMapper;
|
||||
|
||||
private final SpiderConfig spiderConfig;
|
||||
|
||||
private static final HashMap<String, SpiderProperty> PROPERTY_MAP = new HashMap<>();
|
||||
|
||||
@PostConstruct
|
||||
public void init() {
|
||||
for (SpiderProperty property : spiderConfig.getConfigs()) {
|
||||
PROPERTY_MAP.put(property.getId(), property);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public JSONObject run(SpiderDto dto, String spiderId) throws JsonProcessingException {
|
||||
SpiderProperty property = PROPERTY_MAP.get(spiderId);
|
||||
if (StringUtils.isNotBlank(property.getApiKey())) {
|
||||
dto.setLlm_api_key(property.getApiKey());
|
||||
}
|
||||
String body = objectMapper.writeValueAsString(dto);
|
||||
return WebClient.create(property.getUrl()).post().uri("/start_crawl")
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.bodyValue(body)
|
||||
.retrieve()
|
||||
.bodyToMono(JSONObject.class)
|
||||
.block();
|
||||
}
|
||||
|
||||
@Override
|
||||
public JSONObject status(String spiderId) {
|
||||
SpiderProperty property = PROPERTY_MAP.get(spiderId);
|
||||
return WebClient.create(property.getUrl()).post().uri("/crawl_status")
|
||||
.retrieve()
|
||||
.bodyToMono(JSONObject.class)
|
||||
.block();
|
||||
}
|
||||
|
||||
@Override
|
||||
public JSONObject logs(String spiderId) {
|
||||
SpiderProperty property = PROPERTY_MAP.get(spiderId);
|
||||
return WebClient.create(property.getUrl()).post().uri("/logs")
|
||||
.retrieve()
|
||||
.bodyToMono(JSONObject.class)
|
||||
.block();
|
||||
}
|
||||
|
||||
@Override
|
||||
public JSONObject stop(String spiderId) {
|
||||
SpiderProperty property = PROPERTY_MAP.get(spiderId);
|
||||
return WebClient.create(property.getUrl()).post().uri("/stop_crawl")
|
||||
.retrieve()
|
||||
.bodyToMono(JSONObject.class)
|
||||
.block();
|
||||
}
|
||||
}
|
@ -0,0 +1,16 @@
|
||||
package com.zsc.edu.dify.modules.dify.service;
|
||||
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.zsc.edu.dify.modules.dify.dto.SpiderDto;
|
||||
|
||||
public interface SpiderService {
|
||||
|
||||
JSONObject run(SpiderDto dto, String spiderId) throws JsonProcessingException;
|
||||
|
||||
JSONObject status(String spiderId);
|
||||
|
||||
JSONObject logs(String spiderId);
|
||||
|
||||
JSONObject stop(String spiderId);
|
||||
}
|
@ -83,11 +83,25 @@ dify:
|
||||
dataset:
|
||||
api-key: dataset-kN5WTJ8jR877YfN1A34JceVg # 请替换为实际的知识库api-key, 若不需要调用知识库可不填
|
||||
|
||||
quanguo:
|
||||
quanguo: &quanguo
|
||||
spider-id: ${QUANGUO_ID:77c068fd-d5b6-4c33-97d8-db5511a09b26}
|
||||
url: http://${QUANGUO_HOST:47.112.173.8:6806/api/v1}
|
||||
api-key: ${QUANGUO_API_KEY:77c068fd-d5b6-4c33-97d8-db5511a09b26}
|
||||
|
||||
spider3:
|
||||
url: http://${QUANGUO_HOST:47.112.173.8:6257/api/v1}
|
||||
api-key: ${QUANGUO_API_KEY:77c068fd-d5b6-4c33-97d8-db5511a09b26}
|
||||
spider3: &spider3
|
||||
spider-id: ${SPIDER3_ID:f3a7b9c2-5d6e-4b8f-9c1a-2d3e4f5a6b7c}
|
||||
url: http://${SPIDER3_HOST:47.112.173.8:6257/api/v1}
|
||||
api-key:
|
||||
|
||||
spider:
|
||||
configs:
|
||||
# 全国爬虫
|
||||
- id: ${QUANGUO_ID:77c068fd-d5b6-4c33-97d8-db5511a09b26}
|
||||
url: http://${QUANGUO_HOST:47.112.173.8:6806/api/v1}
|
||||
api-key: ${QUANGUO_API_KEY:77c068fd-d5b6-4c33-97d8-db5511a09b26}
|
||||
# 爬虫3
|
||||
- id: ${SPIDER3_ID:f3a7b9c2-5d6e-4b8f-9c1a-2d3e4f5a6b7c}
|
||||
url: http://${SPIDER3_HOST:47.112.173.8:6257/api/v1}
|
||||
api-key:
|
||||
# - *quanguo
|
||||
# - *spider3
|
||||
|
Loading…
Reference in New Issue
Block a user