Commit e1d2a7fc authored by pengxin's avatar pengxin

完善数据集文件导入导出操作。

parent c79917f7
...@@ -65,6 +65,11 @@ ...@@ -65,6 +65,11 @@
<artifactId>core</artifactId> <artifactId>core</artifactId>
<version>3.4.1</version> <version>3.4.1</version>
</dependency> </dependency>
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>4.3</version>
</dependency>
<dependency> <dependency>
<groupId>com.github.houbb</groupId> <groupId>com.github.houbb</groupId>
<artifactId>opencc4j</artifactId> <artifactId>opencc4j</artifactId>
......
...@@ -12,6 +12,36 @@ public class DatasetConstant { ...@@ -12,6 +12,36 @@ public class DatasetConstant {
*/ */
public static final Integer INPUT_STATUS = 1; public static final Integer INPUT_STATUS = 1;
/**
 * File extension for ".xlsx" data files.
 */
public static final String XLSX_TYPE = ".xlsx";
/**
 * File extension for ".xls" data files.
 */
public static final String XLS_TYPE = ".xls";
/**
 * File extension for ".csv" data files.
 */
public static final String CSV_TYPE = ".csv";
/**
 * File extension for ".txt" data files.
 */
public static final String TXT_TYPE = ".txt";
/**
 * File extension for ".jsonl" data files.
 */
public static final String JSONL_TYPE = ".jsonl";
/**
 * The "." character.
 */
public static final String DOT = ".";
/** /**
* 已发布状态 * 已发布状态
*/ */
...@@ -27,6 +57,11 @@ public class DatasetConstant { ...@@ -27,6 +57,11 @@ public class DatasetConstant {
*/ */
public static final Integer UNMARK = 0; public static final Integer UNMARK = 0;
/**
 * Name of the temporary JSON file.
 */
public static final String TEMP_JSON = "test.json";
/** /**
* 默认单次写入10000条数据 * 默认单次写入10000条数据
*/ */
...@@ -52,6 +87,11 @@ public class DatasetConstant { ...@@ -52,6 +87,11 @@ public class DatasetConstant {
*/ */
public static final String FILE_NAME = "output."; public static final String FILE_NAME = "output.";
/**
 * Name used for temporary files (the value carries no extension).
 */
public static final String TEMP_FILE_NAME = "temp";
/** /**
* 导出位置 * 导出位置
*/ */
...@@ -67,6 +107,21 @@ public class DatasetConstant { ...@@ -67,6 +107,21 @@ public class DatasetConstant {
*/ */
public static final String OUTPUT = "output"; public static final String OUTPUT = "output";
/**
 * The name "Sheet1".
 * NOTE(review): presumably the worksheet name used when reading or writing
 * Excel files; confirm against the import/export code.
 */
public static final String SHEET = "Sheet1";
/**
 * Name of the "instruction" data field.
 */
public static final String INSTRUCTION = "instruction";
/**
 * Name of the "input" data field.
 */
public static final String INPUT = "input";
/** /**
* args参数值 * args参数值
*/ */
......
...@@ -2,7 +2,6 @@ package com.yice.webadmin.app.controller; ...@@ -2,7 +2,6 @@ package com.yice.webadmin.app.controller;
import cn.hutool.core.collection.CollUtil; import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.BooleanUtil; import cn.hutool.core.util.BooleanUtil;
import com.alibaba.fastjson.JSON;
import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.ArrayNode;
...@@ -33,13 +32,11 @@ import org.springframework.core.io.UrlResource; ...@@ -33,13 +32,11 @@ import org.springframework.core.io.UrlResource;
import org.springframework.http.HttpHeaders; import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType; import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity; import org.springframework.http.ResponseEntity;
import org.springframework.scheduling.annotation.Async;
import org.springframework.web.bind.annotation.*; import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile; import org.springframework.web.multipart.MultipartFile;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.util.*; import java.util.*;
...@@ -280,8 +277,6 @@ public class DatasetVersionController { ...@@ -280,8 +277,6 @@ public class DatasetVersionController {
return ResponseResult.success(); return ResponseResult.success();
} }
@PostMapping(value = "/detail") @PostMapping(value = "/detail")
public ResponseResult<MyPageData<String>> detail(@MyRequestBody Long versionId, public ResponseResult<MyPageData<String>> detail(@MyRequestBody Long versionId,
@MyRequestBody MyPageParam pageParam) throws IOException { @MyRequestBody MyPageParam pageParam) throws IOException {
...@@ -321,30 +316,10 @@ public class DatasetVersionController { ...@@ -321,30 +316,10 @@ public class DatasetVersionController {
errorMessage = "数据验证失败,导入文件不能为空!"; errorMessage = "数据验证失败,导入文件不能为空!";
return ResponseResult.error(ErrorCodeEnum.ARGUMENT_NULL_EXIST, errorMessage); return ResponseResult.error(ErrorCodeEnum.ARGUMENT_NULL_EXIST, errorMessage);
} }
this.saveMongoDB(importFile,versionId); datasetVersionService.importFile(importFile,versionId);
DatasetVersion datasetVersion = this.datasetVersionService.getById(versionId);
datasetVersion.setInputStatus(DatasetConstant.INPUT_STATUS);
datasetVersion.setDataVolume(Long.valueOf(JSON.parseArray(new String(importFile.getBytes(), StandardCharsets.UTF_8)).size()));
this.datasetVersionService.updateById(datasetVersion);
return ResponseResult.success(); return ResponseResult.success();
} }
/**
* 写入到mongodb中。
*
* @param importFile 导入的文件。
* @return 保存的本地文件名。
*/
private void saveMongoDB(MultipartFile importFile, Long versionId) throws IOException {
try {
byte[] bytes = importFile.getBytes();
datasetVersionService.writeDatasetFile(bytes,importFile.getOriginalFilename(),versionId);
} catch (IOException e) {
log.error("Failed to save mongo db imported file [" + importFile.getOriginalFilename() + " ].", e);
throw e;
}
}
@GetMapping("/export") @GetMapping("/export")
public ResponseEntity<Resource> export(@RequestParam Long versionId) throws IOException { public ResponseEntity<Resource> export(@RequestParam Long versionId) throws IOException {
DatasetVersion datasetVersion = this.datasetVersionService.getById(versionId); DatasetVersion datasetVersion = this.datasetVersionService.getById(versionId);
......
...@@ -21,6 +21,13 @@ public interface DatasetDataService { ...@@ -21,6 +21,13 @@ public interface DatasetDataService {
*/ */
void save(DatasetData datasetData); void save(DatasetData datasetData);
/**
 * Queries the list of dataset data for a version.
 *
 * @param versionId version identifier
 * @return the list of queried objects
 */
List<DatasetData> list(Long versionId);
/** /**
* 开始清洗工作 * 开始清洗工作
* @param datasetId 清洗数据集 * @param datasetId 清洗数据集
......
...@@ -2,6 +2,7 @@ package com.yice.webadmin.app.service; ...@@ -2,6 +2,7 @@ package com.yice.webadmin.app.service;
import com.yice.common.core.base.service.IBaseService; import com.yice.common.core.base.service.IBaseService;
import com.yice.webadmin.app.model.DatasetVersion; import com.yice.webadmin.app.model.DatasetVersion;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException; import java.io.IOException;
import java.util.List; import java.util.List;
...@@ -29,6 +30,25 @@ public interface DatasetVersionService extends IBaseService<DatasetVersion, Long ...@@ -29,6 +30,25 @@ public interface DatasetVersionService extends IBaseService<DatasetVersion, Long
*/ */
void saveNewBatch(List<DatasetVersion> datasetVersionList); void saveNewBatch(List<DatasetVersion> datasetVersionList);
/**
 * Processes a dataset.
 *
 * <p>As described by the original author:
 * <ol>
 *   <li>the data is handled page by page, 10000 records per page;</li>
 *   <li>each page is written to a JSON file.</li>
 * </ol>
 *
 * <p>This method returns nothing.
 *
 * @param datasetId described by the original comment as the cleaning-task id
 *                  (NOTE(review): the name suggests a dataset id; confirm
 *                  against the implementation)
 * @param fileUrl   presumably the location of the file the data is written
 *                  to; TODO confirm against the implementation
 */
void doDealTaskHandler(Long datasetId, String fileUrl);
/**
* 导入文件
*
* @param importFile 文件对象。
* @param versionId 版本标识。
*/
void importFile(MultipartFile importFile, Long versionId) throws IOException;
/** /**
* 写入json格式路径 * 写入json格式路径
* @param datasetVersion * @param datasetVersion
......
...@@ -83,6 +83,18 @@ public class DatasetDataServiceImpl implements DatasetDataService { ...@@ -83,6 +83,18 @@ public class DatasetDataServiceImpl implements DatasetDataService {
MongoConstant.COLLECT_NAME + versionId); MongoConstant.COLLECT_NAME + versionId);
} }
/**
 * Fetches the dataset records stored for the given version.
 *
 * <p>The lookup runs against the collection named
 * {@code MongoConstant.COLLECT_NAME + versionId} and filters on the
 * {@code MongoConstant.VERSION} field being equal to {@code versionId}.
 *
 * @param versionId version identifier
 * @return the matching {@link DatasetData} objects
 */
@Override
public List<DatasetData> list(Long versionId) {
    final Criteria byVersion = Criteria.where(MongoConstant.VERSION).is(versionId);
    final Query versionQuery = new Query(byVersion);
    final String collectionName = MongoConstant.COLLECT_NAME + versionId;
    return mongoTemplate.find(versionQuery, DatasetData.class, collectionName);
}
/** /**
* 开始清洗工作 * 开始清洗工作
* @param datasetId 数据集对应的版本 * @param datasetId 数据集对应的版本
......
package com.yice.webadmin.app.util;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
/**
 * Static helpers for serializing objects to JSON and for reading spreadsheet
 * cells as text.
 */
public class JsonUtils {
    /**
     * Shared mapper. Building an {@link ObjectMapper} is comparatively
     * expensive and a configured instance can be reused across calls, so it is
     * created once instead of on every {@link #toJson(Object)} invocation.
     */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /**
     * Serializes an object to its JSON string representation.
     *
     * @param object the object to serialize
     * @return the JSON text produced by Jackson
     * @throws RuntimeException wrapping the {@link JsonProcessingException}
     *                          when serialization fails
     */
    public static String toJson(Object object) {
        try {
            return OBJECT_MAPPER.writeValueAsString(object);
        } catch (JsonProcessingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads the content of a spreadsheet cell as a string.
     *
     * <ul>
     *   <li>{@code null} cell, or any cell type not listed below: empty string;</li>
     *   <li>STRING: the cell text;</li>
     *   <li>NUMERIC, date-formatted: {@code toString()} of the cell's date value;</li>
     *   <li>NUMERIC, otherwise: {@link Double#toString(double)} of the value,
     *       so whole numbers carry a trailing {@code .0};</li>
     *   <li>BOOLEAN: {@code "true"} or {@code "false"};</li>
     *   <li>FORMULA: the formula text itself, not its evaluated result.</li>
     * </ul>
     *
     * @param cell the cell to read; may be {@code null}
     * @return the cell content as text, never {@code null} for the handled types
     */
    public static String getCellValueAsString(Cell cell) {
        if (cell == null) {
            return "";
        }
        switch (cell.getCellType()) {
            case STRING:
                return cell.getStringCellValue();
            case NUMERIC:
                if (DateUtil.isCellDateFormatted(cell)) {
                    return cell.getDateCellValue().toString();
                } else {
                    return Double.toString(cell.getNumericCellValue());
                }
            case BOOLEAN:
                return Boolean.toString(cell.getBooleanCellValue());
            case FORMULA:
                // Returns the formula expression as written; no evaluation is attempted.
                return cell.getCellFormula();
            default:
                return "";
        }
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment