Commit e1d2a7fc authored by pengxin's avatar pengxin

完善数据集文件导入导出操作。

parent c79917f7
......@@ -65,6 +65,11 @@
<artifactId>core</artifactId>
<version>3.4.1</version>
</dependency>
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>4.3</version>
</dependency>
<dependency>
<groupId>com.github.houbb</groupId>
<artifactId>opencc4j</artifactId>
......
......@@ -12,6 +12,36 @@ public class DatasetConstant {
*/
public static final Integer INPUT_STATUS = 1;
/**
* File extension for the ".xlsx" data format.
*/
public static final String XLSX_TYPE = ".xlsx";
/**
* File extension for the ".xls" data format.
*/
public static final String XLS_TYPE = ".xls";
/**
* File extension for the ".csv" data format.
*/
public static final String CSV_TYPE = ".csv";
/**
* File extension for the ".txt" data format.
*/
public static final String TXT_TYPE = ".txt";
/**
* File extension for the ".jsonl" data format.
*/
public static final String JSONL_TYPE = ".jsonl";
/**
* The "." (dot) character.
*/
public static final String DOT = ".";
/**
* 已发布状态
*/
......@@ -27,6 +57,11 @@ public class DatasetConstant {
*/
public static final Integer UNMARK = 0;
/**
* Name of the temporary JSON file ("test.json").
* NOTE(review): where this file is created and cleaned up is not visible here - confirm against usages.
*/
public static final String TEMP_JSON = "test.json";
/**
* 默认单次写入10000条数据
*/
......@@ -52,6 +87,11 @@ public class DatasetConstant {
*/
public static final String FILE_NAME = "output.";
/**
* Base name ("temp") for a temporary file.
* NOTE(review): presumably used as the prefix when creating temp files during import - confirm against usages.
*/
public static final String TEMP_FILE_NAME = "temp";
/**
* 导出位置
*/
......@@ -67,6 +107,21 @@ public class DatasetConstant {
*/
public static final String OUTPUT = "output";
/**
* Worksheet name ("Sheet1").
* NOTE(review): presumably the sheet used when reading/writing Excel files - confirm against usages.
*/
public static final String SHEET = "Sheet1";
/**
* Field name for the "instruction" data.
*/
public static final String INSTRUCTION = "instruction";
/**
* Field name for the "input" data.
*/
public static final String INPUT = "input";
/**
* args参数值
*/
......
......@@ -2,7 +2,6 @@ package com.yice.webadmin.app.controller;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.BooleanUtil;
import com.alibaba.fastjson.JSON;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
......@@ -33,13 +32,11 @@ import org.springframework.core.io.UrlResource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.scheduling.annotation.Async;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
......@@ -280,8 +277,6 @@ public class DatasetVersionController {
return ResponseResult.success();
}
@PostMapping(value = "/detail")
public ResponseResult<MyPageData<String>> detail(@MyRequestBody Long versionId,
@MyRequestBody MyPageParam pageParam) throws IOException {
......@@ -321,30 +316,10 @@ public class DatasetVersionController {
errorMessage = "数据验证失败,导入文件不能为空!";
return ResponseResult.error(ErrorCodeEnum.ARGUMENT_NULL_EXIST, errorMessage);
}
this.saveMongoDB(importFile,versionId);
DatasetVersion datasetVersion = this.datasetVersionService.getById(versionId);
datasetVersion.setInputStatus(DatasetConstant.INPUT_STATUS);
datasetVersion.setDataVolume(Long.valueOf(JSON.parseArray(new String(importFile.getBytes(), StandardCharsets.UTF_8)).size()));
this.datasetVersionService.updateById(datasetVersion);
datasetVersionService.importFile(importFile,versionId);
return ResponseResult.success();
}
/**
 * Passes the uploaded file's bytes, original file name and version id to
 * {@code datasetVersionService.writeDatasetFile}. Going by the method name,
 * that call persists the content to MongoDB.
 *
 * @param importFile the uploaded file.
 * @param versionId  the version identifier forwarded to the service.
 * @throws IOException if an {@code IOException} occurs while reading or writing;
 *                     it is logged and then rethrown unchanged.
 */
private void saveMongoDB(MultipartFile importFile, Long versionId) throws IOException {
    final String originalName = importFile.getOriginalFilename();
    try {
        datasetVersionService.writeDatasetFile(importFile.getBytes(), originalName, versionId);
    } catch (IOException ex) {
        log.error("Failed to save mongo db imported file [" + originalName + " ].", ex);
        throw ex;
    }
}
@GetMapping("/export")
public ResponseEntity<Resource> export(@RequestParam Long versionId) throws IOException {
DatasetVersion datasetVersion = this.datasetVersionService.getById(versionId);
......
......@@ -21,6 +21,13 @@ public interface DatasetDataService {
*/
void save(DatasetData datasetData);
/**
* Queries the list of dataset data records.
* @param versionId version identifier
* @return the list of matching objects
*/
List<DatasetData> list(Long versionId);
/**
* 开始清洗工作
* @param datasetId 清洗数据集
......
......@@ -2,6 +2,7 @@ package com.yice.webadmin.app.service;
import com.yice.common.core.base.service.IBaseService;
import com.yice.webadmin.app.model.DatasetVersion;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.util.List;
......@@ -29,6 +30,25 @@ public interface DatasetVersionService extends IBaseService<DatasetVersion, Long
*/
void saveNewBatch(List<DatasetVersion> datasetVersionList);
/**
* Processes a dataset.
* As described by the original author:
* 1. The data is handled page by page, 10000 records per page.
* 2. Each page is written to a JSON file.
* NOTE(review): the implementation is not visible here - confirm the paging and output behaviour.
*
* @param datasetId identifier of the dataset to process (originally documented as the cleaning task id - confirm)
* @param fileUrl file location used by the handler (presumably the target path - confirm against the implementation)
*/
void doDealTaskHandler(Long datasetId, String fileUrl);
/**
* Imports a file.
*
* @param importFile the file object.
* @param versionId the version identifier.
* @throws IOException declared by the signature; presumably raised when the uploaded file cannot be read - confirm against the implementation.
*/
void importFile(MultipartFile importFile, Long versionId) throws IOException;
/**
* 写入json格式路径
* @param datasetVersion
......
......@@ -83,6 +83,18 @@ public class DatasetDataServiceImpl implements DatasetDataService {
MongoConstant.COLLECT_NAME + versionId);
}
/**
 * Finds every {@code DatasetData} document whose version field equals the given id.
 * The lookup runs against the collection named {@code MongoConstant.COLLECT_NAME + versionId}.
 *
 * @param versionId version identifier
 * @return the matching documents
 */
@Override
public List<DatasetData> list(Long versionId) {
    final Criteria byVersion = Criteria.where(MongoConstant.VERSION).is(versionId);
    final String collectionName = MongoConstant.COLLECT_NAME + versionId;
    return mongoTemplate.find(new Query(byVersion), DatasetData.class, collectionName);
}
/**
* 开始清洗工作
* @param datasetId 数据集对应的版本
......
package com.yice.webadmin.app.util;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
/**
 * Static helpers for JSON serialization and for reading spreadsheet cells as text.
 */
public class JsonUtils {

    /**
     * Shared mapper. Building an {@link ObjectMapper} is expensive, and an instance
     * is safe to share between threads as long as it is not reconfigured after
     * first use, so a single instance is reused instead of creating one per call.
     */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /**
     * Serializes an object to its JSON string form.
     *
     * @param object the object to serialize
     * @return the JSON text
     * @throws RuntimeException wrapping the {@link JsonProcessingException} if serialization fails
     */
    public static String toJson(Object object) {
        try {
            return OBJECT_MAPPER.writeValueAsString(object);
        } catch (JsonProcessingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads the content of a spreadsheet cell as a string.
     *
     * @param cell the cell to read; may be {@code null}
     * @return the cell content as text, or an empty string for a {@code null} cell
     *         or a cell type not handled below
     */
    public static String getCellValueAsString(Cell cell) {
        if (cell == null) {
            return "";
        }
        switch (cell.getCellType()) {
            case STRING:
                return cell.getStringCellValue();
            case NUMERIC:
                if (DateUtil.isCellDateFormatted(cell)) {
                    return cell.getDateCellValue().toString();
                }
                // NOTE(review): whole numbers come out as e.g. "1.0" because the value is a double.
                return Double.toString(cell.getNumericCellValue());
            case BOOLEAN:
                return Boolean.toString(cell.getBooleanCellValue());
            case FORMULA:
                // Returns the formula text itself, not its evaluated result.
                return cell.getCellFormula();
            default:
                return "";
        }
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment