Commit 21d6d730 authored by pengxin

发布数据集生成的JSON格式数据进行美化。

parent 9ff61528
...@@ -28,13 +28,7 @@ public interface DatasetCleanService extends IBaseService<DatasetClean, Long> { ...@@ -28,13 +28,7 @@ public interface DatasetCleanService extends IBaseService<DatasetClean, Long> {
* @param filePath 文件地址 * @param filePath 文件地址
* @param dataList 数据集列表 * @param dataList 数据集列表
*/ */
void appendDataListToFile(String filePath, List<DatasetData> dataList,Integer pageNum); void appendDataListToFile(String filePath, List<DatasetData> dataList);
/**
* 解析文件去掉多余的数据,比如文件里面最后一个,多加了一个",",缺少符号[]
* @param filePath 文件地址
*/
void readJsonAppendSymbol(String filePath);
/** /**
* 重新清洗任务 * 重新清洗任务
......
...@@ -5,6 +5,9 @@ import com.baomidou.mybatisplus.core.conditions.update.UpdateWrapper; ...@@ -5,6 +5,9 @@ import com.baomidou.mybatisplus.core.conditions.update.UpdateWrapper;
import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode; import com.fasterxml.jackson.databind.node.ObjectNode;
import com.github.pagehelper.Page; import com.github.pagehelper.Page;
import com.yice.common.core.base.dao.BaseDaoMapper; import com.yice.common.core.base.dao.BaseDaoMapper;
...@@ -34,6 +37,10 @@ import org.springframework.stereotype.Service; ...@@ -34,6 +37,10 @@ import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional; import org.springframework.transaction.annotation.Transactional;
import java.io.*; import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.*; import java.util.*;
import java.util.concurrent.Future; import java.util.concurrent.Future;
import java.util.stream.Collectors; import java.util.stream.Collectors;
...@@ -271,8 +278,7 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp ...@@ -271,8 +278,7 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
* 解析文件去掉多余的数据,比如文件里面最后一个,多加了一个",",缺少符号[] * 解析文件去掉多余的数据,比如文件里面最后一个,多加了一个",",缺少符号[]
* @param filePath 文件地址 * @param filePath 文件地址
*/ */
@Override private void readJsonAppendSymbol(String filePath) {
public void readJsonAppendSymbol(String filePath) {
try (FileReader fileReader = new FileReader(filePath); try (FileReader fileReader = new FileReader(filePath);
BufferedReader bufferedReader = new BufferedReader(fileReader)) { BufferedReader bufferedReader = new BufferedReader(fileReader)) {
...@@ -315,40 +321,62 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp ...@@ -315,40 +321,62 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
* @param dataList 数据集列表 * @param dataList 数据集列表
*/ */
@Override @Override
public void appendDataListToFile(String filePath, List<DatasetData> dataList,Integer pageNum) { public void appendDataListToFile(String filePath, List<DatasetData> dataList) {
FileWriter fileWriter = null; ObjectMapper objectMapper = new ObjectMapper();
objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
Path path = Paths.get(filePath);
ArrayNode arrayNode;
try { try {
//为第一页的情况下 // 读取现有文件内容(如果存在)
if(pageNum == 1) { String existingJson = "";
// 清空文件内容 if (Files.exists(path)) {
File file = new File(filePath); existingJson = new String(Files.readAllBytes(path));
if (file.exists()) { // 去除可能存在的尾随逗号(如果有的话)
// 删除文件 existingJson = existingJson.trim();
file.delete(); if (existingJson.endsWith(",")) {
existingJson = existingJson.substring(0, existingJson.length() - 1);
} }
// 创建一个新的空文件 // 将JSON字符串转换为JsonNode
file.createNewFile(); JsonNode rootNode = objectMapper.readTree(existingJson);
// 如果rootNode是一个ArrayNode,则使用它
if (rootNode.isArray()) {
arrayNode = (ArrayNode) rootNode;
} else {
// 如果不是ArrayNode,则视为文件内容有误或空,创建新的ArrayNode
log.warn("Existing JSON file is not an array, creating a new one.");
arrayNode = JsonNodeFactory.instance.arrayNode();
}
} else {
// 如果文件不存在,创建新的ArrayNode
arrayNode = JsonNodeFactory.instance.arrayNode();
} }
fileWriter = new FileWriter(filePath, true); // 将新数据转换为JSON并添加到现有数组中
// 遍历你的数据列表,并将每一条数据写入到文件中
for (DatasetData data : dataList) { for (DatasetData data : dataList) {
if(StringUtils.isNotBlank(data.getData())) { String jsonString = data.getData();
fileWriter.write(data.getData() + ","); arrayNode.add(objectMapper.readTree(jsonString));
fileWriter.write("\n");
}
} }
} catch (IOException e) {
log.error("file write close is error", e); // 将整个JSON数组转换为格式化的字符串
} finally { String formattedJsonString = objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(arrayNode);
// 如果fileWriter不为空,关闭它
if (fileWriter != null) { // 如果文件原本为空或者不是有效的JSON数组,我们需要写入完整的数组
try { if (existingJson.isEmpty() || ! arrayNode.isArray()) {
fileWriter.close(); Files.write(path, formattedJsonString.getBytes(), StandardOpenOption.CREATE);
} catch (IOException e) { } else {
log.error("file write close is error", e); // 否则,只追加新的元素(不带开头的"["和结尾的"]")
} String newElements = formattedJsonString.substring(1, formattedJsonString.length() - 1); // 去除开闭括号
Files.write(path, (existingJson + "," + newElements + "\n").getBytes(), StandardOpenOption.CREATE, StandardOpenOption.APPEND);
} }
log.info("Data appended to JSON file successfully.");
} catch (IOException e) {
log.error("Error appending to JSON file", e);
} }
} }
......
...@@ -2,6 +2,7 @@ package com.yice.webadmin.app.service.impl; ...@@ -2,6 +2,7 @@ package com.yice.webadmin.app.service.impl;
import cn.hutool.core.collection.CollUtil; import cn.hutool.core.collection.CollUtil;
import com.baomidou.mybatisplus.core.conditions.update.UpdateWrapper; import com.baomidou.mybatisplus.core.conditions.update.UpdateWrapper;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
...@@ -260,8 +261,8 @@ public class DatasetOutputServiceImpl extends BaseService<DatasetOutput, Long> i ...@@ -260,8 +261,8 @@ public class DatasetOutputServiceImpl extends BaseService<DatasetOutput, Long> i
// 创建一个新的HashMap,只包含"output"和"instruction"字段 // 创建一个新的HashMap,只包含"output"和"instruction"字段
Map<String, String> map = new HashMap<>(); Map<String, String> map = new HashMap<>();
map.put(DatasetConstant.OUTPUT, output);
map.put(DatasetConstant.INSTRUCTION, instruction); map.put(DatasetConstant.INSTRUCTION, instruction);
map.put(DatasetConstant.OUTPUT, output);
// 使用pretty printer来格式化JSON字符串 // 使用pretty printer来格式化JSON字符串
String prettyJsonStr = objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(map); String prettyJsonStr = objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(map);
...@@ -339,6 +340,10 @@ public class DatasetOutputServiceImpl extends BaseService<DatasetOutput, Long> i ...@@ -339,6 +340,10 @@ public class DatasetOutputServiceImpl extends BaseService<DatasetOutput, Long> i
cell.setCellValue(rootNode.get(DatasetConstant.INSTRUCTION).textValue()); cell.setCellValue(rootNode.get(DatasetConstant.INSTRUCTION).textValue());
cell = row.createCell(1); cell = row.createCell(1);
cell.setCellValue(rootNode.get(DatasetConstant.OUTPUT).textValue()); cell.setCellValue(rootNode.get(DatasetConstant.OUTPUT).textValue());
//根据内容自动填充列大小
sheet.autoSizeColumn(0);
sheet.autoSizeColumn(1);
} }
String fileName = null; String fileName = null;
InputStream inputStream = null; InputStream inputStream = null;
...@@ -377,6 +382,8 @@ public class DatasetOutputServiceImpl extends BaseService<DatasetOutput, Long> i ...@@ -377,6 +382,8 @@ public class DatasetOutputServiceImpl extends BaseService<DatasetOutput, Long> i
InputStream inputStream = null; InputStream inputStream = null;
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()){ try (ByteArrayOutputStream baos = new ByteArrayOutputStream()){
PrintWriter pw = new PrintWriter(baos); PrintWriter pw = new PrintWriter(baos);
// 计算最大的宽度
int maxWidth = getMaxWidth(dataList);
// 构建表头数据 // 构建表头数据
pw.append(DatasetConstant.INSTRUCTION).append(",").append(DatasetConstant.OUTPUT).append("\n"); pw.append(DatasetConstant.INSTRUCTION).append(",").append(DatasetConstant.OUTPUT).append("\n");
...@@ -390,7 +397,7 @@ public class DatasetOutputServiceImpl extends BaseService<DatasetOutput, Long> i ...@@ -390,7 +397,7 @@ public class DatasetOutputServiceImpl extends BaseService<DatasetOutput, Long> i
instruction = instruction.replace("\n", ""); instruction = instruction.replace("\n", "");
output = output.replace("\n", ""); output = output.replace("\n", "");
pw.append(output).append(",").append(instruction).append("\n"); pw.printf(maxWidth > 0 ? "%-" + maxWidth + "s,%-" + maxWidth + "s\n" : "%s,%s\n", output, instruction);
} }
pw.flush(); pw.flush();
// 转换为InputStream // 转换为InputStream
...@@ -411,6 +418,30 @@ public class DatasetOutputServiceImpl extends BaseService<DatasetOutput, Long> i ...@@ -411,6 +418,30 @@ public class DatasetOutputServiceImpl extends BaseService<DatasetOutput, Long> i
return fileName; return fileName;
} }
/**
 * Returns the length, in characters, of the longest instruction or output text found
 * in the given records, measured after newline characters are removed.
 *
 * <p>Records with null or blank data are skipped, and a field that is absent or not a
 * JSON string is ignored, so that a single incomplete record does not cause a
 * {@code NullPointerException}.
 *
 * @param dataList records whose {@code getData()} holds a JSON document; may be null
 * @return the maximum text length, or 0 when no text field is found
 * @throws JsonProcessingException if a record's data is not valid JSON
 */
private int getMaxWidth(List<DatasetData> dataList) throws JsonProcessingException {
    int maxWidth = 0;
    if (dataList == null) {
        return maxWidth;
    }
    ObjectMapper objectMapper = new ObjectMapper();
    String[] fieldNames = {DatasetConstant.INSTRUCTION, DatasetConstant.OUTPUT};
    for (DatasetData datasetData : dataList) {
        String json = datasetData == null ? null : datasetData.getData();
        if (json == null || json.trim().isEmpty()) {
            continue;
        }
        JsonNode rootNode = objectMapper.readTree(json);
        if (rootNode == null) {
            continue;
        }
        for (String fieldName : fieldNames) {
            // get() yields null for a missing field; textValue() yields null for non-text nodes.
            JsonNode fieldNode = rootNode.get(fieldName);
            String text = fieldNode == null ? null : fieldNode.textValue();
            if (text != null) {
                // Newlines are removed before measuring, as in the original computation.
                maxWidth = Math.max(maxWidth, text.replace("\n", "").length());
            }
        }
    }
    return maxWidth;
}
/** /**
* 构建文件下载链接 * 构建文件下载链接
* @param fileUrl 文件地址 * @param fileUrl 文件地址
......
...@@ -583,7 +583,6 @@ public class DatasetVersionServiceImpl extends BaseService<DatasetVersion, Long> ...@@ -583,7 +583,6 @@ public class DatasetVersionServiceImpl extends BaseService<DatasetVersion, Long>
@Override @Override
public void doDealTaskHandler(Long datasetId, String fileUrl) { public void doDealTaskHandler(Long datasetId, String fileUrl) {
try { try {
Integer index = 0;
Long count = datasetDataService.count(datasetId); Long count = datasetDataService.count(datasetId);
if (count > 0) { if (count > 0) {
int pageSize = DatasetConstant.MAX_SIZE; int pageSize = DatasetConstant.MAX_SIZE;
...@@ -597,18 +596,9 @@ public class DatasetVersionServiceImpl extends BaseService<DatasetVersion, Long> ...@@ -597,18 +596,9 @@ public class DatasetVersionServiceImpl extends BaseService<DatasetVersion, Long>
//写入到数据集中 //写入到数据集中
if(CollUtil.isNotEmpty(dataList)) { if(CollUtil.isNotEmpty(dataList)) {
datasetCleanService.appendDataListToFile(fileUrl, dataList, i); datasetCleanService.appendDataListToFile(fileUrl, dataList);
index ++;
} }
} }
//解析文件去掉多余的数据,比如文件里面最后一个,多加了一个",",缺少符号[]
if(index > 0){
datasetCleanService.readJsonAppendSymbol(fileUrl);
}
//删除为空的数据集数据
this.datasetDataService.deleteByData(datasetId);
} }
} catch (Exception ex) { } catch (Exception ex) {
log.error("do deal with task handler is error:" , ex); log.error("do deal with task handler is error:" , ex);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment