Commit 4093e040 authored by pengxin

清洗数据调整。

parent f10a6ef0
......@@ -112,6 +112,11 @@ public class DatasetConstant {
*/
public static final String EMPTY_STR = "";
/**
* Null string constant: a {@code null} reference, not an empty or blank string.
*/
public static final String NULL_STR = null;
/**
* 文本数据清洗
*/
......
......@@ -38,6 +38,14 @@ public interface DatasetDataService {
*/
DatasetData view(String id, Long versionId);
/**
* Deletes the empty-document dataset data from the whole collection.
*
* @param versionId the version identifier.
*/
void deleteByData(Long versionId);
/**
* 查询列表集合总条数
* @param versionId 版本标识
......
......@@ -225,11 +225,16 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
param.setPageNum(i);
param.setPageSize(pageSize);
List<DatasetData> dataList = datasetDataService.list(datasetId, param);
//写入到数据集中
List<DatasetData> newDataList = dealWithDatasetNodeData(dataList, datasetId, rules);
if(CollUtil.isNotEmpty(newDataList)) {
appendDataListToFile(datasetVersion.getFileUrl() ,newDataList);
}
}
//删除为空的数据集数据
this.datasetDataService.deleteByData(datasetId);
}
} catch (Exception ex) {
log.error("deal with task handler is error:" , ex);
......@@ -310,8 +315,10 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
fileWriter = new FileWriter(filePath, true);
// 遍历你的数据列表,并将每一条数据写入到文件中
for (DatasetData data : dataList) {
fileWriter.write(data.getData());
fileWriter.write("\n");
if(StringUtils.isNotBlank(data.getData())) {
fileWriter.write(data.getData());
fileWriter.write("\n");
}
}
} catch (IOException e) {
log.error("file write close is errot", e);
......@@ -338,6 +345,7 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
List<DatasetData> newDataList = new ArrayList<>();
try {
if(CollUtil.isNotEmpty(dataList)) {
ObjectMapper objectMapper = new ObjectMapper();
for (DatasetData datasetData : dataList) {
JsonNode rootNode = objectMapper.readTree(datasetData.getData());
......@@ -347,9 +355,13 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
//校验清洗后的数据是否满足条件,如果满足条件,则进行添加,否则直接移除。
if(StringUtils.isNotBlank(output)) {
datasetData.setData(createNewDataNode(datasetData.getData(), output));
newDataList.add(datasetData);
}else {
datasetData.setData(DatasetConstant.NULL_STR);
}
newDataList.add(datasetData);
}
//批量添加数据集列表
this.datasetDataService.updateBatch(newDataList, datasetId);
}
}catch (JsonProcessingException ex){
......
......@@ -4,6 +4,7 @@ import cn.hutool.core.collection.CollUtil;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.yice.common.core.object.MyPageParam;
import com.yice.webadmin.app.constant.DatasetConstant;
import com.yice.webadmin.app.constant.MongoConstant;
import com.yice.webadmin.app.data.DatasetData;
import com.yice.webadmin.app.service.DatasetDataService;
......@@ -163,7 +164,11 @@ public class DatasetDataServiceImpl implements DatasetDataService {
for(DatasetData datasetData : dataList) {
// 解析data字段的字符串为Document或Bson
Document dataDocument = Document.parse(datasetData.getData());
Document dataDocument = null;
if(StringUtils.isNotBlank(datasetData.getData())) {
dataDocument = Document.parse(datasetData.getData());
}
// 构建查询条件
Query query = new Query(Criteria.where(MongoConstant.ID).is(datasetData.getId()));
......@@ -174,6 +179,7 @@ public class DatasetDataServiceImpl implements DatasetDataService {
// 执行更新操作
mongoTemplate.updateFirst(query, update, MongoConstant.COLLECT_NAME + versionId);
}
}
}
......@@ -202,4 +208,17 @@ public class DatasetDataServiceImpl implements DatasetDataService {
public void delete(Long versionId) {
    // Drops the entire collection whose name is the collection prefix followed by the version id.
    final String collectionName = MongoConstant.COLLECT_NAME + versionId;
    mongoTemplate.dropCollection(collectionName);
}
/**
 * Removes, from the collection named {@code MongoConstant.COLLECT_NAME + versionId},
 * every document matched by the criteria "{@code DatasetConstant.DATA} is
 * {@code DatasetConstant.NULL_STR}".
 *
 * <p>The method returns nothing; the result of the remove operation is discarded.
 *
 * @param versionId the version identifier, appended to the collection-name prefix.
 */
@Override
public void deleteByData(Long versionId) {
    final String collectionName = MongoConstant.COLLECT_NAME + versionId;
    // Match documents whose data field equals the null marker constant.
    final Criteria nullData = Criteria.where(DatasetConstant.DATA).is(DatasetConstant.NULL_STR);
    mongoTemplate.remove(new Query(nullData), collectionName);
}
}
......@@ -7,6 +7,7 @@ import com.yice.webadmin.app.constant.DatasetCleanConstant;
import com.yice.webadmin.app.constant.DatasetConstant;
import com.yice.webadmin.app.data.DatasetRule;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import java.util.*;
import java.util.regex.Matcher;
......@@ -37,6 +38,8 @@ public class DataCleanerUtil {
* @return 返回清洗后的数据
*/
public static String buildCleanAfterData(String data, List<DatasetRule> rules) {
if(StringUtils.isEmpty(data)) return DatasetConstant.EMPTY_STR;
StringBuilder sb = new StringBuilder();
for (DatasetRule rule : rules) {
if(rule.getArgs() > 0) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment