Commit 4093e040 authored by pengxin

清洗数据调整。

parent f10a6ef0
...@@ -112,6 +112,11 @@ public class DatasetConstant { ...@@ -112,6 +112,11 @@ public class DatasetConstant {
*/ */
public static final String EMPTY_STR = ""; public static final String EMPTY_STR = "";
/**
* Null string constant: a {@code String}-typed {@code null} reference.
* Note that this is {@code null}, not a blank or empty string (see {@code EMPTY_STR} for the latter).
*/
public static final String NULL_STR = null;
/** /**
* 文本数据清洗 * 文本数据清洗
*/ */
......
...@@ -38,6 +38,14 @@ public interface DatasetDataService { ...@@ -38,6 +38,14 @@ public interface DatasetDataService {
*/ */
DatasetData view(String id, Long versionId); DatasetData view(String id, Long versionId);
/**
* Deletes the empty-document dataset data from the whole collection.
* This method returns nothing; the number of removed records is not reported.
*
* @param versionId version identifier.
*/
void deleteByData(Long versionId);
/** /**
* 查询列表集合总条数 * 查询列表集合总条数
* @param versionId 版本标识 * @param versionId 版本标识
......
...@@ -225,11 +225,16 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp ...@@ -225,11 +225,16 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
param.setPageNum(i); param.setPageNum(i);
param.setPageSize(pageSize); param.setPageSize(pageSize);
List<DatasetData> dataList = datasetDataService.list(datasetId, param); List<DatasetData> dataList = datasetDataService.list(datasetId, param);
//写入到数据集中
List<DatasetData> newDataList = dealWithDatasetNodeData(dataList, datasetId, rules); List<DatasetData> newDataList = dealWithDatasetNodeData(dataList, datasetId, rules);
if(CollUtil.isNotEmpty(newDataList)) { if(CollUtil.isNotEmpty(newDataList)) {
appendDataListToFile(datasetVersion.getFileUrl() ,newDataList); appendDataListToFile(datasetVersion.getFileUrl() ,newDataList);
} }
} }
//删除为空的数据集数据
this.datasetDataService.deleteByData(datasetId);
} }
} catch (Exception ex) { } catch (Exception ex) {
log.error("deal with task handler is error:" , ex); log.error("deal with task handler is error:" , ex);
...@@ -310,8 +315,10 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp ...@@ -310,8 +315,10 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
fileWriter = new FileWriter(filePath, true); fileWriter = new FileWriter(filePath, true);
// 遍历你的数据列表,并将每一条数据写入到文件中 // 遍历你的数据列表,并将每一条数据写入到文件中
for (DatasetData data : dataList) { for (DatasetData data : dataList) {
fileWriter.write(data.getData()); if(StringUtils.isNotBlank(data.getData())) {
fileWriter.write("\n"); fileWriter.write(data.getData());
fileWriter.write("\n");
}
} }
} catch (IOException e) { } catch (IOException e) {
log.error("file write close is errot", e); log.error("file write close is errot", e);
...@@ -338,6 +345,7 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp ...@@ -338,6 +345,7 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
List<DatasetData> newDataList = new ArrayList<>(); List<DatasetData> newDataList = new ArrayList<>();
try { try {
if(CollUtil.isNotEmpty(dataList)) { if(CollUtil.isNotEmpty(dataList)) {
ObjectMapper objectMapper = new ObjectMapper(); ObjectMapper objectMapper = new ObjectMapper();
for (DatasetData datasetData : dataList) { for (DatasetData datasetData : dataList) {
JsonNode rootNode = objectMapper.readTree(datasetData.getData()); JsonNode rootNode = objectMapper.readTree(datasetData.getData());
...@@ -347,9 +355,13 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp ...@@ -347,9 +355,13 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
//校验清洗后的数据是否满足条件,如果满足条件,则进行添加,否则直接移除。 //校验清洗后的数据是否满足条件,如果满足条件,则进行添加,否则直接移除。
if(StringUtils.isNotBlank(output)) { if(StringUtils.isNotBlank(output)) {
datasetData.setData(createNewDataNode(datasetData.getData(), output)); datasetData.setData(createNewDataNode(datasetData.getData(), output));
newDataList.add(datasetData); }else {
datasetData.setData(DatasetConstant.NULL_STR);
} }
newDataList.add(datasetData);
} }
//批量添加数据集列表
this.datasetDataService.updateBatch(newDataList, datasetId); this.datasetDataService.updateBatch(newDataList, datasetId);
} }
}catch (JsonProcessingException ex){ }catch (JsonProcessingException ex){
......
...@@ -4,6 +4,7 @@ import cn.hutool.core.collection.CollUtil; ...@@ -4,6 +4,7 @@ import cn.hutool.core.collection.CollUtil;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.yice.common.core.object.MyPageParam; import com.yice.common.core.object.MyPageParam;
import com.yice.webadmin.app.constant.DatasetConstant;
import com.yice.webadmin.app.constant.MongoConstant; import com.yice.webadmin.app.constant.MongoConstant;
import com.yice.webadmin.app.data.DatasetData; import com.yice.webadmin.app.data.DatasetData;
import com.yice.webadmin.app.service.DatasetDataService; import com.yice.webadmin.app.service.DatasetDataService;
...@@ -163,7 +164,11 @@ public class DatasetDataServiceImpl implements DatasetDataService { ...@@ -163,7 +164,11 @@ public class DatasetDataServiceImpl implements DatasetDataService {
for(DatasetData datasetData : dataList) { for(DatasetData datasetData : dataList) {
// 解析data字段的字符串为Document或Bson // 解析data字段的字符串为Document或Bson
Document dataDocument = Document.parse(datasetData.getData()); Document dataDocument = null;
if(StringUtils.isNotBlank(datasetData.getData())) {
dataDocument = Document.parse(datasetData.getData());
}
// 构建查询条件 // 构建查询条件
Query query = new Query(Criteria.where(MongoConstant.ID).is(datasetData.getId())); Query query = new Query(Criteria.where(MongoConstant.ID).is(datasetData.getId()));
...@@ -174,6 +179,7 @@ public class DatasetDataServiceImpl implements DatasetDataService { ...@@ -174,6 +179,7 @@ public class DatasetDataServiceImpl implements DatasetDataService {
// 执行更新操作 // 执行更新操作
mongoTemplate.updateFirst(query, update, MongoConstant.COLLECT_NAME + versionId); mongoTemplate.updateFirst(query, update, MongoConstant.COLLECT_NAME + versionId);
} }
} }
} }
...@@ -202,4 +208,17 @@ public class DatasetDataServiceImpl implements DatasetDataService { ...@@ -202,4 +208,17 @@ public class DatasetDataServiceImpl implements DatasetDataService {
public void delete(Long versionId) { public void delete(Long versionId) {
mongoTemplate.dropCollection(MongoConstant.COLLECT_NAME + versionId); mongoTemplate.dropCollection(MongoConstant.COLLECT_NAME + versionId);
} }
/**
* Removes the empty dataset data from the collection of the given version.
* A record is selected when its {@code DatasetConstant.DATA} field matches
* {@code DatasetConstant.NULL_STR}. Nothing is returned to the caller.
*
* @param versionId version identifier; appended to {@code MongoConstant.COLLECT_NAME}
*                  to form the name of the collection that is cleaned up.
*/
@Override
public void deleteByData(Long versionId) {
    // Each dataset version is stored in its own collection.
    String collectionName = MongoConstant.COLLECT_NAME + versionId;
    Query emptyDataQuery = new Query(
            Criteria.where(DatasetConstant.DATA).is(DatasetConstant.NULL_STR));
    mongoTemplate.remove(emptyDataQuery, collectionName);
}
} }
...@@ -7,6 +7,7 @@ import com.yice.webadmin.app.constant.DatasetCleanConstant; ...@@ -7,6 +7,7 @@ import com.yice.webadmin.app.constant.DatasetCleanConstant;
import com.yice.webadmin.app.constant.DatasetConstant; import com.yice.webadmin.app.constant.DatasetConstant;
import com.yice.webadmin.app.data.DatasetRule; import com.yice.webadmin.app.data.DatasetRule;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import java.util.*; import java.util.*;
import java.util.regex.Matcher; import java.util.regex.Matcher;
...@@ -37,6 +38,8 @@ public class DataCleanerUtil { ...@@ -37,6 +38,8 @@ public class DataCleanerUtil {
* @return 返回清洗后的数据 * @return 返回清洗后的数据
*/ */
public static String buildCleanAfterData(String data, List<DatasetRule> rules) { public static String buildCleanAfterData(String data, List<DatasetRule> rules) {
if(StringUtils.isEmpty(data)) return DatasetConstant.EMPTY_STR;
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
for (DatasetRule rule : rules) { for (DatasetRule rule : rules) {
if(rule.getArgs() > 0) { if(rule.getArgs() > 0) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment