Commit f19150e5 authored by pengxin

导出出错问题修改。

parent 74073a00
...@@ -125,13 +125,20 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp ...@@ -125,13 +125,20 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
public Future<Void> executeCleanTaskAsync(List<DatasetData> dataList, Long cleanId, Long datasetId) { public Future<Void> executeCleanTaskAsync(List<DatasetData> dataList, Long cleanId, Long datasetId) {
asyncDealWithDatasetSaveBatch(dataList, cleanId); asyncDealWithDatasetSaveBatch(dataList, cleanId);
dealWithTaskHandler(datasetId,cleanId); dealWithTaskHandler(datasetId,cleanId);
updateCleanStatus(cleanId,DatasetConstant.CLEAN_FINISHED);
return new AsyncResult<>(null);
}
/**
* 更新清洗状态
* @param cleanId 清洗标识
*/
private void updateCleanStatus (Long cleanId,Integer cleanStatus) {
DatasetClean filter = new DatasetClean(); DatasetClean filter = new DatasetClean();
filter.setCleanStatus(DatasetConstant.CLEAN_FINISHED); filter.setCleanStatus(cleanStatus);
filter.setFinishTime(new Date()); filter.setFinishTime(new Date());
filter.setCleanId(cleanId); filter.setCleanId(cleanId);
this.updateById(filter); this.updateById(filter);
return new AsyncResult<>(null);
} }
/** /**
...@@ -175,12 +182,7 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp ...@@ -175,12 +182,7 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
if (future != null && !future.isDone()) { if (future != null && !future.isDone()) {
future.cancel(true); future.cancel(true);
} }
//暂停清洗 updateCleanStatus(cleanId,DatasetConstant.PAUSE_FINISHED);
DatasetClean filter = new DatasetClean();
filter.setCleanStatus(DatasetConstant.PAUSE_FINISHED);
filter.setFinishTime(null);
filter.setCleanId(cleanId);
this.updateById(filter);
} }
/** /**
...@@ -193,12 +195,7 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp ...@@ -193,12 +195,7 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
if(null != clean){ if(null != clean){
doDatasetCleanHandler(clean.getDatasetId(), cleanId); doDatasetCleanHandler(clean.getDatasetId(), cleanId);
} }
//重新清洗 updateCleanStatus(cleanId,DatasetConstant.CLEAN_PROGRESS);
DatasetClean filter = new DatasetClean();
filter.setCleanStatus(DatasetConstant.CLEAN_PROGRESS);
filter.setFinishTime(null);
filter.setCleanId(cleanId);
this.updateById(filter);
} }
/** /**
...@@ -207,6 +204,7 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp ...@@ -207,6 +204,7 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
* 1、分页处理每页10000条数据 * 1、分页处理每页10000条数据
* 2、更新Mongodb数据库中的数据 * 2、更新Mongodb数据库中的数据
* 3、更新json存储地址的数据集数据 * 3、更新json存储地址的数据集数据
* 4、更新版本数据集状态
* ============================== * ==============================
* 处理数据集 * 处理数据集
* @param cleanId 清洗任务id * @param cleanId 清洗任务id
...@@ -233,7 +231,7 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp ...@@ -233,7 +231,7 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
//写入到数据集中 //写入到数据集中
List<DatasetData> newDataList = dealWithDatasetNodeData(dataList, datasetId, rules); List<DatasetData> newDataList = dealWithDatasetNodeData(dataList, datasetId, rules);
if(CollUtil.isNotEmpty(newDataList)) { if(CollUtil.isNotEmpty(newDataList)) {
appendDataListToFile(datasetVersion.getFileUrl() ,newDataList); appendDataListToFile(datasetVersion.getFileUrl() ,newDataList, i);
} }
} }
...@@ -313,17 +311,31 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp ...@@ -313,17 +311,31 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
* @param filePath 文件地址 * @param filePath 文件地址
* @param dataList 数据集列表 * @param dataList 数据集列表
*/ */
public void appendDataListToFile(String filePath, List<DatasetData> dataList) { public void appendDataListToFile(String filePath, List<DatasetData> dataList,Integer pageNum) {
FileWriter fileWriter = null; FileWriter fileWriter = null;
try { try {
//为第一页的情况下
if(pageNum == 1) {
// 清空文件内容
File file = new File(filePath);
if (file.exists()) {
// 删除文件
file.delete();
}
// 创建一个新的空文件
file.createNewFile();
}
fileWriter = new FileWriter(filePath, true); fileWriter = new FileWriter(filePath, true);
fileWriter.write("[");
// 遍历你的数据列表,并将每一条数据写入到文件中 // 遍历你的数据列表,并将每一条数据写入到文件中
for (DatasetData data : dataList) { for (DatasetData data : dataList) {
if(StringUtils.isNotBlank(data.getData())) { if(StringUtils.isNotBlank(data.getData())) {
fileWriter.write(data.getData()); fileWriter.write(data.getData() + ",");
fileWriter.write("\n"); fileWriter.write("\n");
} }
} }
fileWriter.write("]");
} catch (IOException e) { } catch (IOException e) {
log.error("file write close is errot", e); log.error("file write close is errot", e);
} finally { } finally {
......
...@@ -144,7 +144,7 @@ public class DataCleanerUtil { ...@@ -144,7 +144,7 @@ public class DataCleanerUtil {
Matcher matcher = pattern.matcher(data); Matcher matcher = pattern.matcher(data);
while (matcher.find()) { while (matcher.find()) {
//如果出现关键字符,则直接替换为空白字符 //如果出现关键字符,则直接替换为空白字符
matcher.appendReplacement(result, ""); matcher.appendReplacement(result, "***");
} }
matcher.appendTail(result); matcher.appendTail(result);
} else { } else {
...@@ -237,7 +237,7 @@ public class DataCleanerUtil { ...@@ -237,7 +237,7 @@ public class DataCleanerUtil {
StringBuffer result = new StringBuffer(); StringBuffer result = new StringBuffer();
double specialCharacterRatio = calculateSpecialCharacterRatio(data); double specialCharacterRatio = calculateSpecialCharacterRatio(data);
if (specialCharacterRatio <= radio) { if (specialCharacterRatio <= radio) {
result.append(data.replaceAll("[#$^&*()]", "")); result.append(data.replaceAll("[#$^&()]", ""));
} else { } else {
result.append(data); result.append(data);
} }
...@@ -254,7 +254,7 @@ public class DataCleanerUtil { ...@@ -254,7 +254,7 @@ public class DataCleanerUtil {
int specialCharactersCount = 0; int specialCharactersCount = 0;
for (Term term : termList) { for (Term term : termList) {
if (term.word.matches(".*[#$%^&*()].*")) { if (term.word.matches(".*[#$%^&()].*")) {
specialCharactersCount++; specialCharactersCount++;
} }
} }
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment