Commit ccb4b969 authored by pengxin

更新状态

parent 092b5f16
......@@ -110,23 +110,54 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
datasetCleanConfigMapper.insert(datasetCleanConfig);
}
DatasetVersion datasetVersion = new DatasetVersion();
datasetVersion.setVersionId(datasetClean.getDatasetId());
datasetVersion.setCleanStatus(DatasetConstant.CLEAN_PROGRESS);
datasetVersionService.updateById(datasetVersion);
updateVersionStatus(datasetClean.getDatasetId(),DatasetConstant.CLEAN_PROGRESS);
doDatasetCleanHandler(datasetClean.getDatasetId(), datasetClean.getCleanId());
return datasetClean;
}
/**
 * Fetches one page of records for a dataset version and starts a cleaning run over them.
 *
 * <p>The page is requested with {@code DatasetConstant.PAGE_NUM} and
 * {@code DatasetConstant.MAX_PAGE_SIZE}. If no records come back, nothing is started;
 * otherwise the {@link Future} returned by {@code executeCleanTaskAsync} is stored in
 * {@code futures} under {@code cleanId}.
 *
 * <p>The original comment describes this as "clean 100 samples"; presumably
 * {@code MAX_PAGE_SIZE} is 100 -- confirm against {@code DatasetConstant}.
 *
 * <p>NOTE(review): this method is annotated {@code @Async} but is also invoked directly from
 * other methods of this class; Spring's proxy-based {@code @Async} normally does not apply to
 * such self-invocations -- confirm the call really runs on {@code taskExecutor}.
 *
 * @param datasetId identifier of the dataset version whose records are cleaned
 * @param cleanId identifier of the clean task; used as the key in {@code futures}
 */
@Async("taskExecutor")
public void doDatasetCleanHandler(Long datasetId, Long cleanId) {
    MyPageParam pageQuery = new MyPageParam();
    pageQuery.setPageNum(DatasetConstant.PAGE_NUM);
    pageQuery.setPageSize(DatasetConstant.MAX_PAGE_SIZE);
    List<DatasetData> samples = datasetDataService.list(datasetId, pageQuery);
    // Nothing to clean: leave the futures map untouched.
    if (!CollUtil.isNotEmpty(samples)) {
        return;
    }
    Future<Void> running = executeCleanTaskAsync(samples, cleanId, datasetId);
    futures.put(cleanId, running);
}
/**
 * Runs one cleaning pass over the given records and marks the pass as finished.
 *
 * <p>Calls, in order: {@code asyncDealWithDatasetSaveBatch(dataList, cleanId)},
 * {@code dealWithTaskHandler(datasetId, cleanId)}, then sets both the clean record and the
 * dataset version to {@code DatasetConstant.CLEAN_FINISHED}.
 *
 * <p>NOTE(review): the returned {@code AsyncResult} is only created after the calls above have
 * returned, and this method carries no {@code @Async} annotation of its own. Whether any of the
 * work runs on another thread depends on the helpers and on how this method is invoked --
 * confirm, since callers store the result in order to cancel it later.
 *
 * @param dataList records to clean
 * @param cleanId identifier of the clean task
 * @param datasetId identifier of the dataset version being cleaned
 * @return a {@link Future} wrapping {@code null}
 */
public Future<Void> executeCleanTaskAsync(List<DatasetData> dataList, Long cleanId, Long datasetId) {
asyncDealWithDatasetSaveBatch(dataList, cleanId);
dealWithTaskHandler(datasetId,cleanId);
// Both status records are flipped to "finished" only after the two steps above return.
updateCleanStatus(cleanId,DatasetConstant.CLEAN_FINISHED);
updateVersionStatus(datasetId,DatasetConstant.CLEAN_FINISHED);
return new AsyncResult<>(null);
}
/**
 * Writes a clean status onto a dataset version.
 *
 * <p>Sets the version id and the status on a fresh {@code DatasetVersion} and hands it to
 * {@code datasetVersionService.updateById}.
 *
 * @param versionId identifier of the dataset version to update
 * @param cleanStatus clean status value to store
 */
private void updateVersionStatus(Long versionId, Integer cleanStatus) {
    DatasetVersion statusPatch = new DatasetVersion();
    statusPatch.setVersionId(versionId);
    statusPatch.setCleanStatus(cleanStatus);
    datasetVersionService.updateById(statusPatch);
}
/**
* 更新清洗状态
* @param cleanId 清洗标识
......@@ -139,21 +170,6 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
this.updateById(filter);
}
/**
 * Fetches one page of records for a dataset version and starts a cleaning run over them.
 *
 * <p>The page is requested with {@code DatasetConstant.PAGE_NUM} and
 * {@code DatasetConstant.MAX_PAGE_SIZE}. If no records come back, nothing is started;
 * otherwise the {@link Future} returned by {@code executeCleanTaskAsync} is stored in
 * {@code futures} under {@code cleanId}.
 *
 * <p>The original comment describes this as "clean 100 samples"; presumably
 * {@code MAX_PAGE_SIZE} is 100 -- confirm against {@code DatasetConstant}.
 *
 * @param datasetId identifier of the dataset version whose records are cleaned
 * @param cleanId identifier of the clean task; used as the key in {@code futures}
 */
private void doDatasetCleanHandler(Long datasetId, Long cleanId) {
    MyPageParam pageQuery = new MyPageParam();
    pageQuery.setPageNum(DatasetConstant.PAGE_NUM);
    pageQuery.setPageSize(DatasetConstant.MAX_PAGE_SIZE);
    List<DatasetData> samples = datasetDataService.list(datasetId, pageQuery);
    // Nothing to clean: leave the futures map untouched.
    if (!CollUtil.isNotEmpty(samples)) {
        return;
    }
    Future<Void> running = executeCleanTaskAsync(samples, cleanId, datasetId);
    futures.put(cleanId, running);
}
/**
* 只异步处理前100条数据清洗数据
* @param dataList 清洗列表
......@@ -180,7 +196,11 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
if (future != null && !future.isDone()) {
future.cancel(true);
}
updateCleanStatus(cleanId,DatasetConstant.PAUSE_FINISHED);
DatasetClean filter = this.datasetCleanMapper.selectById(cleanId);
if(null != filter){
updateCleanStatus(cleanId,DatasetConstant.PAUSE_FINISHED);
updateVersionStatus(filter.getDatasetId(),DatasetConstant.PAUSE_FINISHED);
}
}
/**
......@@ -192,8 +212,9 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
DatasetClean clean = this.datasetCleanMapper.selectById(cleanId);
if(null != clean){
doDatasetCleanHandler(clean.getDatasetId(), cleanId);
updateCleanStatus(cleanId,DatasetConstant.CLEAN_PROGRESS);
updateVersionStatus(clean.getDatasetId(),DatasetConstant.CLEAN_PROGRESS);
}
updateCleanStatus(cleanId,DatasetConstant.CLEAN_PROGRESS);
}
/**
......
......@@ -143,7 +143,7 @@ public class DataCleanerUtil {
Matcher matcher = pattern.matcher(data);
while (matcher.find()) {
//如果出现关键字符,则直接替换为空白字符
matcher.appendReplacement(result, "***");
matcher.appendReplacement(result, "");
}
matcher.appendTail(result);
} else {
......@@ -236,7 +236,7 @@ public class DataCleanerUtil {
StringBuffer result = new StringBuffer();
double specialCharacterRatio = calculateSpecialCharacterRatio(data);
if (specialCharacterRatio <= radio) {
result.append(data.replaceAll("[#$^&()]", ""));
result.append(data.replaceAll("[#$*^&()]", ""));
} else {
result.append(data);
}
......@@ -253,7 +253,7 @@ public class DataCleanerUtil {
int specialCharactersCount = 0;
for (Term term : termList) {
if (term.word.matches(".*[#$%^&()].*")) {
if (term.word.matches(".*[#$%*^&()].*")) {
specialCharactersCount++;
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment