Commit 0634e753 authored by pengxin

添加数据清洗功能。

parent 32843a49
package com.yice.webadmin.app.data;
import io.swagger.annotations.ApiModel;
import io.swagger.annotations.ApiModelProperty;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.mapping.Document;
import org.springframework.data.mongodb.core.mapping.Field;
import java.util.Date;
/**
 * Entity mapped to the {@code dataset_data} MongoDB collection.
 *
 * <p>Accessors, {@code equals}/{@code hashCode}/{@code toString} and both
 * constructors are generated by Lombok. The all-args constructor takes the
 * fields in declaration order, so the field order below must not change.
 */
@Document(collection = "dataset_data")
@ApiModel
@Data
@NoArgsConstructor
@AllArgsConstructor
public class DatasetData {

    /** Document identifier. */
    @Id
    @ApiModelProperty(name = "_id", value = "文档标识")
    private String id;

    /** Version identifier, persisted as {@code version_id}. */
    @Field("version_id")
    @ApiModelProperty(name = "version_id", value = "版本标识")
    private Long versionId;

    /** JSON-formatted data held as a string, persisted as {@code data}. */
    @Field("data")
    @ApiModelProperty(name = "data", value = "json格式数据")
    private String data;

    /** Creation time, persisted as {@code create_time}. */
    @Field("create_time")
    @ApiModelProperty(name = "create_time", value = "创建时间")
    private Date createTime;

    /** Mark status, persisted as {@code mark_status}. */
    @Field("mark_status")
    @ApiModelProperty(name = "mark_status", value = "标记状态")
    private Integer markStatus;
}
package com.yice.webadmin.app.data;
import io.swagger.annotations.ApiModel;
import io.swagger.annotations.ApiModelProperty;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.mapping.Document;
import org.springframework.data.mongodb.core.mapping.Field;
import java.util.Date;
/**
 * Entity mapped to the {@code dataset_data_clean} MongoDB collection.
 *
 * <p>Accessors, {@code equals}/{@code hashCode}/{@code toString} and both
 * constructors are generated by Lombok. The all-args constructor takes the
 * fields in declaration order, so the field order below must not change.
 */
@Document(collection = "dataset_data_clean")
@ApiModel
@Data
@NoArgsConstructor
@AllArgsConstructor
public class DatasetDataClean {

    /** Document identifier. */
    @Id
    @ApiModelProperty(name = "_id", value = "文档标识")
    private String id;

    /** Identifier of the cleaning task, persisted as {@code clean_id}. */
    @Field("clean_id")
    @ApiModelProperty(name = "clean_id", value = "清洗任务标识id")
    private Long cleanId;

    /** Data before cleaning, persisted as {@code clean_before_data}. */
    @Field("clean_before_data")
    @ApiModelProperty(name = "clean_before_data", value = "清洗前数据")
    private String cleanBeforeData;

    /** Data after cleaning, persisted as {@code clean_after_data}. */
    @Field("clean_after_data")
    @ApiModelProperty(name = "clean_after_data", value = "清洗后数据")
    private String cleanAfterData;

    /** Creation time, persisted as {@code create_time}. */
    @Field("create_time")
    @ApiModelProperty(name = "create_time", value = "创建时间")
    private Date createTime;
}
......@@ -7,6 +7,7 @@ import lombok.Data;
import lombok.NoArgsConstructor;
import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.mapping.Document;
import org.springframework.data.mongodb.core.mapping.Field;
import java.util.Date;
......@@ -17,19 +18,33 @@ import java.util.Date;
@Document(collection = "dataset_data_deduplicate")
public class DatasetDataDeduplicate {

    // Entity mapped to the "dataset_data_deduplicate" MongoDB collection.
    // NOTE(review): the class-level annotations above @Document are outside
    // this view; the field order is kept as-is in case a generated all-args
    // constructor depends on it.

    /** Document identifier. */
    @Id
    @ApiModelProperty(name = "_id", value = "文档标识")
    private String id;

    /** Identifier of the cleaning task, persisted as {@code clean_id}. */
    @Field("clean_id")
    @ApiModelProperty(name = "clean_id", value = "清洗任务标识id")
    private Long cleanId;

    /** Data before cleaning, persisted as {@code clean_before_data}. */
    @Field("clean_before_data")
    @ApiModelProperty(name = "clean_before_data", value = "清洗前数据")
    private String cleanBeforeData;

    /** Data after cleaning, persisted as {@code clean_after_data}. */
    @Field("clean_after_data")
    @ApiModelProperty(name = "clean_after_data", value = "清洗后数据")
    private String cleanAfterData;

    /** Creation time, persisted as {@code create_time}. */
    @Field("create_time")
    @ApiModelProperty(name = "create_time", value = "创建时间")
    private Date createTime;
}
package com.yice.webadmin.app.data;
import io.swagger.annotations.ApiModel;
import io.swagger.annotations.ApiModelProperty;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.mapping.Document;
import org.springframework.data.mongodb.core.mapping.Field;
import java.util.Date;
/**
 * Entity mapped to the {@code dataset_data_desensitive} MongoDB collection.
 *
 * <p>Accessors, {@code equals}/{@code hashCode}/{@code toString} and both
 * constructors are generated by Lombok. The all-args constructor takes the
 * fields in declaration order, so the field order below must not change.
 */
@Document(collection = "dataset_data_desensitive")
@ApiModel
@Data
@NoArgsConstructor
@AllArgsConstructor
public class DatasetDataDesensitive {

    /** Document identifier. */
    @Id
    @ApiModelProperty(name = "_id", value = "文档标识")
    private String id;

    /** Identifier of the cleaning task, persisted as {@code clean_id}. */
    @Field("clean_id")
    @ApiModelProperty(name = "clean_id", value = "清洗任务标识id")
    private Long cleanId;

    /** Data before cleaning, persisted as {@code clean_before_data}. */
    @Field("clean_before_data")
    @ApiModelProperty(name = "clean_before_data", value = "清洗前数据")
    private String cleanBeforeData;

    /** Data after cleaning, persisted as {@code clean_after_data}. */
    @Field("clean_after_data")
    @ApiModelProperty(name = "clean_after_data", value = "清洗后数据")
    private String cleanAfterData;

    /** Creation time, persisted as {@code create_time}. */
    @Field("create_time")
    @ApiModelProperty(name = "create_time", value = "创建时间")
    private Date createTime;
}
package com.yice.webadmin.app.data;
import io.swagger.annotations.ApiModel;
import io.swagger.annotations.ApiModelProperty;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.mapping.Document;
import org.springframework.data.mongodb.core.mapping.Field;
import java.util.Date;
/**
 * Entity mapped to the {@code dataset_data_filter} MongoDB collection.
 *
 * <p>Accessors, {@code equals}/{@code hashCode}/{@code toString} and both
 * constructors are generated by Lombok. The all-args constructor takes the
 * fields in declaration order, so the field order below must not change.
 */
@Document(collection = "dataset_data_filter")
@ApiModel
@Data
@NoArgsConstructor
@AllArgsConstructor
public class DatasetDataFilter {

    /** Document identifier. */
    @Id
    @ApiModelProperty(name = "_id", value = "文档标识")
    private String id;

    /** Identifier of the cleaning task, persisted as {@code clean_id}. */
    @Field("clean_id")
    @ApiModelProperty(name = "clean_id", value = "清洗任务标识id")
    private Long cleanId;

    /** Filtered content, persisted as {@code content}. */
    @Field("content")
    @ApiModelProperty(name = "content", value = "过滤内容")
    private String content;

    /** Creation time, persisted as {@code create_time}. */
    @Field("create_time")
    @ApiModelProperty(name = "create_time", value = "创建时间")
    private Date createTime;
}
package com.yice.webadmin.app.service.impl;
import cn.hutool.core.collection.CollUtil;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.yice.common.core.object.MyPageParam;
......@@ -13,6 +14,7 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.query.Criteria;
import org.springframework.data.mongodb.core.query.Query;
import org.springframework.data.mongodb.core.query.Update;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
......@@ -157,7 +159,22 @@ public class DatasetDataServiceImpl implements DatasetDataService {
*/
@Override
public void updateBatch(List<DatasetData> dataList, Long versionId) {
    // A null or empty batch has nothing to write.
    if (CollUtil.isEmpty(dataList)) {
        return;
    }
    // All records of one call go to the same collection, whose name is the
    // shared prefix followed by the version id.
    String collectionName = MongoConstant.COLLECT_NAME + versionId;
    // Records are updated one by one; the list itself is never handed to
    // save(), which is meant for a single entity rather than a collection.
    for (DatasetData datasetData : dataList) {
        // Parse the data string so the parsed structure, not the raw text, is written.
        Document dataDocument = Document.parse(datasetData.getData());
        // Match the stored record by its id.
        Query query = new Query(Criteria.where(MongoConstant.ID).is(datasetData.getId()));
        // Only the data field is replaced; other stored fields are left untouched.
        Update update = new Update().set(MongoConstant.DATA, dataDocument);
        mongoTemplate.updateFirst(query, update, collectionName);
    }
}
/**
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment