Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
lmp_server
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
lmp
lmp_server
Commits
4093e040
Commit
4093e040
authored
Apr 08, 2024
by
pengxin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
清洗数据调整。
parent
f10a6ef0
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
51 additions
and
4 deletions
+51
-4
DatasetConstant.java
.../java/com/yice/webadmin/app/constant/DatasetConstant.java
+5
-0
DatasetDataService.java
...ava/com/yice/webadmin/app/service/DatasetDataService.java
+8
-0
DatasetCleanServiceImpl.java
...ce/webadmin/app/service/impl/DatasetCleanServiceImpl.java
+15
-3
DatasetDataServiceImpl.java
...ice/webadmin/app/service/impl/DatasetDataServiceImpl.java
+20
-1
DataCleanerUtil.java
...main/java/com/yice/webadmin/app/util/DataCleanerUtil.java
+3
-0
No files found.
application-webadmin/src/main/java/com/yice/webadmin/app/constant/DatasetConstant.java
View file @
4093e040
...
...
@@ -112,6 +112,11 @@ public class DatasetConstant {
*/
public
static
final
String
EMPTY_STR
=
""
;
/**
* Null placeholder value. Note that this is a {@code null} reference,
* not an empty or whitespace-only string.
*/
public
static
final
String
NULL_STR
=
null
;
/**
* 文本数据清洗
*/
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/service/DatasetDataService.java
View file @
4093e040
...
...
@@ -38,6 +38,14 @@ public interface DatasetDataService {
*/
DatasetData
view
(
String
id
,
Long
versionId
);
/**
* Deletes the empty dataset data documents from the whole collection
* of the given version. This method returns nothing.
*
* @param versionId the version identifier.
*/
void
deleteByData
(
Long
versionId
);
/**
* 查询列表集合总条数
* @param versionId 版本标识
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/service/impl/DatasetCleanServiceImpl.java
View file @
4093e040
...
...
@@ -225,11 +225,16 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
param
.
setPageNum
(
i
);
param
.
setPageSize
(
pageSize
);
List
<
DatasetData
>
dataList
=
datasetDataService
.
list
(
datasetId
,
param
);
//写入到数据集中
List
<
DatasetData
>
newDataList
=
dealWithDatasetNodeData
(
dataList
,
datasetId
,
rules
);
if
(
CollUtil
.
isNotEmpty
(
newDataList
))
{
appendDataListToFile
(
datasetVersion
.
getFileUrl
()
,
newDataList
);
}
}
//删除为空的数据集数据
this
.
datasetDataService
.
deleteByData
(
datasetId
);
}
}
catch
(
Exception
ex
)
{
log
.
error
(
"deal with task handler is error:"
,
ex
);
...
...
@@ -310,8 +315,10 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
fileWriter
=
new
FileWriter
(
filePath
,
true
);
// 遍历你的数据列表,并将每一条数据写入到文件中
for
(
DatasetData
data
:
dataList
)
{
fileWriter
.
write
(
data
.
getData
());
fileWriter
.
write
(
"\n"
);
if
(
StringUtils
.
isNotBlank
(
data
.
getData
()))
{
fileWriter
.
write
(
data
.
getData
());
fileWriter
.
write
(
"\n"
);
}
}
}
catch
(
IOException
e
)
{
log
.
error
(
"file write close is errot"
,
e
);
...
...
@@ -338,6 +345,7 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
List
<
DatasetData
>
newDataList
=
new
ArrayList
<>();
try
{
if
(
CollUtil
.
isNotEmpty
(
dataList
))
{
ObjectMapper
objectMapper
=
new
ObjectMapper
();
for
(
DatasetData
datasetData
:
dataList
)
{
JsonNode
rootNode
=
objectMapper
.
readTree
(
datasetData
.
getData
());
...
...
@@ -347,9 +355,13 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
//校验清洗后的数据是否满足条件,如果满足条件,则进行添加,否则直接移除。
if
(
StringUtils
.
isNotBlank
(
output
))
{
datasetData
.
setData
(
createNewDataNode
(
datasetData
.
getData
(),
output
));
newDataList
.
add
(
datasetData
);
}
else
{
datasetData
.
setData
(
DatasetConstant
.
NULL_STR
);
}
newDataList
.
add
(
datasetData
);
}
//批量添加数据集列表
this
.
datasetDataService
.
updateBatch
(
newDataList
,
datasetId
);
}
}
catch
(
JsonProcessingException
ex
){
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/service/impl/DatasetDataServiceImpl.java
View file @
4093e040
...
...
@@ -4,6 +4,7 @@ import cn.hutool.core.collection.CollUtil;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.yice.common.core.object.MyPageParam
;
import
com.yice.webadmin.app.constant.DatasetConstant
;
import
com.yice.webadmin.app.constant.MongoConstant
;
import
com.yice.webadmin.app.data.DatasetData
;
import
com.yice.webadmin.app.service.DatasetDataService
;
...
...
@@ -163,7 +164,11 @@ public class DatasetDataServiceImpl implements DatasetDataService {
for
(
DatasetData
datasetData
:
dataList
)
{
// 解析data字段的字符串为Document或Bson
Document
dataDocument
=
Document
.
parse
(
datasetData
.
getData
());
Document
dataDocument
=
null
;
if
(
StringUtils
.
isNotBlank
(
datasetData
.
getData
()))
{
dataDocument
=
Document
.
parse
(
datasetData
.
getData
());
}
// 构建查询条件
Query
query
=
new
Query
(
Criteria
.
where
(
MongoConstant
.
ID
).
is
(
datasetData
.
getId
()));
...
...
@@ -174,6 +179,7 @@ public class DatasetDataServiceImpl implements DatasetDataService {
// 执行更新操作
mongoTemplate
.
updateFirst
(
query
,
update
,
MongoConstant
.
COLLECT_NAME
+
versionId
);
}
}
}
...
...
@@ -202,4 +208,17 @@ public class DatasetDataServiceImpl implements DatasetDataService {
public void delete(Long versionId) {
    // The collection name is the shared prefix followed by the version id;
    // the whole collection is dropped, not just its documents.
    String collectionName = MongoConstant.COLLECT_NAME + versionId;
    mongoTemplate.dropCollection(collectionName);
}
/**
 * Removes, from the collection of the given version, every document whose
 * {@link DatasetConstant#DATA} field matches {@link DatasetConstant#NULL_STR}.
 *
 * @param versionId the version identifier; it is appended to
 *                  {@link MongoConstant#COLLECT_NAME} to form the collection name.
 */
@Override
public void deleteByData(Long versionId) {
    String collectionName = MongoConstant.COLLECT_NAME + versionId;
    // Match on the data field being the null placeholder.
    Query emptyDataQuery = new Query(
            Criteria.where(DatasetConstant.DATA).is(DatasetConstant.NULL_STR));
    mongoTemplate.remove(emptyDataQuery, collectionName);
}
}
application-webadmin/src/main/java/com/yice/webadmin/app/util/DataCleanerUtil.java
View file @
4093e040
...
...
@@ -7,6 +7,7 @@ import com.yice.webadmin.app.constant.DatasetCleanConstant;
import
com.yice.webadmin.app.constant.DatasetConstant
;
import
com.yice.webadmin.app.data.DatasetRule
;
import
lombok.extern.slf4j.Slf4j
;
import
org.apache.commons.lang3.StringUtils
;
import
java.util.*
;
import
java.util.regex.Matcher
;
...
...
@@ -37,6 +38,8 @@ public class DataCleanerUtil {
* @return 返回清洗后的数据
*/
public
static
String
buildCleanAfterData
(
String
data
,
List
<
DatasetRule
>
rules
)
{
if
(
StringUtils
.
isEmpty
(
data
))
return
DatasetConstant
.
EMPTY_STR
;
StringBuilder
sb
=
new
StringBuilder
();
for
(
DatasetRule
rule
:
rules
)
{
if
(
rule
.
getArgs
()
>
0
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment