Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
lmp_server
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
lmp
lmp_server
Commits
f19150e5
Commit
f19150e5
authored
Apr 09, 2024
by
pengxin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
导出出错问题修改。
parent
74073a00
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
32 additions
and
20 deletions
+32
-20
DatasetCleanServiceImpl.java
...ce/webadmin/app/service/impl/DatasetCleanServiceImpl.java
+29
-17
DataCleanerUtil.java
...main/java/com/yice/webadmin/app/util/DataCleanerUtil.java
+3
-3
No files found.
application-webadmin/src/main/java/com/yice/webadmin/app/service/impl/DatasetCleanServiceImpl.java
View file @
f19150e5
...
...
@@ -125,13 +125,20 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
public
Future
<
Void
>
executeCleanTaskAsync
(
List
<
DatasetData
>
dataList
,
Long
cleanId
,
Long
datasetId
)
{
asyncDealWithDatasetSaveBatch
(
dataList
,
cleanId
);
dealWithTaskHandler
(
datasetId
,
cleanId
);
updateCleanStatus
(
cleanId
,
DatasetConstant
.
CLEAN_FINISHED
);
return
new
AsyncResult
<>(
null
);
}
/**
* 更新清洗状态
* @param cleanId 清洗标识
*/
private
void
updateCleanStatus
(
Long
cleanId
,
Integer
cleanStatus
)
{
DatasetClean
filter
=
new
DatasetClean
();
filter
.
setCleanStatus
(
DatasetConstant
.
CLEAN_FINISHED
);
filter
.
setCleanStatus
(
cleanStatus
);
filter
.
setFinishTime
(
new
Date
());
filter
.
setCleanId
(
cleanId
);
this
.
updateById
(
filter
);
return
new
AsyncResult
<>(
null
);
}
/**
...
...
@@ -175,12 +182,7 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
if
(
future
!=
null
&&
!
future
.
isDone
())
{
future
.
cancel
(
true
);
}
//暂停清洗
DatasetClean
filter
=
new
DatasetClean
();
filter
.
setCleanStatus
(
DatasetConstant
.
PAUSE_FINISHED
);
filter
.
setFinishTime
(
null
);
filter
.
setCleanId
(
cleanId
);
this
.
updateById
(
filter
);
updateCleanStatus
(
cleanId
,
DatasetConstant
.
PAUSE_FINISHED
);
}
/**
...
...
@@ -193,12 +195,7 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
if
(
null
!=
clean
){
doDatasetCleanHandler
(
clean
.
getDatasetId
(),
cleanId
);
}
//重新清洗
DatasetClean
filter
=
new
DatasetClean
();
filter
.
setCleanStatus
(
DatasetConstant
.
CLEAN_PROGRESS
);
filter
.
setFinishTime
(
null
);
filter
.
setCleanId
(
cleanId
);
this
.
updateById
(
filter
);
updateCleanStatus
(
cleanId
,
DatasetConstant
.
CLEAN_PROGRESS
);
}
/**
...
...
@@ -207,6 +204,7 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
* 1、分页处理每页10000条数据
* 2、更新Mongodb数据库中的数据
* 3、更新json存储地址的数据集数据
* 4、更新版本数据集状态
* ==============================
* 处理数据集
* @param cleanId 清洗任务id
...
...
@@ -233,7 +231,7 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
//写入到数据集中
List
<
DatasetData
>
newDataList
=
dealWithDatasetNodeData
(
dataList
,
datasetId
,
rules
);
if
(
CollUtil
.
isNotEmpty
(
newDataList
))
{
appendDataListToFile
(
datasetVersion
.
getFileUrl
()
,
newDataList
);
appendDataListToFile
(
datasetVersion
.
getFileUrl
()
,
newDataList
,
i
);
}
}
...
...
@@ -313,17 +311,31 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
* @param filePath 文件地址
* @param dataList 数据集列表
*/
public
void
appendDataListToFile
(
String
filePath
,
List
<
DatasetData
>
dataList
)
{
public
void
appendDataListToFile
(
String
filePath
,
List
<
DatasetData
>
dataList
,
Integer
pageNum
)
{
FileWriter
fileWriter
=
null
;
try
{
//为第一页的情况下
if
(
pageNum
==
1
)
{
// 清空文件内容
File
file
=
new
File
(
filePath
);
if
(
file
.
exists
())
{
// 删除文件
file
.
delete
();
}
// 创建一个新的空文件
file
.
createNewFile
();
}
fileWriter
=
new
FileWriter
(
filePath
,
true
);
fileWriter
.
write
(
"["
);
// 遍历你的数据列表,并将每一条数据写入到文件中
for
(
DatasetData
data
:
dataList
)
{
if
(
StringUtils
.
isNotBlank
(
data
.
getData
()))
{
fileWriter
.
write
(
data
.
getData
());
fileWriter
.
write
(
data
.
getData
()
+
","
);
fileWriter
.
write
(
"\n"
);
}
}
fileWriter
.
write
(
"]"
);
}
catch
(
IOException
e
)
{
log
.
error
(
"file write close is errot"
,
e
);
}
finally
{
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/util/DataCleanerUtil.java
View file @
f19150e5
...
...
@@ -144,7 +144,7 @@ public class DataCleanerUtil {
Matcher
matcher
=
pattern
.
matcher
(
data
);
while
(
matcher
.
find
())
{
//如果出现关键字符,则直接替换为空白字符
matcher
.
appendReplacement
(
result
,
""
);
matcher
.
appendReplacement
(
result
,
"
***
"
);
}
matcher
.
appendTail
(
result
);
}
else
{
...
...
@@ -237,7 +237,7 @@ public class DataCleanerUtil {
StringBuffer
result
=
new
StringBuffer
();
double
specialCharacterRatio
=
calculateSpecialCharacterRatio
(
data
);
if
(
specialCharacterRatio
<=
radio
)
{
result
.
append
(
data
.
replaceAll
(
"[#$^&
*
()]"
,
""
));
result
.
append
(
data
.
replaceAll
(
"[#$^&()]"
,
""
));
}
else
{
result
.
append
(
data
);
}
...
...
@@ -254,7 +254,7 @@ public class DataCleanerUtil {
int
specialCharactersCount
=
0
;
for
(
Term
term
:
termList
)
{
if
(
term
.
word
.
matches
(
".*[#$%^&
*
()].*"
))
{
if
(
term
.
word
.
matches
(
".*[#$%^&()].*"
))
{
specialCharactersCount
++;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment