Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
lmp_server
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
lmp
lmp_server
Commits
cf09bb54
Commit
cf09bb54
authored
Apr 11, 2024
by
pengxin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
发布后才能生成文件。
parent
ccf7df3b
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
91 additions
and
26 deletions
+91
-26
DatasetVersionController.java
...ice/webadmin/app/controller/DatasetVersionController.java
+4
-20
DatasetCleanService.java
...va/com/yice/webadmin/app/service/DatasetCleanService.java
+13
-0
DatasetCleanServiceImpl.java
...ce/webadmin/app/service/impl/DatasetCleanServiceImpl.java
+4
-2
DatasetVersionServiceImpl.java
.../webadmin/app/service/impl/DatasetVersionServiceImpl.java
+70
-4
No files found.
application-webadmin/src/main/java/com/yice/webadmin/app/controller/DatasetVersionController.java
View file @
cf09bb54
...
@@ -45,7 +45,6 @@ import org.springframework.web.multipart.MultipartFile;
...
@@ -45,7 +45,6 @@ import org.springframework.web.multipart.MultipartFile;
import
java.io.File
;
import
java.io.File
;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.nio.charset.StandardCharsets
;
import
java.nio.charset.StandardCharsets
;
import
java.nio.file.Files
;
import
java.nio.file.Path
;
import
java.nio.file.Path
;
import
java.nio.file.Paths
;
import
java.nio.file.Paths
;
import
java.util.*
;
import
java.util.*
;
...
@@ -355,13 +354,8 @@ public class DatasetVersionController {
...
@@ -355,13 +354,8 @@ public class DatasetVersionController {
errorMessage
=
"数据验证失败,导入文件不能为空!"
;
errorMessage
=
"数据验证失败,导入文件不能为空!"
;
return
ResponseResult
.
error
(
ErrorCodeEnum
.
ARGUMENT_NULL_EXIST
,
errorMessage
);
return
ResponseResult
.
error
(
ErrorCodeEnum
.
ARGUMENT_NULL_EXIST
,
errorMessage
);
}
}
this
.
saveMongoDB
(
importFile
,
versionId
);
DatasetVersion
datasetVersion
=
this
.
datasetVersionService
.
getById
(
versionId
);
DatasetVersion
datasetVersion
=
this
.
datasetVersionService
.
getById
(
versionId
);
String
versionName
=
datasetVersion
.
getVersionName
();
//先存储文件
String
fullName
=
this
.
saveDatasetFile
(
importFile
,
versionName
,
versionId
);
//再存储数据集配置文件
datasetVersionService
.
saveDatasetInfo
(
versionName
);
datasetVersion
.
setFileUrl
(
fullName
);
datasetVersion
.
setInputStatus
(
DatasetConstant
.
INPUT_STATUS
);
datasetVersion
.
setInputStatus
(
DatasetConstant
.
INPUT_STATUS
);
datasetVersion
.
setDataVolume
(
Long
.
valueOf
(
JSON
.
parseArray
(
new
String
(
importFile
.
getBytes
(),
StandardCharsets
.
UTF_8
)).
size
()));
datasetVersion
.
setDataVolume
(
Long
.
valueOf
(
JSON
.
parseArray
(
new
String
(
importFile
.
getBytes
(),
StandardCharsets
.
UTF_8
)).
size
()));
this
.
datasetVersionService
.
updateById
(
datasetVersion
);
this
.
datasetVersionService
.
updateById
(
datasetVersion
);
...
@@ -369,29 +363,19 @@ public class DatasetVersionController {
...
@@ -369,29 +363,19 @@ public class DatasetVersionController {
}
}
/**
/**
*
保存导入文件
。
*
写入到mongodb中
。
*
*
* @param importFile 导入的文件。
* @param importFile 导入的文件。
* @return 保存的本地文件名。
* @return 保存的本地文件名。
*/
*/
private
String
saveDatasetFile
(
MultipartFile
importFile
,
String
versionName
,
Long
versionId
)
throws
IOException
{
private
void
saveMongoDB
(
MultipartFile
importFile
,
Long
versionId
)
throws
IOException
{
String
fullName
=
pythonConfig
.
getDatasetFileBaseDir
()
+
versionName
+
".json"
;
try
{
try
{
byte
[]
bytes
=
importFile
.
getBytes
();
byte
[]
bytes
=
importFile
.
getBytes
();
Path
path
=
Paths
.
get
(
fullName
);
// 如果没有files文件夹,则创建
if
(!
Files
.
isWritable
(
path
))
{
Files
.
createDirectories
(
Paths
.
get
(
pythonConfig
.
getDatasetFileBaseDir
()));
}
// 文件写入指定路径、应该是追加到文件里面
Files
.
write
(
path
,
bytes
);
// 写入到mongodb中
datasetVersionService
.
writeDatasetFile
(
bytes
,
importFile
.
getOriginalFilename
(),
versionId
);
datasetVersionService
.
writeDatasetFile
(
bytes
,
importFile
.
getOriginalFilename
(),
versionId
);
}
catch
(
IOException
e
)
{
}
catch
(
IOException
e
)
{
log
.
error
(
"Failed to
write
imported file ["
+
importFile
.
getOriginalFilename
()
+
" ]."
,
e
);
log
.
error
(
"Failed to
save mongo db
imported file ["
+
importFile
.
getOriginalFilename
()
+
" ]."
,
e
);
throw
e
;
throw
e
;
}
}
return
fullName
;
}
}
@GetMapping
(
"/export"
)
@GetMapping
(
"/export"
)
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/service/DatasetCleanService.java
View file @
cf09bb54
...
@@ -23,6 +23,19 @@ public interface DatasetCleanService extends IBaseService<DatasetClean, Long> {
...
@@ -23,6 +23,19 @@ public interface DatasetCleanService extends IBaseService<DatasetClean, Long> {
*/
*/
DatasetClean
saveNew
(
DatasetClean
datasetClean
);
DatasetClean
saveNew
(
DatasetClean
datasetClean
);
/**
* Appends a list of dataset records to a file.
* @param filePath path of the file to append to
* @param dataList dataset records to append
* @param pageNum number of the page being written; callers pass the 1-based page index
*                (NOTE(review): the implementation appears to special-case the first page — confirm)
*/
void
appendDataListToFile
(
String
filePath
,
List
<
DatasetData
>
dataList
,
Integer
pageNum
);
/**
* Post-processes a file to remove superfluous data, for example an extra trailing ","
* after the last entry, and to deal with the missing "[]" symbols.
* @param filePath path of the file to process
*/
void
readJsonAppendSymbol
(
String
filePath
);
/**
/**
* 重新清洗任务
* 重新清洗任务
* @param cleanId 清洗任务id
* @param cleanId 清洗任务id
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/service/impl/DatasetCleanServiceImpl.java
View file @
cf09bb54
...
@@ -315,7 +315,8 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
...
@@ -315,7 +315,8 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
* 解析文件去掉多余的数据,比如文件里面最后一个,多加了一个",",缺少符号[]
* 解析文件去掉多余的数据,比如文件里面最后一个,多加了一个",",缺少符号[]
* @param filePath 文件地址
* @param filePath 文件地址
*/
*/
private
void
readJsonAppendSymbol
(
String
filePath
)
{
@Override
public
void
readJsonAppendSymbol
(
String
filePath
)
{
try
(
FileReader
fileReader
=
new
FileReader
(
filePath
);
try
(
FileReader
fileReader
=
new
FileReader
(
filePath
);
BufferedReader
bufferedReader
=
new
BufferedReader
(
fileReader
))
{
BufferedReader
bufferedReader
=
new
BufferedReader
(
fileReader
))
{
...
@@ -357,7 +358,8 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
...
@@ -357,7 +358,8 @@ public class DatasetCleanServiceImpl extends BaseService<DatasetClean, Long> imp
* @param filePath 文件地址
* @param filePath 文件地址
* @param dataList 数据集列表
* @param dataList 数据集列表
*/
*/
private
void
appendDataListToFile
(
String
filePath
,
List
<
DatasetData
>
dataList
,
Integer
pageNum
)
{
@Override
public
void
appendDataListToFile
(
String
filePath
,
List
<
DatasetData
>
dataList
,
Integer
pageNum
)
{
FileWriter
fileWriter
=
null
;
FileWriter
fileWriter
=
null
;
try
{
try
{
//为第一页的情况下
//为第一页的情况下
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/service/impl/DatasetVersionServiceImpl.java
View file @
cf09bb54
...
@@ -13,6 +13,7 @@ import com.github.pagehelper.Page;
...
@@ -13,6 +13,7 @@ import com.github.pagehelper.Page;
import
com.yice.common.core.base.dao.BaseDaoMapper
;
import
com.yice.common.core.base.dao.BaseDaoMapper
;
import
com.yice.common.core.base.service.BaseService
;
import
com.yice.common.core.base.service.BaseService
;
import
com.yice.common.core.object.CallResult
;
import
com.yice.common.core.object.CallResult
;
import
com.yice.common.core.object.MyPageParam
;
import
com.yice.common.core.object.MyRelationParam
;
import
com.yice.common.core.object.MyRelationParam
;
import
com.yice.common.core.util.MyModelUtil
;
import
com.yice.common.core.util.MyModelUtil
;
import
com.yice.common.sequence.wrapper.IdGeneratorWrapper
;
import
com.yice.common.sequence.wrapper.IdGeneratorWrapper
;
...
@@ -23,10 +24,7 @@ import com.yice.webadmin.app.data.DatasetData;
...
@@ -23,10 +24,7 @@ import com.yice.webadmin.app.data.DatasetData;
import
com.yice.webadmin.app.model.DatasetDetail
;
import
com.yice.webadmin.app.model.DatasetDetail
;
import
com.yice.webadmin.app.model.DatasetManage
;
import
com.yice.webadmin.app.model.DatasetManage
;
import
com.yice.webadmin.app.model.DatasetVersion
;
import
com.yice.webadmin.app.model.DatasetVersion
;
import
com.yice.webadmin.app.service.DatasetDataService
;
import
com.yice.webadmin.app.service.*
;
import
com.yice.webadmin.app.service.DatasetDetailService
;
import
com.yice.webadmin.app.service.DatasetManageService
;
import
com.yice.webadmin.app.service.DatasetVersionService
;
import
com.yice.webadmin.app.util.Sha1Util
;
import
com.yice.webadmin.app.util.Sha1Util
;
import
lombok.extern.slf4j.Slf4j
;
import
lombok.extern.slf4j.Slf4j
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.commons.lang3.StringUtils
;
...
@@ -65,6 +63,8 @@ public class DatasetVersionServiceImpl extends BaseService<DatasetVersion, Long>
...
@@ -65,6 +63,8 @@ public class DatasetVersionServiceImpl extends BaseService<DatasetVersion, Long>
private
PythonConfig
pythonConfig
;
private
PythonConfig
pythonConfig
;
@Autowired
@Autowired
private
DatasetDataService
datasetDataService
;
private
DatasetDataService
datasetDataService
;
@Autowired
private
DatasetCleanService
datasetCleanService
;
/**
/**
* 返回当前Service的主表Mapper对象。
* 返回当前Service的主表Mapper对象。
...
@@ -310,6 +310,72 @@ public class DatasetVersionServiceImpl extends BaseService<DatasetVersion, Long>
...
@@ -310,6 +310,72 @@ public class DatasetVersionServiceImpl extends BaseService<DatasetVersion, Long>
return
datasetVersionMapper
.
updateById
(
datasetVersion
)
>
0
;
return
datasetVersionMapper
.
updateById
(
datasetVersion
)
>
0
;
}
}
/**
 * Generates the JSON file of a dataset version and marks that version as published.
 *
 * <p>The target path is {@code <datasetFileBaseDir><versionName>.json}. The records are first
 * written to that file, then the dataset configuration is stored via {@code saveDatasetInfo},
 * and finally the version row is updated with the file path and the published status.
 * Per the original note, the JSON file is not written at import time; it is produced here.
 *
 * @param datasetVersion version whose id and name drive the export
 * @return the result of {@code updateById} for the version row
 * @throws IOException declared for I/O failures while storing the dataset files
 */
public boolean saveDatasetJsonPath(DatasetVersion datasetVersion) throws IOException {
    final String name = datasetVersion.getVersionName();
    final String jsonPath = pythonConfig.getDatasetFileBaseDir() + name + ".json";
    final Long versionId = datasetVersion.getVersionId();

    // Step 1: dump the records into the JSON file.
    doDealTaskHandler(versionId, name, jsonPath);

    // Step 2: store the dataset configuration file.
    saveDatasetInfo(name);

    // Step 3: persist only the fields that changed (id is the lookup key).
    DatasetVersion patch = new DatasetVersion();
    patch.setVersionId(versionId);
    patch.setFileUrl(jsonPath);
    patch.setReleaseStatus(DatasetConstant.STATUS_PUBLISHED);
    boolean updated = updateById(patch);
    return updated;
}
/**
 * Exports the records of a dataset to its JSON file, one page at a time.
 *
 * <p>Steps, as implemented below:
 * <ol>
 *   <li>count the records with {@code datasetDataService.count};</li>
 *   <li>read them in pages of {@code DatasetConstant.MAX_SIZE} and append every non-empty page
 *       to {@code fileUrl};</li>
 *   <li>if at least one page was appended, run {@code readJsonAppendSymbol} on the file
 *       (per the original note: strips the extra trailing "," and handles the missing "[]");</li>
 *   <li>call {@code datasetDataService.deleteByData} (per the original note: removes empty
 *       dataset records).</li>
 * </ol>
 *
 * <p>Any exception is logged and not rethrown, so callers cannot observe a failed export.
 * NOTE(review): the caller goes on to mark the version as published even then — confirm that
 * this best-effort behaviour is intended.
 *
 * @param datasetId   id used to look up the records (the visible caller passes the version id)
 * @param versionName dataset version name; currently unused by this method
 * @param fileUrl     path of the JSON file to write
 */
private void doDealTaskHandler(Long datasetId, String versionName, String fileUrl) {
    try {
        Long count = datasetDataService.count(datasetId);
        if (count > 0) {
            int pageSize = DatasetConstant.MAX_SIZE;
            int totalPages = (int) Math.ceil((double) count / pageSize);
            // Number of pages actually appended. The previous counter was never incremented,
            // so the post-processing step below could never run.
            int writtenPages = 0;
            for (int i = 1; i <= totalPages; i++) {
                MyPageParam param = new MyPageParam();
                param.setPageNum(i);
                param.setPageSize(pageSize);
                List<DatasetData> dataList = datasetDataService.list(datasetId, param);
                // Append this page to the dataset file.
                if (CollUtil.isNotEmpty(dataList)) {
                    datasetCleanService.appendDataListToFile(fileUrl, dataList, i);
                    writtenPages++;
                }
            }
            // Repair the file (extra trailing ",", missing "[]") only when something was written.
            if (writtenPages > 0) {
                datasetCleanService.readJsonAppendSymbol(fileUrl);
            }
            // Remove empty dataset records.
            this.datasetDataService.deleteByData(datasetId);
        }
    } catch (Exception ex) {
        log.error("deal with task handler is error:", ex);
    }
}
/**
/**
* 删除指定数据。
* 删除指定数据。
*
*
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment