Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
lmp_server
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
lmp
lmp_server
Commits
e1d2a7fc
Commit
e1d2a7fc
authored
Apr 26, 2024
by
pengxin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
完善数据集文件导入导出操作。
parent
c79917f7
Changes
9
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
690 additions
and
59 deletions
+690
-59
pom.xml
application-webadmin/pom.xml
+5
-0
DatasetConstant.java
.../java/com/yice/webadmin/app/constant/DatasetConstant.java
+55
-0
DatasetVersionController.java
...ice/webadmin/app/controller/DatasetVersionController.java
+1
-26
DatasetDataService.java
...ava/com/yice/webadmin/app/service/DatasetDataService.java
+7
-0
DatasetVersionService.java
.../com/yice/webadmin/app/service/DatasetVersionService.java
+20
-0
DatasetDataServiceImpl.java
...ice/webadmin/app/service/impl/DatasetDataServiceImpl.java
+12
-0
DatasetOutputServiceImpl.java
...e/webadmin/app/service/impl/DatasetOutputServiceImpl.java
+290
-26
DatasetVersionServiceImpl.java
.../webadmin/app/service/impl/DatasetVersionServiceImpl.java
+249
-7
JsonUtils.java
...n/src/main/java/com/yice/webadmin/app/util/JsonUtils.java
+51
-0
No files found.
application-webadmin/pom.xml
View file @
e1d2a7fc
...
...
@@ -65,6 +65,11 @@
<artifactId>
core
</artifactId>
<version>
3.4.1
</version>
</dependency>
<dependency>
<groupId>
com.opencsv
</groupId>
<artifactId>
opencsv
</artifactId>
<version>
4.3
</version>
</dependency>
<dependency>
<groupId>
com.github.houbb
</groupId>
<artifactId>
opencc4j
</artifactId>
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/constant/DatasetConstant.java
View file @
e1d2a7fc
...
...
@@ -12,6 +12,36 @@ public class DatasetConstant {
*/
public
static
final
Integer
INPUT_STATUS
=
1
;
/**
* ".xlsx"数据格式
*/
public
static
final
String
XLSX_TYPE
=
".xlsx"
;
/**
* ".xls"数据格式
*/
public
static
final
String
XLS_TYPE
=
".xls"
;
/**
* ".csv" data format
*/
public
static
final
String
CSV_TYPE
=
".csv"
;
/**
* ".txt"数据格式
*/
public
static
final
String
TXT_TYPE
=
".txt"
;
/**
* ".jsonl"数据格式
*/
public
static
final
String
JSONL_TYPE
=
".jsonl"
;
/**
* "."符号
*/
public
static
final
String
DOT
=
"."
;
/**
* 已发布状态
*/
...
...
@@ -27,6 +57,11 @@ public class DatasetConstant {
*/
public
static
final
Integer
UNMARK
=
0
;
/**
* JSON临时文件
*/
public
static
final
String
TEMP_JSON
=
"test.json"
;
/**
* 默认单次写入10000条数据
*/
...
...
@@ -52,6 +87,11 @@ public class DatasetConstant {
*/
public
static
final
String
FILE_NAME
=
"output."
;
/**
* Temporary file name
*/
public
static
final
String
TEMP_FILE_NAME
=
"temp"
;
/**
* 导出位置
*/
...
...
@@ -67,6 +107,21 @@ public class DatasetConstant {
*/
public
static
final
String
OUTPUT
=
"output"
;
/**
* Sheet name
*/
public
static
final
String
SHEET
=
"Sheet1"
;
/**
* instruction数据
*/
public
static
final
String
INSTRUCTION
=
"instruction"
;
/**
* input data
*/
public
static
final
String
INPUT
=
"input"
;
/**
* args参数值
*/
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/controller/DatasetVersionController.java
View file @
e1d2a7fc
...
...
@@ -2,7 +2,6 @@ package com.yice.webadmin.app.controller;
import
cn.hutool.core.collection.CollUtil
;
import
cn.hutool.core.util.BooleanUtil
;
import
com.alibaba.fastjson.JSON
;
import
com.fasterxml.jackson.databind.JsonNode
;
import
com.fasterxml.jackson.databind.ObjectMapper
;
import
com.fasterxml.jackson.databind.node.ArrayNode
;
...
...
@@ -33,13 +32,11 @@ import org.springframework.core.io.UrlResource;
import
org.springframework.http.HttpHeaders
;
import
org.springframework.http.MediaType
;
import
org.springframework.http.ResponseEntity
;
import
org.springframework.scheduling.annotation.Async
;
import
org.springframework.web.bind.annotation.*
;
import
org.springframework.web.multipart.MultipartFile
;
import
java.io.File
;
import
java.io.IOException
;
import
java.nio.charset.StandardCharsets
;
import
java.nio.file.Path
;
import
java.nio.file.Paths
;
import
java.util.*
;
...
...
@@ -280,8 +277,6 @@ public class DatasetVersionController {
return
ResponseResult
.
success
();
}
@PostMapping
(
value
=
"/detail"
)
public
ResponseResult
<
MyPageData
<
String
>>
detail
(
@MyRequestBody
Long
versionId
,
@MyRequestBody
MyPageParam
pageParam
)
throws
IOException
{
...
...
@@ -321,30 +316,10 @@ public class DatasetVersionController {
errorMessage
=
"数据验证失败,导入文件不能为空!"
;
return
ResponseResult
.
error
(
ErrorCodeEnum
.
ARGUMENT_NULL_EXIST
,
errorMessage
);
}
this
.
saveMongoDB
(
importFile
,
versionId
);
DatasetVersion
datasetVersion
=
this
.
datasetVersionService
.
getById
(
versionId
);
datasetVersion
.
setInputStatus
(
DatasetConstant
.
INPUT_STATUS
);
datasetVersion
.
setDataVolume
(
Long
.
valueOf
(
JSON
.
parseArray
(
new
String
(
importFile
.
getBytes
(),
StandardCharsets
.
UTF_8
)).
size
()));
this
.
datasetVersionService
.
updateById
(
datasetVersion
);
datasetVersionService
.
importFile
(
importFile
,
versionId
);
return
ResponseResult
.
success
();
}
/**
* Reads the uploaded file's bytes and hands them, together with the original
* filename and the version id, to datasetVersionService.writeDatasetFile.
* An IOException is logged with the original filename and then rethrown.
*
* @param importFile the uploaded file.
* @param versionId the version identifier forwarded to writeDatasetFile.
* @throws IOException if reading the upload or writing the dataset file fails.
*/
private
void
saveMongoDB
(
MultipartFile
importFile
,
Long
versionId
)
throws
IOException
{
try
{
byte
[]
bytes
=
importFile
.
getBytes
();
datasetVersionService
.
writeDatasetFile
(
bytes
,
importFile
.
getOriginalFilename
(),
versionId
);
}
catch
(
IOException
e
)
{
log
.
error
(
"Failed to save mongo db imported file ["
+
importFile
.
getOriginalFilename
()
+
" ]."
,
e
);
throw
e
;
}
}
@GetMapping
(
"/export"
)
public
ResponseEntity
<
Resource
>
export
(
@RequestParam
Long
versionId
)
throws
IOException
{
DatasetVersion
datasetVersion
=
this
.
datasetVersionService
.
getById
(
versionId
);
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/service/DatasetDataService.java
View file @
e1d2a7fc
...
...
@@ -21,6 +21,13 @@ public interface DatasetDataService {
*/
void
save
(
DatasetData
datasetData
);
/**
* Queries the list of dataset records.
* @param versionId version identifier
* @return the list of matching records
*/
List
<
DatasetData
>
list
(
Long
versionId
);
/**
* 开始清洗工作
* @param datasetId 清洗数据集
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/service/DatasetVersionService.java
View file @
e1d2a7fc
...
...
@@ -2,6 +2,7 @@ package com.yice.webadmin.app.service;
import
com.yice.common.core.base.service.IBaseService
;
import
com.yice.webadmin.app.model.DatasetVersion
;
import
org.springframework.web.multipart.MultipartFile
;
import
java.io.IOException
;
import
java.util.List
;
...
...
@@ -29,6 +30,25 @@ public interface DatasetVersionService extends IBaseService<DatasetVersion, Long
*/
void
saveNewBatch
(
List
<
DatasetVersion
>
datasetVersionList
);
/**
* Processes a dataset.
* ==============================
* 1. Handles the data page by page, 10000 records per page.
* 2. Writes each page to a JSON file.
* ==============================
* NOTE(review): the steps above are carried over from the original comment; the implementation is not visible from this interface.
* @param datasetId dataset identifier (the original comment called it the cleaning-task id; TODO confirm)
* @param fileUrl presumably the location of the file involved in the processing; TODO confirm against the implementation
*/
void
doDealTaskHandler
(
Long
datasetId
,
String
fileUrl
);
/**
* Imports a file.
*
* @param importFile the file object.
* @param versionId the version identifier.
* @throws IOException declared by the signature; the failure conditions are defined by the implementation.
*/
void
importFile
(
MultipartFile
importFile
,
Long
versionId
)
throws
IOException
;
/**
* 写入json格式路径
* @param datasetVersion
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/service/impl/DatasetDataServiceImpl.java
View file @
e1d2a7fc
...
...
@@ -83,6 +83,18 @@ public class DatasetDataServiceImpl implements DatasetDataService {
MongoConstant
.
COLLECT_NAME
+
versionId
);
}
/**
 * Fetches the dataset records stored for one version.
 *
 * @param versionId version identifier; used as the filter value and as the
 *                  suffix of the collection name.
 * @return the {@code DatasetData} documents matching the version filter.
 */
@Override
public List<DatasetData> list(Long versionId) {
    // The query targets the collection named COLLECT_NAME + versionId.
    String collectionName = MongoConstant.COLLECT_NAME + versionId;
    Criteria byVersion = Criteria.where(MongoConstant.VERSION).is(versionId);
    Query versionQuery = new Query(byVersion);
    return mongoTemplate.find(versionQuery, DatasetData.class, collectionName);
}
/**
* 开始清洗工作
* @param datasetId 数据集对应的版本
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/service/impl/DatasetOutputServiceImpl.java
View file @
e1d2a7fc
This diff is collapsed.
Click to expand it.
application-webadmin/src/main/java/com/yice/webadmin/app/service/impl/DatasetVersionServiceImpl.java
View file @
e1d2a7fc
This diff is collapsed.
Click to expand it.
application-webadmin/src/main/java/com/yice/webadmin/app/util/JsonUtils.java
0 → 100644
View file @
e1d2a7fc
package
com
.
yice
.
webadmin
.
app
.
util
;
import
com.fasterxml.jackson.core.JsonProcessingException
;
import
com.fasterxml.jackson.databind.ObjectMapper
;
import
org.apache.poi.ss.usermodel.Cell
;
import
org.apache.poi.ss.usermodel.DateUtil
;
/**
 * Static helpers for JSON serialization and for reading spreadsheet cells as text.
 */
public class JsonUtils {

    /**
     * Shared mapper. Building an ObjectMapper is comparatively expensive, and a
     * configured instance can be reused across calls, so it is created once
     * rather than on every {@link #toJson(Object)} invocation.
     */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /**
     * Serializes an object to its JSON string form.
     *
     * @param object the object to serialize.
     * @return the JSON text produced by Jackson.
     * @throws RuntimeException wrapping the {@link JsonProcessingException} if
     *                          serialization fails.
     */
    public static String toJson(Object object) {
        try {
            return OBJECT_MAPPER.writeValueAsString(object);
        } catch (JsonProcessingException e) {
            // Keep the unchecked type callers already see; add context and preserve the cause.
            throw new RuntimeException("Failed to serialize object to JSON", e);
        }
    }

    /**
     * Reads the content of a spreadsheet cell as a string.
     *
     * @param cell the cell to read; may be {@code null}.
     * @return the cell content as text, or an empty string when the cell is
     *         {@code null} or of a type not handled below.
     */
    public static String getCellValueAsString(Cell cell) {
        if (cell == null) {
            return "";
        }
        switch (cell.getCellType()) {
            case STRING:
                return cell.getStringCellValue();
            case NUMERIC:
                if (DateUtil.isCellDateFormatted(cell)) {
                    return cell.getDateCellValue().toString();
                }
                // NOTE(review): Double.toString renders whole numbers as "1.0" and
                // large values in scientific notation; confirm this is what importers expect.
                return Double.toString(cell.getNumericCellValue());
            case BOOLEAN:
                return Boolean.toString(cell.getBooleanCellValue());
            case FORMULA:
                // NOTE(review): returns the formula text, not its evaluated result.
                return cell.getCellFormula();
            default:
                return "";
        }
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment