Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
lmp_server
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
lmp
lmp_server
Commits
e1d2a7fc
Commit
e1d2a7fc
authored
Apr 26, 2024
by
pengxin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
完善数据集文件导入导出操作。
parent
c79917f7
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
690 additions
and
59 deletions
+690
-59
pom.xml
application-webadmin/pom.xml
+5
-0
DatasetConstant.java
.../java/com/yice/webadmin/app/constant/DatasetConstant.java
+55
-0
DatasetVersionController.java
...ice/webadmin/app/controller/DatasetVersionController.java
+1
-26
DatasetDataService.java
...ava/com/yice/webadmin/app/service/DatasetDataService.java
+7
-0
DatasetVersionService.java
.../com/yice/webadmin/app/service/DatasetVersionService.java
+20
-0
DatasetDataServiceImpl.java
...ice/webadmin/app/service/impl/DatasetDataServiceImpl.java
+12
-0
DatasetOutputServiceImpl.java
...e/webadmin/app/service/impl/DatasetOutputServiceImpl.java
+290
-26
DatasetVersionServiceImpl.java
.../webadmin/app/service/impl/DatasetVersionServiceImpl.java
+249
-7
JsonUtils.java
...n/src/main/java/com/yice/webadmin/app/util/JsonUtils.java
+51
-0
No files found.
application-webadmin/pom.xml
View file @
e1d2a7fc
...
@@ -65,6 +65,11 @@
...
@@ -65,6 +65,11 @@
<artifactId>
core
</artifactId>
<artifactId>
core
</artifactId>
<version>
3.4.1
</version>
<version>
3.4.1
</version>
</dependency>
</dependency>
<dependency>
<groupId>
com.opencsv
</groupId>
<artifactId>
opencsv
</artifactId>
<version>
4.3
</version>
</dependency>
<dependency>
<dependency>
<groupId>
com.github.houbb
</groupId>
<groupId>
com.github.houbb
</groupId>
<artifactId>
opencc4j
</artifactId>
<artifactId>
opencc4j
</artifactId>
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/constant/DatasetConstant.java
View file @
e1d2a7fc
...
@@ -12,6 +12,36 @@ public class DatasetConstant {
...
@@ -12,6 +12,36 @@ public class DatasetConstant {
*/
*/
public
static
final
Integer
INPUT_STATUS
=
1
;
public
static
final
Integer
INPUT_STATUS
=
1
;
/**
* ".xlsx"数据格式
*/
public
static
final
String
XLSX_TYPE
=
".xlsx"
;
/**
* ".xls"数据格式
*/
public
static
final
String
XLS_TYPE
=
".xls"
;
/**
* ".csv"数据格式
*/
public
static
final
String
CSV_TYPE
=
".csv"
;
/**
* ".txt"数据格式
*/
public
static
final
String
TXT_TYPE
=
".txt"
;
/**
* ".jsonl"数据格式
*/
public
static
final
String
JSONL_TYPE
=
".jsonl"
;
/**
* "."符号
*/
public
static
final
String
DOT
=
"."
;
/**
/**
* 已发布状态
* 已发布状态
*/
*/
...
@@ -27,6 +57,11 @@ public class DatasetConstant {
...
@@ -27,6 +57,11 @@ public class DatasetConstant {
*/
*/
public
static
final
Integer
UNMARK
=
0
;
public
static
final
Integer
UNMARK
=
0
;
/**
* JSON临时文件
*/
public
static
final
String
TEMP_JSON
=
"test.json"
;
/**
/**
* 默认单次写入10000条数据
* 默认单次写入10000条数据
*/
*/
...
@@ -52,6 +87,11 @@ public class DatasetConstant {
...
@@ -52,6 +87,11 @@ public class DatasetConstant {
*/
*/
public
static
final
String
FILE_NAME
=
"output."
;
public
static
final
String
FILE_NAME
=
"output."
;
/**
* 文件名称
*/
public
static
final
String
TEMP_FILE_NAME
=
"temp"
;
/**
/**
* 导出位置
* 导出位置
*/
*/
...
@@ -67,6 +107,21 @@ public class DatasetConstant {
...
@@ -67,6 +107,21 @@ public class DatasetConstant {
*/
*/
public
static
final
String
OUTPUT
=
"output"
;
public
static
final
String
OUTPUT
=
"output"
;
/**
* Excel工作表(sheet)名称
*/
public
static
final
String
SHEET
=
"Sheet1"
;
/**
* instruction数据
*/
public
static
final
String
INSTRUCTION
=
"instruction"
;
/**
* input数据
*/
public
static
final
String
INPUT
=
"input"
;
/**
/**
* args参数值
* args参数值
*/
*/
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/controller/DatasetVersionController.java
View file @
e1d2a7fc
...
@@ -2,7 +2,6 @@ package com.yice.webadmin.app.controller;
...
@@ -2,7 +2,6 @@ package com.yice.webadmin.app.controller;
import
cn.hutool.core.collection.CollUtil
;
import
cn.hutool.core.collection.CollUtil
;
import
cn.hutool.core.util.BooleanUtil
;
import
cn.hutool.core.util.BooleanUtil
;
import
com.alibaba.fastjson.JSON
;
import
com.fasterxml.jackson.databind.JsonNode
;
import
com.fasterxml.jackson.databind.JsonNode
;
import
com.fasterxml.jackson.databind.ObjectMapper
;
import
com.fasterxml.jackson.databind.ObjectMapper
;
import
com.fasterxml.jackson.databind.node.ArrayNode
;
import
com.fasterxml.jackson.databind.node.ArrayNode
;
...
@@ -33,13 +32,11 @@ import org.springframework.core.io.UrlResource;
...
@@ -33,13 +32,11 @@ import org.springframework.core.io.UrlResource;
import
org.springframework.http.HttpHeaders
;
import
org.springframework.http.HttpHeaders
;
import
org.springframework.http.MediaType
;
import
org.springframework.http.MediaType
;
import
org.springframework.http.ResponseEntity
;
import
org.springframework.http.ResponseEntity
;
import
org.springframework.scheduling.annotation.Async
;
import
org.springframework.web.bind.annotation.*
;
import
org.springframework.web.bind.annotation.*
;
import
org.springframework.web.multipart.MultipartFile
;
import
org.springframework.web.multipart.MultipartFile
;
import
java.io.File
;
import
java.io.File
;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.nio.charset.StandardCharsets
;
import
java.nio.file.Path
;
import
java.nio.file.Path
;
import
java.nio.file.Paths
;
import
java.nio.file.Paths
;
import
java.util.*
;
import
java.util.*
;
...
@@ -280,8 +277,6 @@ public class DatasetVersionController {
...
@@ -280,8 +277,6 @@ public class DatasetVersionController {
return
ResponseResult
.
success
();
return
ResponseResult
.
success
();
}
}
@PostMapping
(
value
=
"/detail"
)
@PostMapping
(
value
=
"/detail"
)
public
ResponseResult
<
MyPageData
<
String
>>
detail
(
@MyRequestBody
Long
versionId
,
public
ResponseResult
<
MyPageData
<
String
>>
detail
(
@MyRequestBody
Long
versionId
,
@MyRequestBody
MyPageParam
pageParam
)
throws
IOException
{
@MyRequestBody
MyPageParam
pageParam
)
throws
IOException
{
...
@@ -321,30 +316,10 @@ public class DatasetVersionController {
...
@@ -321,30 +316,10 @@ public class DatasetVersionController {
errorMessage
=
"数据验证失败,导入文件不能为空!"
;
errorMessage
=
"数据验证失败,导入文件不能为空!"
;
return
ResponseResult
.
error
(
ErrorCodeEnum
.
ARGUMENT_NULL_EXIST
,
errorMessage
);
return
ResponseResult
.
error
(
ErrorCodeEnum
.
ARGUMENT_NULL_EXIST
,
errorMessage
);
}
}
this
.
saveMongoDB
(
importFile
,
versionId
);
datasetVersionService
.
importFile
(
importFile
,
versionId
);
DatasetVersion
datasetVersion
=
this
.
datasetVersionService
.
getById
(
versionId
);
datasetVersion
.
setInputStatus
(
DatasetConstant
.
INPUT_STATUS
);
datasetVersion
.
setDataVolume
(
Long
.
valueOf
(
JSON
.
parseArray
(
new
String
(
importFile
.
getBytes
(),
StandardCharsets
.
UTF_8
)).
size
()));
this
.
datasetVersionService
.
updateById
(
datasetVersion
);
return
ResponseResult
.
success
();
return
ResponseResult
.
success
();
}
}
/**
* 写入到mongodb中。
*
* @param importFile 导入的文件。
* @return 保存的本地文件名。
*/
private
void
saveMongoDB
(
MultipartFile
importFile
,
Long
versionId
)
throws
IOException
{
try
{
byte
[]
bytes
=
importFile
.
getBytes
();
datasetVersionService
.
writeDatasetFile
(
bytes
,
importFile
.
getOriginalFilename
(),
versionId
);
}
catch
(
IOException
e
)
{
log
.
error
(
"Failed to save mongo db imported file ["
+
importFile
.
getOriginalFilename
()
+
" ]."
,
e
);
throw
e
;
}
}
@GetMapping
(
"/export"
)
@GetMapping
(
"/export"
)
public
ResponseEntity
<
Resource
>
export
(
@RequestParam
Long
versionId
)
throws
IOException
{
public
ResponseEntity
<
Resource
>
export
(
@RequestParam
Long
versionId
)
throws
IOException
{
DatasetVersion
datasetVersion
=
this
.
datasetVersionService
.
getById
(
versionId
);
DatasetVersion
datasetVersion
=
this
.
datasetVersionService
.
getById
(
versionId
);
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/service/DatasetDataService.java
View file @
e1d2a7fc
...
@@ -21,6 +21,13 @@ public interface DatasetDataService {
...
@@ -21,6 +21,13 @@ public interface DatasetDataService {
*/
*/
void
save
(
DatasetData
datasetData
);
void
save
(
DatasetData
datasetData
);
/**
 * Returns the dataset records stored for one dataset version.
 *
 * @param versionId identifier of the dataset version to query
 * @return the list of {@code DatasetData} entries found for that version
 */
List
<
DatasetData
>
list
(
Long
versionId
);
/**
/**
* 开始清洗工作
* 开始清洗工作
* @param datasetId 清洗数据集
* @param datasetId 清洗数据集
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/service/DatasetVersionService.java
View file @
e1d2a7fc
...
@@ -2,6 +2,7 @@ package com.yice.webadmin.app.service;
...
@@ -2,6 +2,7 @@ package com.yice.webadmin.app.service;
import
com.yice.common.core.base.service.IBaseService
;
import
com.yice.common.core.base.service.IBaseService
;
import
com.yice.webadmin.app.model.DatasetVersion
;
import
com.yice.webadmin.app.model.DatasetVersion
;
import
org.springframework.web.multipart.MultipartFile
;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.util.List
;
import
java.util.List
;
...
@@ -29,6 +30,25 @@ public interface DatasetVersionService extends IBaseService<DatasetVersion, Long
...
@@ -29,6 +30,25 @@ public interface DatasetVersionService extends IBaseService<DatasetVersion, Long
*/
*/
void
saveNewBatch
(
List
<
DatasetVersion
>
datasetVersionList
);
void
saveNewBatch
(
List
<
DatasetVersion
>
datasetVersionList
);
/**
 * Processes a dataset.
 *
 * <p>Per the original author's notes the handler (1) pages through the data
 * 10000 records at a time and (2) writes each page into a JSON file.
 * NOTE(review): the implementation is not visible here - confirm before relying on it.
 *
 * @param datasetId identifier passed to the handler; the original note calls it the
 *                  "cleaning task id" although the name says dataset - TODO confirm
 * @param fileUrl   presumably the location of the JSON file to write - verify against
 *                  the implementation
 */
void
doDealTaskHandler
(
Long
datasetId
,
String
fileUrl
);
/**
 * Imports an uploaded file into a dataset version.
 *
 * @param importFile the uploaded file object
 * @param versionId  identifier of the dataset version that receives the data
 * @throws IOException declared by the signature; presumably raised when the upload
 *                     cannot be read - confirm against the implementation
 */
void
importFile
(
MultipartFile
importFile
,
Long
versionId
)
throws
IOException
;
/**
/**
* 写入json格式路径
* 写入json格式路径
* @param datasetVersion
* @param datasetVersion
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/service/impl/DatasetDataServiceImpl.java
View file @
e1d2a7fc
...
@@ -83,6 +83,18 @@ public class DatasetDataServiceImpl implements DatasetDataService {
...
@@ -83,6 +83,18 @@ public class DatasetDataServiceImpl implements DatasetDataService {
MongoConstant
.
COLLECT_NAME
+
versionId
);
MongoConstant
.
COLLECT_NAME
+
versionId
);
}
}
/**
 * Loads every dataset record of one version from MongoDB.
 *
 * <p>The lookup runs against the collection named
 * {@code MongoConstant.COLLECT_NAME + versionId} and filters on the
 * {@code MongoConstant.VERSION} field.
 *
 * @param versionId identifier of the dataset version
 * @return the matching {@code DatasetData} documents
 */
@Override
public List<DatasetData> list(Long versionId) {
    String collectionName = MongoConstant.COLLECT_NAME + versionId;
    Criteria byVersion = Criteria.where(MongoConstant.VERSION).is(versionId);
    return mongoTemplate.find(new Query(byVersion), DatasetData.class, collectionName);
}
/**
/**
* 开始清洗工作
* 开始清洗工作
* @param datasetId 数据集对应的版本
* @param datasetId 数据集对应的版本
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/service/impl/DatasetOutputServiceImpl.java
View file @
e1d2a7fc
package
com
.
yice
.
webadmin
.
app
.
service
.
impl
;
package
com
.
yice
.
webadmin
.
app
.
service
.
impl
;
import
cn.hutool.core.collection.CollUtil
;
import
cn.hutool.core.collection.CollUtil
;
import
com.alibaba.fastjson.JSON
;
import
com.baomidou.mybatisplus.core.conditions.update.UpdateWrapper
;
import
com.baomidou.mybatisplus.core.conditions.update.UpdateWrapper
;
import
com.fasterxml.jackson.core.type.TypeReference
;
import
com.fasterxml.jackson.databind.JsonNode
;
import
com.fasterxml.jackson.databind.ObjectMapper
;
import
com.github.pagehelper.Page
;
import
com.github.pagehelper.Page
;
import
com.yice.common.core.base.dao.BaseDaoMapper
;
import
com.yice.common.core.base.dao.BaseDaoMapper
;
import
com.yice.common.core.base.service.BaseService
;
import
com.yice.common.core.base.service.BaseService
;
...
@@ -17,24 +19,27 @@ import com.yice.common.minio.config.MinioProperties;
...
@@ -17,24 +19,27 @@ import com.yice.common.minio.config.MinioProperties;
import
com.yice.common.sequence.wrapper.IdGeneratorWrapper
;
import
com.yice.common.sequence.wrapper.IdGeneratorWrapper
;
import
com.yice.webadmin.app.constant.DatasetConstant
;
import
com.yice.webadmin.app.constant.DatasetConstant
;
import
com.yice.webadmin.app.dao.DatasetOutputMapper
;
import
com.yice.webadmin.app.dao.DatasetOutputMapper
;
import
com.yice.webadmin.app.data.DatasetData
;
import
com.yice.webadmin.app.model.DatasetOutput
;
import
com.yice.webadmin.app.model.DatasetOutput
;
import
com.yice.webadmin.app.service.DatasetDataService
;
import
com.yice.webadmin.app.service.DatasetOutputService
;
import
com.yice.webadmin.app.service.DatasetOutputService
;
import
lombok.extern.slf4j.Slf4j
;
import
lombok.extern.slf4j.Slf4j
;
import
org.apache.commons.io.IOUtils
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.poi.ss.usermodel.Cell
;
import
org.apache.poi.ss.usermodel.Row
;
import
org.apache.poi.xssf.usermodel.XSSFSheet
;
import
org.apache.poi.xssf.usermodel.XSSFWorkbook
;
import
org.springframework.beans.factory.annotation.Autowired
;
import
org.springframework.beans.factory.annotation.Autowired
;
import
org.springframework.core.io.Resource
;
import
org.springframework.core.io.UrlResource
;
import
org.springframework.stereotype.Service
;
import
org.springframework.stereotype.Service
;
import
org.springframework.transaction.annotation.Transactional
;
import
org.springframework.transaction.annotation.Transactional
;
import
java.io.
IOException
;
import
java.io.
*
;
import
java.nio.charset.StandardCharsets
;
import
java.nio.charset.StandardCharsets
;
import
java.nio.file.Path
;
import
java.nio.file.Paths
;
import
java.text.DecimalFormat
;
import
java.text.DecimalFormat
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
/**
/**
* 数据集详情数据操作服务类。
* 数据集详情数据操作服务类。
...
@@ -54,6 +59,8 @@ public class DatasetOutputServiceImpl extends BaseService<DatasetOutput, Long> i
...
@@ -54,6 +59,8 @@ public class DatasetOutputServiceImpl extends BaseService<DatasetOutput, Long> i
private
UpDownloaderFactory
upDownloaderFactory
;
private
UpDownloaderFactory
upDownloaderFactory
;
@Autowired
@Autowired
private
MinioProperties
minioProperties
;
private
MinioProperties
minioProperties
;
@Autowired
private
DatasetDataService
datasetDataService
;
/**
/**
* 返回当前Service的主表Mapper对象。
* 返回当前Service的主表Mapper对象。
...
@@ -103,7 +110,7 @@ public class DatasetOutputServiceImpl extends BaseService<DatasetOutput, Long> i
...
@@ -103,7 +110,7 @@ public class DatasetOutputServiceImpl extends BaseService<DatasetOutput, Long> i
@Override
@Override
public
DatasetOutput
saveNew
(
String
fileType
,
String
fileUrl
,
Long
versionId
)
{
public
DatasetOutput
saveNew
(
String
fileType
,
String
fileUrl
,
Long
versionId
)
{
DatasetOutput
datasetOutput
=
new
DatasetOutput
();
DatasetOutput
datasetOutput
=
new
DatasetOutput
();
String
uploadUrl
=
doUpload
(
fileUrl
,
fileType
,
datasetOutput
);
String
uploadUrl
=
doUpload
(
versionId
,
fileType
.
toLowerCase
(),
datasetOutput
);
if
(
StringUtils
.
isEmpty
(
uploadUrl
))
return
null
;
if
(
StringUtils
.
isEmpty
(
uploadUrl
))
return
null
;
datasetOutput
.
setOutputId
(
idGenerator
.
nextLongId
());
datasetOutput
.
setOutputId
(
idGenerator
.
nextLongId
());
...
@@ -112,7 +119,7 @@ public class DatasetOutputServiceImpl extends BaseService<DatasetOutput, Long> i
...
@@ -112,7 +119,7 @@ public class DatasetOutputServiceImpl extends BaseService<DatasetOutput, Long> i
datasetOutput
.
setOutputStartTime
(
new
Date
());
datasetOutput
.
setOutputStartTime
(
new
Date
());
datasetOutput
.
setStatus
(
DatasetConstant
.
STATUS_UNFINISHED
);
datasetOutput
.
setStatus
(
DatasetConstant
.
STATUS_UNFINISHED
);
datasetOutput
.
setDownloadUrl
(
uploadUrl
);
datasetOutput
.
setDownloadUrl
(
uploadUrl
);
datasetOutput
.
setMarkFormatType
(
fileType
);
datasetOutput
.
setMarkFormatType
(
fileType
.
toUpperCase
()
);
datasetOutput
.
setOutputPostion
(
DatasetConstant
.
OUTPUT_POSTION
);
datasetOutput
.
setOutputPostion
(
DatasetConstant
.
OUTPUT_POSTION
);
datasetOutput
.
setOutputContent
(
DatasetConstant
.
OUTPUT_CONTENT
);
datasetOutput
.
setOutputContent
(
DatasetConstant
.
OUTPUT_CONTENT
);
datasetOutputMapper
.
insert
(
datasetOutput
);
datasetOutputMapper
.
insert
(
datasetOutput
);
...
@@ -121,36 +128,293 @@ public class DatasetOutputServiceImpl extends BaseService<DatasetOutput, Long> i
...
@@ -121,36 +128,293 @@ public class DatasetOutputServiceImpl extends BaseService<DatasetOutput, Long> i
/**
/**
* 根据类型,生成对应的文件类型名称
* 根据类型,生成对应的文件类型名称
* @param fileUrl 文件地址
* 构建不同的文件类型,分别写入到minio文件服务器中
* @param versionId 版本标识
* @param fileType 文件类型
* @param fileType 文件类型
*/
*/
private
String
doUpload
(
String
fileUrl
,
String
fileType
,
DatasetOutput
datasetOutput
)
{
private
String
doUpload
(
Long
versionId
,
String
fileType
,
DatasetOutput
datasetOutput
)
{
String
fileName
=
null
;
String
fileName
=
null
;
try
{
try
{
Path
file
=
Paths
.
get
(
fileUrl
);
List
<
DatasetData
>
dataList
=
datasetDataService
.
list
(
versionId
);
Resource
resource
=
new
UrlResource
(
file
.
toUri
());
datasetOutput
.
setDataCount
(
Long
.
valueOf
(
dataList
.
size
()));
byte
[]
data
=
IOUtils
.
toByteArray
(
resource
.
getInputStream
());
if
(
CollUtil
.
isNotEmpty
(
dataList
))
{
datasetOutput
.
setDataCount
(
Long
.
valueOf
(
JSON
.
parseArray
(
new
String
(
data
,
StandardCharsets
.
UTF_8
)).
size
()));
fileName
=
exportFileByFileType
(
DatasetConstant
.
DOT
+
fileType
,
dataList
,
datasetOutput
);
double
sizeInMb
=
(
double
)
resource
.
contentLength
()
/
1024
;
}
DecimalFormat
df
=
new
DecimalFormat
(
"#.##"
);
}
catch
(
IOException
ex
){
datasetOutput
.
setFileSize
(
df
.
format
(
sizeInMb
)
+
DatasetConstant
.
KB
);
log
.
error
(
"上传文件错误信息为:"
,
ex
);
}
return
fileName
;
}
/**
 * Routes an export request to the writer that matches the file extension.
 *
 * @param fileType      dot-prefixed extension, compared with the
 *                      {@code DatasetConstant.*_TYPE} constants
 * @param dataList      dataset records to export
 * @param datasetOutput output record handed through to the selected writer
 * @return whatever the selected writer returns, or {@code null} when the extension
 *         matches none of the known types
 * @throws IOException propagated from {@code exportXlsxFileType}
 */
private String exportFileByFileType(String fileType, List<DatasetData> dataList,
                                    DatasetOutput datasetOutput) throws IOException {
    switch (fileType) {
        case DatasetConstant.JSONL_TYPE:
            return exportJsonlFileType(fileType, dataList, datasetOutput);
        case DatasetConstant.TXT_TYPE:
            return exportTxtFileType(fileType, dataList, datasetOutput);
        case DatasetConstant.XLS_TYPE:
        case DatasetConstant.XLSX_TYPE:
            // NOTE(review): ".xls" is served by the same XSSF-based writer as ".xlsx".
            return exportXlsxFileType(fileType, dataList, datasetOutput);
        case DatasetConstant.CSV_TYPE:
            return exportCsvFileType(fileType, dataList, datasetOutput);
        default:
            return null;
    }
}
/**
* 导出jsonl文件格式的数据
* @param fileType 文件类型
* @param inputStream 流对象
* @return 返回minio文件地址
* @throws IOException 异常
*/
private
String
saveMinioInputStream
(
String
fileType
,
InputStream
inputStream
){
String
fileName
=
null
;
UploadStoreTypeEnum
uploadStoreType
=
UploadStoreTypeEnum
.
values
()[
DatasetConstant
.
UPLOAD
];
UploadStoreTypeEnum
uploadStoreType
=
UploadStoreTypeEnum
.
values
()[
DatasetConstant
.
UPLOAD
];
BaseUpDownloader
upDownloader
=
upDownloaderFactory
.
get
(
uploadStoreType
);
BaseUpDownloader
upDownloader
=
upDownloaderFactory
.
get
(
uploadStoreType
);
UploadResponseInfo
responseInfo
=
upDownloader
.
doUpload
(
resource
.
getInputStream
()
,
UploadResponseInfo
responseInfo
=
upDownloader
.
doUpload
(
inputStream
,
DatasetConstant
.
FILE_NAME
+
fileType
);
DatasetConstant
.
TEMP_
FILE_NAME
+
fileType
);
if
(!
Boolean
.
TRUE
.
equals
(
responseInfo
.
getUploadFailed
()))
{
if
(!
Boolean
.
TRUE
.
equals
(
responseInfo
.
getUploadFailed
()))
{
fileName
=
buildDownloadUrl
(
responseInfo
.
getFilename
());
fileName
=
buildDownloadUrl
(
responseInfo
.
getFilename
());
}
}
return
fileName
;
}
/**
 * Exports the records as a text file and uploads it.
 *
 * <p>Each record becomes one line of the form {@code instruction[[output]]}; lines are
 * separated by {@code ",\n"} and the last line carries no trailing separator.
 * A record whose JSON lacks a field contributes an empty string for it instead of
 * failing the whole export.
 *
 * @param fileType      dot-prefixed file extension used for the upload name
 * @param dataList      dataset records whose {@code data} holds a JSON document
 * @param datasetOutput output record that receives the computed file size
 * @return the download URL returned by {@code saveMinioInputStream}, or {@code null}
 *         when generating the file failed (the error is logged)
 */
private String exportTxtFileType(String fileType, List<DatasetData> dataList,
                                 DatasetOutput datasetOutput) {
    ObjectMapper objectMapper = new ObjectMapper();
    StringBuilder txtData = new StringBuilder();
    try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
        for (DatasetData datasetData : dataList) {
            JsonNode rootNode = objectMapper.readTree(datasetData.getData());
            // path() never returns null, so an absent field yields "" rather than an NPE.
            String instruction = rootNode.path(DatasetConstant.INSTRUCTION).asText("");
            String output = rootNode.path(DatasetConstant.OUTPUT).asText("");
            // Write the separator between records instead of trimming it afterwards;
            // this also keeps an empty list from tripping over substring(0, -1).
            if (txtData.length() > 0) {
                txtData.append(",\n");
            }
            txtData.append(instruction).append("[[").append(output).append("]]");
        }
        baos.write(txtData.toString().getBytes(StandardCharsets.UTF_8));
        try (InputStream inputStream = dealWithFileInputStream(baos, datasetOutput)) {
            return saveMinioInputStream(fileType, inputStream);
        }
    } catch (IOException e) {
        log.error("生成txt文件格式错误,请重新导出或者联系管理员:", e);
        return null;
    }
}
/**
 * Exports the records for the ".jsonl" file type and uploads the result.
 *
 * <p>NOTE(review): despite the name, the payload is a single JSON array
 * ({@code [ ... ]}) of pretty-printed objects separated by {@code ",\n"}, not
 * line-delimited JSON. The byte layout is kept exactly as before so existing
 * consumers are unaffected; only the per-record {@code ObjectMapper} construction
 * and the manual stream cleanup were removed.
 *
 * <p>Only the {@code output} and {@code instruction} fields of each record are written.
 *
 * @param fileType      dot-prefixed file extension used for the upload name
 * @param dataList      dataset records whose {@code data} holds a JSON object
 * @param datasetOutput output record that receives the computed file size
 * @return the download URL returned by {@code saveMinioInputStream}, or {@code null}
 *         when generating the file failed (the error is logged)
 */
private String exportJsonlFileType(String fileType, List<DatasetData> dataList,
                                   DatasetOutput datasetOutput) {
    // One mapper for the whole export; building it per record is needless work.
    ObjectMapper objectMapper = new ObjectMapper();
    try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
        baos.write("[".getBytes(StandardCharsets.UTF_8));
        for (int i = 0; i < dataList.size(); i++) {
            if (i > 0) {
                baos.write(",\n".getBytes(StandardCharsets.UTF_8));
            }
            Map<String, Object> dataMap = objectMapper.readValue(
                    dataList.get(i).getData(), new TypeReference<Map<String, Object>>() {});
            // Keep only the two exported fields.
            Map<String, String> map = new HashMap<>();
            map.put(DatasetConstant.OUTPUT, (String) dataMap.get(DatasetConstant.OUTPUT));
            map.put(DatasetConstant.INSTRUCTION, (String) dataMap.get(DatasetConstant.INSTRUCTION));
            String prettyJsonStr =
                    objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(map);
            baos.write(prettyJsonStr.getBytes(StandardCharsets.UTF_8));
        }
        baos.write("]".getBytes(StandardCharsets.UTF_8));
        try (InputStream inputStream = dealWithFileInputStream(baos, datasetOutput)) {
            return saveMinioInputStream(fileType, inputStream);
        }
    } catch (IOException e) {
        log.error("生成Jsonl文件格式错误,请重新导出或者联系管理员:", e);
        return null;
    }
}
/**
 * Turns the buffered export into an input stream and records its size.
 *
 * <p>The size is stored on {@code datasetOutput} as the byte count divided by 1024,
 * formatted with the pattern {@code "#.##"} and suffixed with {@code DatasetConstant.KB}.
 * The buffer is copied once and that copy backs the returned stream.
 *
 * @param baos          buffer holding the generated file content
 * @param datasetOutput output record that receives the formatted file size
 * @return a stream over a snapshot of the buffer's content
 */
private InputStream dealWithFileInputStream(ByteArrayOutputStream baos, DatasetOutput datasetOutput) {
    // toByteArray() copies the whole buffer, so take the snapshot only once.
    byte[] content = baos.toByteArray();
    double sizeInKb = (double) content.length / 1024;
    DecimalFormat df = new DecimalFormat("#.##");
    datasetOutput.setFileSize(df.format(sizeInKb) + DatasetConstant.KB);
    return new ByteArrayInputStream(content);
}
/**
 * Exports the records as an XLSX workbook and uploads it.
 *
 * <p>The sheet is named {@code DatasetConstant.SHEET}; row 0 is the header
 * (instruction, output) and every record fills one following row. A record whose
 * JSON lacks a field gets an empty cell text instead of failing with an NPE.
 * The workbook is now closed on every path, including a parse failure while the
 * rows are being built.
 *
 * @param fileType      dot-prefixed file extension used for the upload name
 * @param dataList      dataset records whose {@code data} holds a JSON document
 * @param datasetOutput output record that receives the computed file size
 * @return the download URL returned by {@code saveMinioInputStream}, or {@code null}
 *         when writing the workbook failed (the error is logged)
 * @throws IOException when a record's JSON cannot be parsed while building the rows
 */
private String exportXlsxFileType(String fileType, List<DatasetData> dataList,
                                  DatasetOutput datasetOutput) throws IOException {
    XSSFWorkbook workbook = new XSSFWorkbook();
    try {
        XSSFSheet sheet = workbook.createSheet(DatasetConstant.SHEET);

        // Header row.
        Row header = sheet.createRow(0);
        header.createCell(0).setCellValue(DatasetConstant.INSTRUCTION);
        header.createCell(1).setCellValue(DatasetConstant.OUTPUT);

        // Data rows; a parse failure here still propagates to the caller as before.
        ObjectMapper objectMapper = new ObjectMapper();
        for (int i = 0; i < dataList.size(); i++) {
            Row row = sheet.createRow(i + 1);
            JsonNode rootNode = objectMapper.readTree(dataList.get(i).getData());
            row.createCell(0).setCellValue(rootNode.path(DatasetConstant.INSTRUCTION).asText(""));
            row.createCell(1).setCellValue(rootNode.path(DatasetConstant.OUTPUT).asText(""));
        }

        try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
            workbook.write(baos);
            try (InputStream inputStream = dealWithFileInputStream(baos, datasetOutput)) {
                return saveMinioInputStream(fileType, inputStream);
            }
        } catch (IOException e) {
            log.error("生成Xlsx文件格式错误,请重新导出或者联系管理员:", e);
            return null;
        }
    } finally {
        try {
            workbook.close();
        } catch (IOException ex) {
            log.error("关闭流异常,请重新操作", ex);
        }
    }
}
/**
* 导出Csv文件格式的数据
* @param fileType 文件类型
* @param dataList 数据集集合
* @param datasetOutput 输出对象
* @return 返回minio文件地址
* @throws IOException 异常
*/
private
String
exportCsvFileType
(
String
fileType
,
List
<
DatasetData
>
dataList
,
DatasetOutput
datasetOutput
)
{
String
fileName
=
null
;
InputStream
inputStream
=
null
;
try
(
ByteArrayOutputStream
baos
=
new
ByteArrayOutputStream
()){
PrintWriter
pw
=
new
PrintWriter
(
baos
);
// 构建表头数据
pw
.
append
(
DatasetConstant
.
INSTRUCTION
).
append
(
","
).
append
(
DatasetConstant
.
OUTPUT
).
append
(
"\n"
);
ObjectMapper
objectMapper
=
new
ObjectMapper
();
for
(
DatasetData
datasetData
:
dataList
)
{
JsonNode
rootNode
=
objectMapper
.
readTree
(
datasetData
.
getData
());
String
instruction
=
rootNode
.
get
(
DatasetConstant
.
INSTRUCTION
).
textValue
();
String
output
=
rootNode
.
get
(
DatasetConstant
.
OUTPUT
).
textValue
();
// 对包含换行符的字符串进行处理
instruction
=
instruction
.
replace
(
"\n"
,
""
);
output
=
output
.
replace
(
"\n"
,
""
);
pw
.
append
(
output
).
append
(
","
).
append
(
instruction
).
append
(
"\n"
);
}
pw
.
flush
();
// 转换为InputStream
inputStream
=
dealWithFileInputStream
(
baos
,
datasetOutput
);
// 假设saveMinioInputStream方法能够正确保存文件并返回文件名
fileName
=
saveMinioInputStream
(
fileType
,
inputStream
);
}
catch
(
IOException
ex
)
{
log
.
error
(
"生成CSV文件格式错误,请重新导出或者联系管理员: "
,
ex
);
}
finally
{
try
{
if
(
null
!=
inputStream
)
{
inputStream
.
close
();
}
}
catch
(
IOException
ex
){
log
.
error
(
"关闭CSV文件流异常,请重新操作"
,
ex
);
}
}
}
return
fileName
;
return
fileName
;
}
}
/**
/**
* 构建文件下载链接
* 构建文件下载链接
* @param fileUrl
* @param fileUrl
文件地址
* @return
* @return
构建的URL地址
*/
*/
private
String
buildDownloadUrl
(
String
fileUrl
)
{
private
String
buildDownloadUrl
(
String
fileUrl
)
{
StringBuilder
sb
=
new
StringBuilder
();
StringBuilder
sb
=
new
StringBuilder
();
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/service/impl/DatasetVersionServiceImpl.java
View file @
e1d2a7fc
...
@@ -10,6 +10,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
...
@@ -10,6 +10,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import
com.fasterxml.jackson.databind.SerializationFeature
;
import
com.fasterxml.jackson.databind.SerializationFeature
;
import
com.fasterxml.jackson.databind.node.ObjectNode
;
import
com.fasterxml.jackson.databind.node.ObjectNode
;
import
com.github.pagehelper.Page
;
import
com.github.pagehelper.Page
;
import
com.opencsv.CSVReader
;
import
com.opencsv.CSVReaderBuilder
;
import
com.yice.common.core.base.dao.BaseDaoMapper
;
import
com.yice.common.core.base.dao.BaseDaoMapper
;
import
com.yice.common.core.base.service.BaseService
;
import
com.yice.common.core.base.service.BaseService
;
import
com.yice.common.core.object.CallResult
;
import
com.yice.common.core.object.CallResult
;
...
@@ -27,21 +29,29 @@ import com.yice.webadmin.app.service.DatasetCleanService;
...
@@ -27,21 +29,29 @@ import com.yice.webadmin.app.service.DatasetCleanService;
import
com.yice.webadmin.app.service.DatasetDataService
;
import
com.yice.webadmin.app.service.DatasetDataService
;
import
com.yice.webadmin.app.service.DatasetManageService
;
import
com.yice.webadmin.app.service.DatasetManageService
;
import
com.yice.webadmin.app.service.DatasetVersionService
;
import
com.yice.webadmin.app.service.DatasetVersionService
;
import
com.yice.webadmin.app.util.JsonUtils
;
import
com.yice.webadmin.app.util.Sha1Util
;
import
com.yice.webadmin.app.util.Sha1Util
;
import
lombok.extern.slf4j.Slf4j
;
import
lombok.extern.slf4j.Slf4j
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.poi.ss.usermodel.Cell
;
import
org.apache.poi.ss.usermodel.Row
;
import
org.apache.poi.ss.usermodel.Sheet
;
import
org.apache.poi.ss.usermodel.Workbook
;
import
org.apache.poi.xssf.usermodel.XSSFWorkbook
;
import
org.springframework.beans.factory.annotation.Autowired
;
import
org.springframework.beans.factory.annotation.Autowired
;
import
org.springframework.stereotype.Service
;
import
org.springframework.stereotype.Service
;
import
org.springframework.transaction.annotation.Transactional
;
import
org.springframework.transaction.annotation.Transactional
;
import
org.springframework.web.multipart.MultipartFile
;
import
java.io.File
;
import
java.io.*
;
import
java.io.IOException
;
import
java.nio.charset.StandardCharsets
;
import
java.nio.charset.StandardCharsets
;
import
java.nio.file.Files
;
import
java.nio.file.Files
;
import
java.nio.file.Path
;
import
java.nio.file.Path
;
import
java.nio.file.Paths
;
import
java.nio.file.Paths
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
/**
/**
* 数据集版本数据操作服务类。
* 数据集版本数据操作服务类。
...
@@ -136,6 +146,227 @@ public class DatasetVersionServiceImpl extends BaseService<DatasetVersion, Long>
...
@@ -136,6 +146,227 @@ public class DatasetVersionServiceImpl extends BaseService<DatasetVersion, Long>
}
}
}
}
/**
 * Imports an uploaded file into a dataset version and refreshes the version record.
 *
 * <p>The file is handed to {@code readFileDealWithWrite}; afterwards the version is
 * reloaded, marked with {@code DatasetConstant.INPUT_STATUS}, given the record count
 * reported by {@code datasetDataService.count(versionId)} and saved.
 * Per the original note, Jsonl, Excel, Txt and Csv files are supported - the
 * dispatch itself is not visible in this block.
 *
 * @param importFile the uploaded file object
 * @param versionId  identifier of the dataset version that receives the data
 * @throws IOException propagated from {@code readFileDealWithWrite}
 */
@Override
public void importFile(MultipartFile importFile, Long versionId) throws IOException {
    readFileDealWithWrite(importFile, versionId);

    DatasetVersion version = getById(versionId);
    version.setInputStatus(DatasetConstant.INPUT_STATUS);
    version.setDataVolume(datasetDataService.count(versionId));
    updateById(version);
}
/**
 * Parses an uploaded text file and hands the result to {@code dealWithMongoData}.
 *
 * <p>Every non-blank line is expected to look like {@code instruction [[output]]}:
 * the text before the first {@code "[["} becomes the instruction and the text up to
 * the last {@code "]]"} becomes the output (escaped quotes {@code \"} are unescaped).
 * Each accepted line is converted with {@code JsonUtils.toJson} and the objects are
 * joined into one JSON array string. Lines that do not match are logged and skipped.
 * The reader is closed once parsing finishes.
 *
 * @param txtFile   the uploaded text file, read as UTF-8
 * @param versionId identifier of the dataset version that receives the data
 * @throws IOException when the upload cannot be read
 */
private void readTxtFormatFile(MultipartFile txtFile, Long versionId) throws IOException {
    StringBuilder jsonBuilder = new StringBuilder("[");
    try (BufferedReader bufferedReader = new BufferedReader(
            new InputStreamReader(txtFile.getInputStream(), StandardCharsets.UTF_8))) {
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            line = line.trim();
            if (line.isEmpty()) {
                continue;
            }
            int startIndex = line.indexOf("[[");
            int endIndex = line.lastIndexOf("]]");
            // Covers a missing "[[", a missing "]]" and a "]]" that precedes the "[[".
            if (startIndex == -1 || endIndex <= startIndex) {
                log.warn("Invalid line format: {}", line);
                continue;
            }
            String instruction = line.substring(0, startIndex).trim();
            String output = line.substring(startIndex + 2, endIndex).trim().replace("\\\"", "\"");

            Map<String, Object> rowData = new HashMap<>();
            rowData.put(DatasetConstant.INSTRUCTION, instruction);
            rowData.put(DatasetConstant.OUTPUT, output);
            rowData.put(DatasetConstant.INPUT, "");

            // Anything beyond the opening "[" means a previous row was already written.
            if (jsonBuilder.length() > 1) {
                jsonBuilder.append(",");
            }
            jsonBuilder.append(JsonUtils.toJson(rowData));
        }
    }
    jsonBuilder.append("]");
    dealWithMongoData(versionId, jsonBuilder.toString());
}
/**
 * Passes the raw bytes of an uploaded Jsonl file to {@code writeDatasetFile}.
 * An {@link IOException} is logged together with the file name and then rethrown.
 *
 * @param importFile the uploaded file.
 * @param versionId  the dataset version identifier.
 * @throws IOException if the uploaded file cannot be read.
 */
private void readJsonlFormatFile(MultipartFile importFile, Long versionId) throws IOException {
    try {
        byte[] payload = importFile.getBytes();
        writeDatasetFile(payload, importFile.getOriginalFilename(), versionId);
    } catch (IOException ioe) {
        log.error("Failed to jsonl file type imported file [" + importFile.getOriginalFilename() + " ].", ioe);
        throw ioe;
    }
}
/**
 * Parses an uploaded Csv file and hands the result to {@code dealWithMongoData}.
 *
 * <p>The first line is skipped as a header. In every following row the first
 * column becomes the instruction and the second column becomes the output;
 * the input is left empty. Blank rows are skipped and a missing second column
 * is treated as an empty output.
 *
 * @param importFile the uploaded file.
 * @param versionId  the dataset version identifier.
 * @throws IOException if the uploaded file cannot be read.
 */
private void readCsvFormatFile(MultipartFile importFile, Long versionId) throws IOException {
    // The records are collected as the text of a JSON array.
    StringBuilder jsonBuilder = new StringBuilder("[");

    // Both readers are closed on exit, including when parsing fails.
    try (Reader reader = new InputStreamReader(importFile.getInputStream(), StandardCharsets.UTF_8);
         CSVReader csvReader = new CSVReaderBuilder(reader).withSkipLines(1).build()) {
        String[] line;
        while ((line = csvReader.readNext()) != null) {
            // NOTE(review): a blank line is expected to arrive as a single empty
            // field - confirm against the OpenCSV version in use.
            boolean blankRow = line.length == 0
                    || (line.length == 1 && (line[0] == null || line[0].trim().isEmpty()));
            if (blankRow) {
                continue;
            }

            // Guard the second column so a short row cannot raise
            // ArrayIndexOutOfBoundsException and abort the whole import.
            String instruction = line[0];
            String output = line.length > 1 ? line[1] : "";

            Map<String, Object> rowData = new HashMap<>();
            rowData.put(DatasetConstant.OUTPUT, output);
            rowData.put(DatasetConstant.INPUT, "");
            rowData.put(DatasetConstant.INSTRUCTION, instruction);

            // Length 1 means only the opening bracket is present, i.e. first record.
            if (jsonBuilder.length() > 1) {
                jsonBuilder.append(",");
            }
            jsonBuilder.append(JsonUtils.toJson(rowData));
        }
    }
    jsonBuilder.append("]");

    dealWithMongoData(versionId, jsonBuilder.toString());
}
/**
 * Parses an uploaded Excel file and hands the result to {@code dealWithMongoData}.
 *
 * <p>Only the first sheet is read and its first row is skipped as a header.
 * In every following row the first column becomes the instruction and the
 * second column becomes the output; the input is left empty. Missing cells
 * are stored as empty strings.
 *
 * @param importFile the uploaded file.
 * @param versionId  the dataset version identifier.
 * @throws IOException if the uploaded file cannot be read.
 */
private void readExcelFormatFile(MultipartFile importFile, Long versionId) throws IOException {
    // WorkbookFactory detects the container format, so both .xls and .xlsx
    // uploads are accepted; XSSFWorkbook alone rejects legacy .xls files.
    // The fully qualified name avoids touching the file's import block.
    try (InputStream inputStream = importFile.getInputStream();
         Workbook workbook = org.apache.poi.ss.usermodel.WorkbookFactory.create(inputStream)) {
        Sheet sheet = workbook.getSheetAt(0);

        // The records are collected as the text of a JSON array.
        StringBuilder jsonBuilder = new StringBuilder("[");
        for (Row row : sheet) {
            if (row.getRowNum() == 0) {
                // Header row.
                continue;
            }

            // getCellValueAsString returns "" for a null cell, so no extra guard is needed.
            String instruction = JsonUtils.getCellValueAsString(row.getCell(0));
            String output = JsonUtils.getCellValueAsString(row.getCell(1));

            Map<String, Object> rowData = new HashMap<>();
            rowData.put(DatasetConstant.INSTRUCTION, instruction);
            rowData.put(DatasetConstant.OUTPUT, output);
            rowData.put(DatasetConstant.INPUT, "");

            // Length 1 means only the opening bracket is present, i.e. first record.
            if (jsonBuilder.length() > 1) {
                jsonBuilder.append(",");
            }
            jsonBuilder.append(JsonUtils.toJson(rowData));
        }
        jsonBuilder.append("]");

        dealWithMongoData(versionId, jsonBuilder.toString());
    }
}
/**
 * Chooses a parser for the uploaded file from the suffix of its original
 * file name (compared in lower case) and runs it.
 *
 * <p>Excel, Csv and Txt files are parsed into records first; any other file,
 * including one without a file name, is handled as Jsonl.
 *
 * @param importFile the uploaded file.
 * @param versionId  the dataset version identifier.
 * @throws IOException if the uploaded file cannot be read.
 */
private void readFileDealWithWrite(MultipartFile importFile, Long versionId) throws IOException {
    // getOriginalFilename() may return null; treat that like "no known extension".
    String originalFilename = importFile.getOriginalFilename();
    String fileName = originalFilename == null ? "" : originalFilename.toLowerCase();

    // Match the extension as a suffix: a substring test would also fire on
    // names such as "export.csv.jsonl".
    if (fileName.endsWith(DatasetConstant.XLS_TYPE) || fileName.endsWith(DatasetConstant.XLSX_TYPE)) {
        readExcelFormatFile(importFile, versionId);
    } else if (fileName.endsWith(DatasetConstant.CSV_TYPE)) {
        readCsvFormatFile(importFile, versionId);
    } else if (fileName.endsWith(DatasetConstant.TXT_TYPE)) {
        readTxtFormatFile(importFile, versionId);
    } else {
        // Default: the content is passed through as Jsonl.
        readJsonlFormatFile(importFile, versionId);
    }
}
/**
/**
* 进行存储、写入以及更新配置等操作
* 进行存储、写入以及更新配置等操作
* @param datasetId 数据集标识
* @param datasetId 数据集标识
...
@@ -261,13 +492,23 @@ public class DatasetVersionServiceImpl extends BaseService<DatasetVersion, Long>
...
@@ -261,13 +492,23 @@ public class DatasetVersionServiceImpl extends BaseService<DatasetVersion, Long>
try
{
try
{
// 或者指定字符集进行转换,替换"UTF-8"为你想要使用的字符集
// 或者指定字符集进行转换,替换"UTF-8"为你想要使用的字符集
String
result
=
new
String
(
bytes
,
StandardCharsets
.
UTF_8
);
String
result
=
new
String
(
bytes
,
StandardCharsets
.
UTF_8
);
// 处理mongodb中的数据
dealWithMongoData
(
versionId
,
result
);
}
catch
(
Exception
ex
)
{
log
.
error
(
"Failed to write mongodb database ["
+
originalFilename
+
" ]."
,
ex
);
}
}
/**
* 处理mongodb中的数据
* @param versionId 版本标识
* @param result 数据
*/
private
void
dealWithMongoData
(
Long
versionId
,
String
result
)
{
//先删除数据集
//先删除数据集
datasetDataService
.
delete
(
versionId
);
datasetDataService
.
delete
(
versionId
);
//保存到mongodb中
//保存到mongodb中
datasetDataService
.
save
(
new
DatasetData
(
null
,
versionId
,
result
,
new
Date
(),
null
));
datasetDataService
.
save
(
new
DatasetData
(
null
,
versionId
,
result
,
new
Date
(),
null
));
}
catch
(
Exception
ex
)
{
log
.
error
(
"Failed to write mongodb database ["
+
originalFilename
+
" ]."
,
ex
);
}
}
}
/**
/**
...
@@ -339,7 +580,8 @@ public class DatasetVersionServiceImpl extends BaseService<DatasetVersion, Long>
...
@@ -339,7 +580,8 @@ public class DatasetVersionServiceImpl extends BaseService<DatasetVersion, Long>
* @param datasetId 清洗任务id
* @param datasetId 清洗任务id
* @return 清洗列表
* @return 清洗列表
*/
*/
private
void
doDealTaskHandler
(
Long
datasetId
,
String
fileUrl
)
{
@Override
public
void
doDealTaskHandler
(
Long
datasetId
,
String
fileUrl
)
{
try
{
try
{
Integer
index
=
0
;
Integer
index
=
0
;
Long
count
=
datasetDataService
.
count
(
datasetId
);
Long
count
=
datasetDataService
.
count
(
datasetId
);
...
...
application-webadmin/src/main/java/com/yice/webadmin/app/util/JsonUtils.java
0 → 100644
View file @
e1d2a7fc
package
com
.
yice
.
webadmin
.
app
.
util
;
import
com.fasterxml.jackson.core.JsonProcessingException
;
import
com.fasterxml.jackson.databind.ObjectMapper
;
import
org.apache.poi.ss.usermodel.Cell
;
import
org.apache.poi.ss.usermodel.DateUtil
;
/**
 * Helpers for serializing objects to JSON text and for reading spreadsheet
 * cells as strings.
 */
public class JsonUtils {

    /**
     * Shared mapper. A configured ObjectMapper can be reused across threads,
     * and creating one per call is needlessly expensive.
     */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /**
     * Serializes an object to its JSON representation.
     *
     * @param object the object to serialize.
     * @return the JSON text.
     * @throws RuntimeException wrapping the JsonProcessingException if serialization fails.
     */
    public static String toJson(Object object) {
        try {
            return OBJECT_MAPPER.writeValueAsString(object);
        } catch (JsonProcessingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads the content of a spreadsheet cell as a string.
     *
     * @param cell the cell to read; may be null.
     * @return the cell content, or an empty string for a null cell or a cell
     *         type not handled below.
     */
    public static String getCellValueAsString(Cell cell) {
        if (cell == null) {
            return "";
        }
        switch (cell.getCellType()) {
            case STRING:
                return cell.getStringCellValue();
            case NUMERIC:
                if (DateUtil.isCellDateFormatted(cell)) {
                    return cell.getDateCellValue().toString();
                }
                double numeric = cell.getNumericCellValue();
                // Numeric cells are always read as double; print whole numbers
                // without the ".0" / exponent that Double.toString would add.
                // The bound keeps the cast to long exact.
                if (numeric == Math.rint(numeric) && Math.abs(numeric) < 1e15) {
                    return Long.toString((long) numeric);
                }
                return Double.toString(numeric);
            case BOOLEAN:
                return Boolean.toString(cell.getBooleanCellValue());
            case FORMULA:
                // Returns the formula expression itself, not its evaluated result.
                return cell.getCellFormula();
            default:
                return "";
        }
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment