逗号分隔值(Comma-Separated Values,CSV,有时也称为字符分隔值,因为分隔字符也可以不是逗号),其文件以纯文本形式存储表格数据(数字和文本)。纯文本意味着该文件是一个字符序列,不含必须像二进制数字那样被解读的数据。CSV文件由任意数目的记录组成,记录间以某种换行符分隔;每条记录由字段组成,字段间的分隔符是其它字符或字符串。

如果不想看过程分析,想直接看代码请跳到最后!!!

分隔符:

最常见的分隔符是逗号或制表符,但是由于项目中使用的是,所以以下案例以此字符演示。

思路:

  • 传入.csv文件集所在的目录,例如new File("C:\\Users\\brbai\\Desktop\\files")
  • 递归读取此目录下所有层级的.csv文件
  • 调用insertData(File file,int type)使用字符缓冲流读取首行的字段名,并将字符缓冲流传给readCSV使用
  • 按需求对读取的字符做转换,使其与数据库类型匹配,方便插入数据库。例如 : 首行为字段名,用,连接,其他行数据,普通字符串用''包裹,时间转换为相应格式,其他同理
  • readCSV(BufferedReader reader)每次读取1000行数据返回,批量插入数据库,插入成功后继续读取,每个.csv文件读取完后,关闭缓冲流
  • 依次读取每个文件,直至所有文件读取写入结束
    遍历文件夹,读取所有.csv文件,插入数据库
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
  /** 
* @Description: 遍历文件夹,读取文件插入数据库
* @Param: [f] 目录(文件夹)
* @return: void
*/
public void getFileName(File f) {

//获取当前目录的子目录对象的数组
File[] file = f.listFiles();
//遍历当前File数组(父目录的)
for (File file2 : file) {

String fileName = file2.getName();
if(!file2.isDirectory()){

long start = System.currentTimeMillis();

if(fileName.indexOf("soa_bank") != -1){
insertData(file2,SOA_BANK);
}else if(fileName.indexOf("soa_account") != -1){
insertData(file2,SOA_ACCOUNT);
}

log.info("文件{}导入完成,用时{}ms",file2.getPath(),(System.currentTimeMillis()-start));
}
//如果file2是一个文件夹则递归遍历子目录,否则结束当前目录遍历
if(file2.isDirectory() && !"today".equals(fileName)) {
getFileName(file2);
}
}
}
读取csv文件
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
/** 
* @Description: 把数据从csv文件中读取到list
* @Param: [reader] 字符缓冲流
* @return: java.util.List 读取到的数据(一次读取1000行)
*/
public List readCSV(BufferedReader reader){
try {
ArrayList<String> list = new ArrayList<String>();
String line = null;
for(int i=0; i < 1000; i++) {
line = reader.readLine();

if(line == null){
break;
}

line = "'"+line.replaceAll("︴","','")+"'";

//匹配'2019-01-01'这种时间格式的正则表达式
String regex1 = "['][0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}[']";
Pattern pattern1 = Pattern.compile(regex1);
Matcher m1 = pattern1.matcher(line);
line = line.replaceAll(regex1 ,"︴");
//替换时间格式
while(m1.find()){
line = line.replaceFirst("︴"," to_date ( "+m1.group()+", 'YYYY-MM-DD HH24:MI:SS' )");
}

//匹配'2019-01-01 12:12:06'这种时间格式的正则表达式
String regex2 = "['][0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}[ ][0-9]{1,2}[:][0-9]{1,2}[:][0-9]{1,2}[']";
Pattern pattern2 = Pattern.compile(regex2);
Matcher m2 = pattern2.matcher(line);
line = line.replaceAll(regex2 ,"︴");
//替换时间格式
while(m2.find()){
line = line.replaceFirst("︴"," to_date ( "+m2.group()+", 'YYYY-MM-DD HH24:MI:SS' )");
}

list.add(line);
}
return list;
} catch (Exception e) {
log.error("文件:{}读取信息出错");
try {
reader.close();
} catch (IOException ex) {
ex.printStackTrace();
}
e.printStackTrace();
}
return null;
}

批量插入数据库
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
  /** 
* @Description: 批量插入到数据库
* @Param: [file, type] file:文件,type:数据库对应表
* @return: void
*/
public void insertData(File file,int type) {

String head = null;
List<String> list = null;
BufferedReader reader = null;
//读取第一行的标题信息
try {
reader = new BufferedReader(new FileReader(file));
head = reader.readLine();

if(head == null){
log.info("文件{}内容为空!!!",file.getPath());
return;
} else {
head = head.replaceAll("︴",",");
}

while ((list = readCSV(reader)).size() != 0){

switch (type){
case 1:{
bankInfoMapper.insertListData(head,list);
}
break;
case 2:{
accountMapper.insertListData(head,list);
}
break;
}

log.info("文件{}导入中--- +{}行",file.getPath(),list.size());
}
} catch (IOException e) {
log.info("***文件{}导入数据库出现异常***",file.getPath());
e.printStackTrace();
}finally {
try {
reader.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
Mapper接口(AccountMapper为例)
1
2
3
public interface AccountMapper extends BaseMapper<Account> {
void insertListData(@Param("head") String head,@Param("list") List<String> list);
}
批量插入(Oracle数据库)(对应Mapper接口的xml文件,例)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.xxx.spmacore.soa.mapper.AccountMapper">
<insert id="insertListData" useGeneratedKeys="false">
insert into T_ACCOUNT ( ${head} )
<foreach collection="list" separator ="UNION ALL" item="item">
(
SELECT
${item}
FROM DUAL
)
</foreach>
</insert>
</mapper>

读取数据库表结构

当csv文件中时间为null,或数据字段不含全部的表字段时,可能需要查询数据库各字段属性与其对应做相关处理,此处只提供数据库表结构的读取,相关处理按实际业务情况。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
//注入SqlSessionFactory
@Autowired
private SqlSessionFactory sqlSessionFactory;

/**
* @Description: 读取数据库表结构
* @Param: [tableName]
* @return: java.util.Map<java.lang.String,java.util.Map<java.lang.String,java.lang.String>>
*/
public Map<String,Map<String, String>> getDatabaseAttribute(String tableName) {

SqlSession sqlSession = sqlSessionFactory.openSession();
Connection connection = sqlSession.getConnection();

Map<String,Map<String, String>> database = new HashMap<>();

DatabaseMetaData metaData = null;
try {
metaData = connection.getMetaData();

ResultSet columns = metaData
.getColumns(null, "DEVTEST", tableName, "%");

Map<String,String> attributes = new HashMap<>();
while (columns.next()){
attributes.put(columns.getString("COLUMN_NAME"),columns.getString("TYPE_NAME"));
}

database.put(tableName,attributes);
} catch (SQLException e) {
e.printStackTrace();
}finally {
try {
connection.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
return database;
}


完整工具类 CSVUtils

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
package com.xxx.spmacore.common.utils;

import com.xxx.spmacore.soa.mapper.AccountMapper;
import com.xxx.spmacore.soa.mapper.BankInfoMapper;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

/**
* @author: brbai
* @create: 2019-07-09 11:29:41
* @description:
*/
@Component
@Slf4j
public class CSVUtils {

@Autowired
private BankInfoMapper bankInfoMapper;

@Autowired
private AccountMapper accountMapper;

private static final int SOA_BANK = 1;
private static final int SOA_ACCOUNT = 2;

/**
* @Description: 遍历文件夹,读取文件插入数据库
* @Param: [f] 目录(文件夹)
* @return: void
*/
public void getFileName(File f) {

//获取当前目录的子目录对象的数组
File[] file = f.listFiles();
//遍历当前File数组(父目录的)
for (File file2 : file) {

String fileName = file2.getName();
if(!file2.isDirectory()){

long start = System.currentTimeMillis();

if(fileName.indexOf("soa_bank") != -1){
insertData(file2,SOA_BANK);
}else if(fileName.indexOf("soa_account") != -1){
insertData(file2,SOA_ACCOUNT);
}
log.info("文件{}导入完成,用时{}ms",file2.getPath(),(System.currentTimeMillis()-start));
}
//如果file2是一个文件夹则递归遍历子目录,否则结束当前目录遍历
if(file2.isDirectory()) {
getFileName(file2);
}
}
}

/**
* @Description: 把数据从csv文件中读取到list
* @Param: [reader] 字符缓冲流
* @return: java.util.List 读取到的数据(一次读取1000行)
*/
public List readCSV(BufferedReader reader){
try {
ArrayList<String> list = new ArrayList<String>();
String line = null;
for(int i=0; i < 1000; i++) {
line = reader.readLine();

if(line == null){
break;
}

line = "'"+line.replaceAll("︴","','")+"'";

//'xxxx-xx-xx'正则表达式
String regex1 = "['][0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}[']";
Pattern pattern1 = Pattern.compile(regex1);
Matcher m1 = pattern1.matcher(line);
line = line.replaceAll(regex1 ,"︴");
//替换时间格式
while(m1.find()){
line = line.replaceFirst("︴"," to_date ( "+m1.group()+", 'YYYY-MM-DD HH24:MI:SS' )");
}

//'xxxx-xx-xx xx:xx:xx'正则表达式
String regex2 = "['][0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}[ ][0-9]{1,2}[:][0-9]{1,2}[:][0-9]{1,2}[']";
Pattern pattern2 = Pattern.compile(regex2);
Matcher m2 = pattern2.matcher(line);
line = line.replaceAll(regex2 ,"︴");
//替换时间格式
while(m2.find()){
line = line.replaceFirst("︴"," to_date ( "+m2.group()+", 'YYYY-MM-DD HH24:MI:SS' )");
}

list.add(line);
}
return list;
} catch (Exception e) {
log.error("文件:{}读取信息出错");
try {
reader.close();
} catch (IOException ex) {
ex.printStackTrace();
}
e.printStackTrace();
}
return null;
}

/**
* @Description: 批量插入到数据库
* @Param: [file, type] file:文件,type:数据库对应表
* @return: void
*/
public void insertData(File file,int type) {


String head = null;
List<String> list = null;
BufferedReader reader = null;
//读取第一行的标题信息
try {
reader = new BufferedReader(new FileReader(file));
head = reader.readLine();

if(head == null){
log.info("文件{}内容为空!!!",file.getPath());
return;
} else {
head = head.replaceAll("︴",",");
}

while ((list = readCSV(reader)).size() != 0){

switch (type){
case 1:{
bankInfoMapper.insertListData(head,list);
}
break;
case 2:{
accountMapper.insertListData(head,list);
}
break;
}

log.info("文件{}导入中--- +{}行",file.getPath(),list.size());
}
} catch (IOException e) {
log.info("***文件{}导入数据库出现异常***",file.getPath());
e.printStackTrace();
}finally {
try {
reader.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}

}