对日期存储加了些许功能

This commit is contained in:
moon 2025-10-16 15:54:58 +08:00
parent 00ebd0e940
commit ce80016c72
7 changed files with 560 additions and 195 deletions

View File

@ -5,11 +5,17 @@ import com.bjtds.brichat.service.ExcelToMysqlService;
import com.bjtds.brichat.util.ResultUtils;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.io.File;
import java.util.concurrent.TimeUnit;
@RestController
@Slf4j
@ -20,6 +26,9 @@ public class EexcelToMysqlController {
@Autowired
private ExcelToMysqlService excelToMysqlService;
@Autowired
private StringRedisTemplate stringRedisTemplate;
@PostMapping("excelToMysql")
public ResultUtils excelToMysql(@RequestParam("file") MultipartFile file) {
@ -28,8 +37,10 @@ public class EexcelToMysqlController {
if (filename.endsWith(".xls") || filename.endsWith(".xlsx")) {
// Excel 文件执行处理逻辑
try {
excelToMysqlService.importExcelToMysql(file);
return ResultUtils.success("已将文件导入到MySQL数据库");
// 生成唯一任务 ID
String taskId = UUID.randomUUID().toString();
excelToMysqlService.importExcelToMysql(file, taskId);
return ResultUtils.success(taskId,"导入任务开始,任务 ID: " + taskId);
} catch (Exception e) {
log.error("导入 Excel 文件到 MySQL 失败: {}", e.getMessage(), e);
return ResultUtils.error("导入 Excel 文件到 MySQL 失败");
@ -45,4 +56,18 @@ public class EexcelToMysqlController {
}
}
/**
 * Reports the progress of an import task.
 *
 * <p>Progress is read from the Redis keys {@code importing:<taskId>:status},
 * {@code importing:<taskId>:total} and {@code importing:<taskId>:finished}.
 * The status key is read first: when it is absent the task is treated as unknown
 * and no further Redis lookups are made.
 *
 * @param taskId the task id returned when the import was started
 * @return an error result when no status is stored for the task; otherwise a success
 *         result whose payload maps {@code total}, {@code status} and {@code finished}
 *         to the stored values ({@code total}/{@code finished} may be {@code null}
 *         if their keys are missing)
 */
@GetMapping("checkTask")
public ResultUtils checkTask(@RequestParam("taskId") String taskId) {
    String keyPrefix = "importing:" + taskId + ":";

    String status = stringRedisTemplate.opsForValue().get(keyPrefix + "status");
    if (status == null) {
        return ResultUtils.error("任务 ID 不存在");
    }

    Map<String, String> result = new HashMap<>();
    result.put("total", stringRedisTemplate.opsForValue().get(keyPrefix + "total"));
    result.put("status", status);
    result.put("finished", stringRedisTemplate.opsForValue().get(keyPrefix + "finished"));
    return ResultUtils.success(result);
}
}

View File

@ -5,5 +5,5 @@ import org.springframework.web.multipart.MultipartFile;
import java.io.File;
public interface ExcelToMysqlService {
void importExcelToMysql(MultipartFile file);
void importExcelToMysql(MultipartFile file, String taskId);
}

View File

@ -5,113 +5,163 @@ import com.bjtds.brichat.entity.exceltomysql.ColumnDefinition;
import com.bjtds.brichat.mapper.opengauss.ExcelToMysqlMapper;
import com.bjtds.brichat.service.ExcelToMysqlService;
import com.bjtds.brichat.util.ExcelColumnTypeUtil;
import com.bjtds.brichat.util.ExcelReader;
import com.bjtds.brichat.util.PinyinUtil;
import com.bjtds.brichat.util.SheetHandler;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.SheetVisibility;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.concurrent.TimeUnit;
/**
* 实现 Excel 导入到 MySQL 的服务
*/
@Service
@Slf4j
@Async
public class ExcelToMysqlServiceImpl implements ExcelToMysqlService {
@Autowired
private ExcelToMysqlMapper excelToMysqlMapper;
@Autowired
private StringRedisTemplate stringRedisTemplate;
@Override
public void importExcelToMysql(MultipartFile file) {
Workbook workbook = null;
try (InputStream fis = file.getInputStream()) {
workbook = WorkbookFactory.create(fis);
} catch (IOException e) {
log.error("读取 Excel 文件失败: {}", e.getMessage(), e);
return;
public void importExcelToMysql(MultipartFile file, String taskId) {
try (OPCPackage pkg = OPCPackage.open(file.getInputStream())) {
// 获取 Excel 文件总 sheet 含隐藏表
int totalSheets;
List<Integer> visibleSheetIndexes = new ArrayList<>();
List<String> visibleSheetNames = new ArrayList<>();
try (InputStream fis = file.getInputStream();
Workbook workbook = WorkbookFactory.create(fis)) {
totalSheets = workbook.getNumberOfSheets();
log.info("📊 Excel 总 sheet 数(包含空/隐藏表): {}", totalSheets);
stringRedisTemplate.opsForValue().set("importing:" + taskId+":"+"status", "importing", 30, TimeUnit.MINUTES);
// 筛选可见 Sheet
for (int i = 0; i < totalSheets; i++) {
if (workbook.getSheetVisibility(i) == SheetVisibility.VISIBLE) {
visibleSheetIndexes.add(i);
visibleSheetNames.add(workbook.getSheetName(i));
} else {
log.info("🙈 跳过隐藏 Sheet [{}]", workbook.getSheetName(i));
}
}
int sheetCount = workbook.getNumberOfSheets();
if (sheetCount == 0) {
log.warn("Excel 文件无 sheet: {}", file.getOriginalFilename());
return;
log.info("📜 实际可读的 Sheet 数(排除隐藏表): {}", visibleSheetIndexes.size());
}
stringRedisTemplate.opsForValue().set("importing:" + taskId+":"+"total", String.valueOf(visibleSheetIndexes.size()), 30, TimeUnit.MINUTES);
// 遍历每个 sheet
for (int sheetIndex = 0; sheetIndex < sheetCount; sheetIndex++) {
List<Map<String, String>> rows;
try {
rows = ExcelReader.readExcel(file, sheetIndex);
} catch (IOException e) {
log.error("读取 sheet {} 失败: {}", sheetIndex, e.getMessage());
// 读取 Excel 流式数据
XSSFReader reader = new XSSFReader(pkg);
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
StylesTable styles = reader.getStylesTable();
XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) reader.getSheetsData();
int sheetIndex = -1; // 可见 Sheet 索引计数
stringRedisTemplate.opsForValue().set("importing:" + taskId+":"+"finished", String.valueOf(sheetIndex + 1), 30, TimeUnit.MINUTES);
while (sheets.hasNext()) {
InputStream sheetStream = sheets.next();
sheetIndex++;
// 只处理可见 Sheet
if (sheetIndex >= visibleSheetIndexes.size()) {
sheetStream.close();
continue;
}
if (rows.isEmpty()) {
log.warn("Sheet {} 无数据,跳过", sheetIndex);
continue;
}
String sheetName = visibleSheetNames.get(sheetIndex);
log.info("🗂️ [{} / {}] 开始处理 Sheet{}", sheetIndex + 1, visibleSheetIndexes.size(), sheetName);
// 文件名去掉后缀转拼音
// 生成表名文件名 + sheet名 转拼音
String fileName = file.getOriginalFilename()
.replaceAll("\\.xlsx$|\\.xls$|\\.csv$", "");
fileName = PinyinUtil.toPinyin(fileName);
String tableName = PinyinUtil.toPinyin(fileName + "_" + sheetName);
if (tableName.matches("^[0-9].*")) tableName = "t_" + tableName;
// sheet名转拼音
String sheetName = workbook.getSheetAt(sheetIndex).getSheetName();
sheetName = sheetName.replaceAll("[^a-zA-Z0-9]", "_");
sheetName = PinyinUtil.toPinyin(sheetName);
// 解析前20行推断字段类型
Map<String, String> dbColumnTypes = ExcelColumnTypeUtil.getColumnDbTypes(file, visibleSheetIndexes.get(sheetIndex), 20);
// 合并成表名
String tableName = fileName + "_" + sheetName;
if (tableName.matches("^[0-9].*")) {
tableName = "t_" + tableName;
}
log.error("表名:{}", tableName);
Map<String, String> dbColumnTypes;
try {
dbColumnTypes = ExcelColumnTypeUtil.getColumnDbTypes(file, sheetIndex, 20);
} catch (Exception e) {
log.error("获取列类型失败: {}", e.getMessage());
if (dbColumnTypes == null || dbColumnTypes.isEmpty()) {
log.warn("⚠️ 跳过空 Sheet [{}](无有效数据)", sheetName);
sheetStream.close();
continue;
}
// 生成列定义
// 生成字段定义
List<ColumnDefinition> definitions = new ArrayList<>();
int colIdx = 0;
for (Map.Entry<String, String> entry : dbColumnTypes.entrySet()) {
ColumnDefinition def = new ColumnDefinition();
def.setName(PinyinUtil.toPinyin(entry.getKey())); // 中文列转拼音
def.setType(entry.getValue()); // 使用动态类型
// 列名转拼音
String colName = PinyinUtil.toPinyin(entry.getKey());
if (colName == null || colName.trim().isEmpty()) {
colName = "unnamed_col_" + (colIdx + 1);
}
// 处理特殊字符保留字统一小写
colName = colName.trim().replaceAll("[^a-zA-Z0-9_]", "_").toLowerCase();
def.setName(colName);
def.setType(entry.getValue());
def.setNullable(true);
definitions.add(def);
colIdx++;
}
// 创建表
// 创建数据
excelToMysqlMapper.createTable(tableName, definitions);
log.info("✅ 已创建表:{}", tableName);
log.info("✅ 已创建表 [{}],字段数{}", tableName, definitions.size());
// 插入数据
for (Map<String, String> map : rows) {
List<ColumData> dataList = new ArrayList<>();
for (Map.Entry<String, String> entry : map.entrySet()) {
ColumData data = new ColumData();
data.setColumName(PinyinUtil.toPinyin(entry.getKey()));
data.setColumValue(entry.getValue());
dataList.add(data);
}
excelToMysqlMapper.insertRow(tableName, dataList);
}
// 使用 SAX 解析插入数据
processSheet(styles, strings, sheetStream, tableName);
log.info("✅ 已导入 {} 行到表 {}", rows.size(), tableName);
}
sheetStream.close();
log.info("🎯 完成导入表 [{}]\n", tableName);
stringRedisTemplate.opsForValue().set("importing:" + taskId+":"+"finished", String.valueOf(sheetIndex + 1), 30, TimeUnit.MINUTES);
}
log.info("🎉 Excel 导入完成,共处理 {} 个有效 Sheet", visibleSheetIndexes.size());
stringRedisTemplate.opsForValue().set("importing:" + taskId+":"+"status", "done", 30, TimeUnit.MINUTES);
} catch (Exception e) {
log.error("❌ Excel 流式导入失败: {}", e.getMessage(), e);
}
}
/**
 * Streams one worksheet through a SAX parser and writes its rows to {@code tableName}.
 *
 * <p>The sheet XML comes from an uploaded workbook, so the parser is hardened against
 * XXE: DOCTYPE declarations are rejected and external entities / DTDs are never loaded.
 *
 * @param styles      workbook styles table (currently not used by the handler)
 * @param strings     shared strings table used to resolve string cells
 * @param sheetStream raw XML stream of the sheet; not closed by this method
 * @param tableName   target table that receives the rows
 * @throws Exception if the parser cannot be created or configured, or parsing fails
 */
private void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, InputStream sheetStream, String tableName)
        throws Exception {
    XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
    // Untrusted input: forbid DOCTYPE and any external entity/DTD resolution.
    parser.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
    parser.setFeature("http://xml.org/sax/features/external-general-entities", false);
    parser.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

    SheetHandler handler = new SheetHandler(strings, tableName, excelToMysqlMapper);
    parser.setContentHandler(handler);
    parser.parse(new InputSource(sheetStream));
    // The handler only flushes full batches while parsing; write the remaining rows.
    handler.flushBatch();
}
}

View File

@ -2,26 +2,25 @@ package com.bjtds.brichat.util;
import org.apache.poi.ss.usermodel.*;
import org.springframework.web.multipart.MultipartFile;
import lombok.extern.slf4j.Slf4j;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.*;
/**
* Excel列类型判断工具类并映射为数据库类型
* Excel列类型判断工具类
* 自动识别 STRING / INTEGER / DOUBLE / BOOLEAN / DATE / TIME / DATETIME
* 精确映射到数据库字段类型
*/
@Slf4j
public class ExcelColumnTypeUtil {
public enum ColumnType {
STRING, INTEGER, DOUBLE, BOOLEAN, DATE, UNKNOWN
STRING, INTEGER, DOUBLE, BOOLEAN, DATE, TIME, DATETIME, UNKNOWN
}
/**
* 获取列对应的数据库类型
* @param file Excel文件
* @param sheetIndex 第几个sheet
* @param sampleRows 用前几行判断
* @return Map<列名, 数据库列类型>
*/
/** 获取列对应的数据库类型 */
public static Map<String, String> getColumnDbTypes(MultipartFile file, int sheetIndex, int sampleRows) throws Exception {
Map<String, ColumnType> columnTypes = getColumnTypes(file, sheetIndex, sampleRows);
Map<String, String> dbTypes = new LinkedHashMap<>();
@ -31,29 +30,29 @@ public class ExcelColumnTypeUtil {
return dbTypes;
}
/**
* Excel列类型映射为数据库列类型
*/
/** Excel列类型映射为数据库列类型 */
private static String toDbType(ColumnType type) {
switch (type) {
case STRING:
return "TEXT";
case INTEGER:
return "BIGINT"; // OpenGauss/PostgreSQL推荐使用BIGINT
return "BIGINT"; // OpenGauss/PostgreSQL 推荐
case DOUBLE:
return "DOUBLE PRECISION";
case BOOLEAN:
return "BOOLEAN";
case DATE:
return "DATE";
case TIME:
return "TIME";
case DATETIME:
return "TIMESTAMP";
default:
return "TEXT";
}
}
/**
* 判断每一列的数据类型
*/
/** 判断每一列的数据类型 */
public static Map<String, ColumnType> getColumnTypes(MultipartFile file, int sheetIndex, int sampleRows) throws Exception {
Map<String, ColumnType> columnTypes = new LinkedHashMap<>();
@ -61,6 +60,7 @@ public class ExcelColumnTypeUtil {
Workbook workbook = WorkbookFactory.create(is)) {
int sheetCount = workbook.getNumberOfSheets();
log.info("📘 Sheet数量: {}", sheetCount);
if (sheetIndex >= sheetCount || sheetIndex < 0) {
throw new IllegalArgumentException("Sheet索引超出范围");
}
@ -71,7 +71,7 @@ public class ExcelColumnTypeUtil {
List<String> headers = new ArrayList<>();
for (Cell cell : headerRow) {
headers.add(cell.getStringCellValue().trim());
headers.add(getCellValue(cell));
}
for (String header : headers) {
@ -95,36 +95,137 @@ public class ExcelColumnTypeUtil {
return columnTypes;
}
/** 判断单元格类型 */
private static ColumnType detectCellType(Cell cell) {
if (cell == null || cell.getCellType() == CellType.BLANK) return ColumnType.UNKNOWN;
switch (cell.getCellType()) {
case STRING: return ColumnType.STRING;
case BOOLEAN: return ColumnType.BOOLEAN;
case STRING:
String val = cell.getStringCellValue().trim();
if (val.matches("\\d{1,2}:\\d{2}(:\\d{2})?")) return ColumnType.TIME;
if (val.matches("\\d{4}-\\d{2}-\\d{2}")) return ColumnType.DATE;
if (val.matches("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}(:\\d{2})?")) return ColumnType.DATETIME;
return ColumnType.STRING;
case BOOLEAN:
return ColumnType.BOOLEAN;
case NUMERIC:
if (DateUtil.isCellDateFormatted(cell)) return ColumnType.DATE;
double val = cell.getNumericCellValue();
return (val == Math.floor(val)) ? ColumnType.INTEGER : ColumnType.DOUBLE;
if (DateUtil.isCellDateFormatted(cell)) {
Date date = cell.getDateCellValue();
Calendar cal = Calendar.getInstance();
cal.setTime(date);
int year = cal.get(Calendar.YEAR);
int hour = cal.get(Calendar.HOUR_OF_DAY);
int minute = cal.get(Calendar.MINUTE);
int second = cal.get(Calendar.SECOND);
if ((year == 1900 || year == 1899) && (hour + minute + second > 0))
return ColumnType.TIME; // 仅时间
if (hour + minute + second > 0)
return ColumnType.DATETIME;
return ColumnType.DATE;
} else {
double valNum = cell.getNumericCellValue();
return (valNum == Math.floor(valNum)) ? ColumnType.INTEGER : ColumnType.DOUBLE;
}
case FORMULA:
try {
if (DateUtil.isCellDateFormatted(cell)) {
return detectCellTypeFromDate(cell.getDateCellValue());
}
double num = cell.getNumericCellValue();
return (num == Math.floor(num)) ? ColumnType.INTEGER : ColumnType.DOUBLE;
} catch (IllegalStateException e) {
return ColumnType.STRING;
}
default: return ColumnType.STRING;
default:
return ColumnType.STRING;
}
}
/**
 * Classifies a date value as TIME, DATETIME or DATE.
 *
 * <p>A value with no time-of-day component (hour, minute and second all zero) is a
 * DATE. Otherwise it is a TIME when its year is 1899 or 1900, and a DATETIME for any
 * other year. Fields are read with the JVM default time zone.
 *
 * @param date the date to classify
 * @return {@code TIME}, {@code DATETIME} or {@code DATE}
 */
private static ColumnType detectCellTypeFromDate(Date date) {
    Calendar calendar = Calendar.getInstance();
    calendar.setTime(date);

    int timeOfDay = calendar.get(Calendar.HOUR_OF_DAY)
            + calendar.get(Calendar.MINUTE)
            + calendar.get(Calendar.SECOND);
    if (timeOfDay <= 0) {
        return ColumnType.DATE;
    }

    int year = calendar.get(Calendar.YEAR);
    boolean epochYear = (year == 1900 || year == 1899);
    return epochYear ? ColumnType.TIME : ColumnType.DATETIME;
}
/**
 * Merges the type seen so far for a column with the type of one more cell.
 *
 * <p>Rules, in order:
 * <ul>
 *   <li>UNKNOWN on either side carries no information, so the other type wins
 *       (a blank cell must not change an already detected type);</li>
 *   <li>identical types are kept;</li>
 *   <li>INTEGER mixed with DOUBLE widens to DOUBLE;</li>
 *   <li>DATE mixed with TIME becomes DATETIME;</li>
 *   <li>every other mix (anything with STRING or BOOLEAN, dates with numbers, ...)
 *       falls back to STRING.</li>
 * </ul>
 *
 * @param oldType type accumulated so far
 * @param newType type detected for the current cell
 * @return the merged type
 */
private static ColumnType mergeTypes(ColumnType oldType, ColumnType newType) {
    if (oldType == ColumnType.UNKNOWN) return newType;
    if (newType == ColumnType.UNKNOWN) return oldType;
    if (oldType == newType) return oldType;

    boolean integerAndDouble =
            (oldType == ColumnType.DOUBLE && newType == ColumnType.INTEGER)
                    || (oldType == ColumnType.INTEGER && newType == ColumnType.DOUBLE);
    if (integerAndDouble) return ColumnType.DOUBLE;

    boolean dateAndTime =
            (oldType == ColumnType.DATE && newType == ColumnType.TIME)
                    || (oldType == ColumnType.TIME && newType == ColumnType.DATE);
    if (dateAndTime) return ColumnType.DATETIME;

    // Incompatible combination: text is the only type that can hold both values.
    return ColumnType.STRING;
}
/**
 * Returns the display text of a cell.
 *
 * <p>Strings are trimmed, booleans are rendered as {@code true}/{@code false}, blank or
 * {@code null} cells yield an empty string. Numeric cells and formula cells with a
 * numeric result are rendered by {@link #formatNumericCell(Cell)}; formula cells whose
 * result is not numeric fall back to their (trimmed) string result.
 *
 * @param cell the cell to read, may be {@code null}
 * @return the cell text, or an empty string when the cell cannot be read
 */
private static String getCellValue(Cell cell) {
    if (cell == null) return "";
    try {
        switch (cell.getCellType()) {
            case STRING:
                return cell.getStringCellValue().trim();
            case NUMERIC:
                return formatNumericCell(cell);
            case BOOLEAN:
                return String.valueOf(cell.getBooleanCellValue());
            case FORMULA:
                try {
                    return formatNumericCell(cell);
                } catch (IllegalStateException e) {
                    // Reading the formula result as a number failed; use its string result.
                    return cell.getStringCellValue().trim();
                }
            case BLANK:
                return "";
            default:
                return cell.toString();
        }
    } catch (Exception e) {
        log.warn("⚠️ getCellValue 解析异常: {}", e.getMessage());
        return "";
    }
}

/**
 * Formats a cell holding a numeric value.
 *
 * <p>Date-formatted cells are rendered as {@code HH:mm:ss}, {@code yyyy-MM-dd} or
 * {@code yyyy-MM-dd HH:mm:ss} depending on what {@code detectCellTypeFromDate} reports.
 * Other numbers are rendered without a fractional part when they are whole.
 * A new {@link SimpleDateFormat} is created per call because the class is not thread-safe.
 */
private static String formatNumericCell(Cell cell) {
    if (DateUtil.isCellDateFormatted(cell)) {
        Date date = cell.getDateCellValue();
        switch (detectCellTypeFromDate(date)) {
            case TIME:
                return new SimpleDateFormat("HH:mm:ss").format(date);
            case DATE:
                return new SimpleDateFormat("yyyy-MM-dd").format(date);
            case DATETIME:
                return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(date);
            default:
                return date.toString();
        }
    }
    double num = cell.getNumericCellValue();
    if (num == Math.floor(num)) return String.valueOf((long) num);
    return String.valueOf(num);
}
}

View File

@ -1,85 +0,0 @@
package com.bjtds.brichat.util;
import org.apache.poi.ss.usermodel.*;
import org.springframework.web.multipart.MultipartFile;
import java.io.*;
import java.text.SimpleDateFormat;
import java.util.*;
/**
 * Reads a whole worksheet into memory as a list of header-keyed rows.
 */
public class ExcelReader {

    /**
     * Loads the sheet at {@code sheetIndex}; row 0 supplies the column headers.
     *
     * <p>Each following non-null row becomes an insertion-ordered map from header text
     * to cell text. Because headers are map keys, a repeated header text keeps only the
     * value of its last column.
     *
     * @param file       uploaded workbook
     * @param sheetIndex zero-based sheet index
     * @return the data rows; empty when the sheet has no header row
     * @throws IOException              if the workbook cannot be read
     * @throws IllegalArgumentException if {@code sheetIndex} is outside the workbook
     */
    public static List<Map<String, String>> readExcel(MultipartFile file , int sheetIndex) throws IOException {
        List<Map<String, String>> records = new ArrayList<>();

        try (InputStream in = file.getInputStream();
             Workbook book = WorkbookFactory.create(in)) {

            boolean outOfRange = sheetIndex < 0 || sheetIndex >= book.getNumberOfSheets();
            if (outOfRange) {
                throw new IllegalArgumentException("Sheet索引超出范围");
            }

            Sheet sheet = book.getSheetAt(sheetIndex);
            Row titleRow = sheet.getRow(0);
            if (titleRow == null) {
                return records;
            }

            List<String> titles = new ArrayList<>();
            for (Cell titleCell : titleRow) {
                titles.add(getCellValue(titleCell));
            }

            int lastRow = sheet.getLastRowNum();
            for (int r = 1; r <= lastRow; r++) {
                Row current = sheet.getRow(r);
                if (current != null) {
                    Map<String, String> record = new LinkedHashMap<>();
                    int col = 0;
                    for (String title : titles) {
                        record.put(title, getCellValue(current.getCell(col)));
                        col++;
                    }
                    records.add(record);
                }
            }
        }
        return records;
    }

    /**
     * Converts a cell to text: trimmed strings, {@code yyyy-MM-dd} for date-formatted
     * numbers, whole numbers without a fraction, and an empty string for null/blank cells.
     * Formula cells are read as numbers first and as strings when that fails.
     */
    private static String getCellValue(Cell cell) {
        if (cell == null) {
            return "";
        }
        switch (cell.getCellType()) {
            case STRING:
                return cell.getStringCellValue().trim();
            case NUMERIC:
                if (DateUtil.isCellDateFormatted(cell)) {
                    SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
                    return dayFormat.format(cell.getDateCellValue());
                }
                return numberToText(cell.getNumericCellValue());
            case BOOLEAN:
                return String.valueOf(cell.getBooleanCellValue());
            case FORMULA:
                try {
                    return numberToText(cell.getNumericCellValue());
                } catch (IllegalStateException e) {
                    return cell.getStringCellValue();
                }
            case BLANK:
                return "";
            default:
                return cell.toString();
        }
    }

    /** Renders whole numbers as integers and everything else with {@code String.valueOf}. */
    private static String numberToText(double number) {
        boolean whole = number == Math.floor(number);
        return whole ? String.valueOf((long) number) : String.valueOf(number);
    }
}

View File

@ -0,0 +1,37 @@
package com.bjtds.brichat.util;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
/**
 * Converts text into an identifier-friendly form by replacing Chinese characters
 * with their toneless, lower-case pinyin.
 */
public class PinyinUtil {

    private static final HanyuPinyinOutputFormat FORMAT = new HanyuPinyinOutputFormat();

    static {
        FORMAT.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        FORMAT.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
    }

    /** Static utility; not meant to be instantiated. */
    private PinyinUtil() {
    }

    /**
     * Transliterates {@code text} character by character.
     *
     * <ul>
     *   <li>Characters in U+4E00..U+9FA5 are replaced by the first pinyin reading the
     *       library returns; a character with no reading is dropped, and a conversion
     *       error yields {@code "_"}.</li>
     *   <li>Other letters and digits (as defined by {@link Character#isLetterOrDigit(char)},
     *       which includes non-ASCII letters) are copied unchanged.</li>
     *   <li>Everything else becomes {@code "_"}.</li>
     * </ul>
     *
     * @param text the text to convert; {@code null} is treated as empty
     * @return the converted text, never {@code null}
     */
    public static String toPinyin(String text) {
        if (text == null || text.isEmpty()) {
            return "";
        }
        StringBuilder sb = new StringBuilder(text.length() * 2);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            // Plain range check instead of compiling a regex for every character.
            if (c >= '\u4E00' && c <= '\u9FA5') {
                try {
                    String[] pinyinArray = PinyinHelper.toHanyuPinyinStringArray(c, FORMAT);
                    if (pinyinArray != null && pinyinArray.length > 0) {
                        sb.append(pinyinArray[0]);
                    }
                } catch (Exception e) {
                    sb.append("_");
                }
            } else if (Character.isLetterOrDigit(c)) {
                sb.append(c);
            } else {
                sb.append("_");
            }
        }
        return sb.toString();
    }
}

View File

@ -0,0 +1,237 @@
package com.bjtds.brichat.util;
import com.bjtds.brichat.entity.exceltomysql.ColumData;
import com.bjtds.brichat.mapper.opengauss.ExcelToMysqlMapper;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.DefaultHandler;
import java.util.*;
/**
 * SAX content handler that streams the rows of one worksheet into a database table.
 *
 * <ul>
 *   <li>The first row is the header row; empty header cells are named
 *       {@code unnamed_col_N} (N = 1-based column position).</li>
 *   <li>Cells missing from a data row are stored as {@code null}; rows without any
 *       value are skipped.</li>
 *   <li>Rows are buffered and written through the mapper once {@code BATCH_SIZE} rows
 *       have accumulated. Call {@link #flushBatch()} after parsing to write the rest.</li>
 * </ul>
 *
 * <p>The handler keeps per-sheet parsing state, so use a fresh instance for every sheet.
 * Only cell values carried in {@code <v>} elements are read.
 */
@Slf4j
public class SheetHandler extends DefaultHandler {

    private static final int BATCH_SIZE = 500;
    private static final int SECONDS_PER_DAY = 24 * 60 * 60;
    /** First serial number past 9999-12-31, the last date Excel can represent. */
    private static final double EXCEL_SERIAL_LIMIT = 2958466d;
    /** Unsigned decimal number, e.g. {@code 45000} or {@code 45000.5}. */
    private static final java.util.regex.Pattern PLAIN_NUMBER =
            java.util.regex.Pattern.compile("\\d+(\\.\\d+)?");

    private final ReadOnlySharedStringsTable strings;
    private final ExcelToMysqlMapper mapper;
    private final String tableName;

    private final List<Map<String, String>> batch = new ArrayList<>();
    private final List<String> headers = new ArrayList<>();
    /** Values of the row being parsed, keyed by zero-based column index. */
    private final Map<Integer, String> cellMap = new LinkedHashMap<>();
    /** Header text -> database column name, computed once per header. */
    private final Map<String, String> columnNames = new HashMap<>();
    /** Text of the element being parsed; SAX may deliver it in several chunks. */
    private final StringBuilder lastContents = new StringBuilder();

    private boolean nextIsString;
    private int rowNum = 0;
    private int currentColIndex = -1;

    /**
     * @param strings   shared strings table used to resolve cells of type {@code s}
     * @param tableName table that receives the rows
     * @param mapper    mapper used to insert the rows
     */
    public SheetHandler(ReadOnlySharedStringsTable strings, String tableName, ExcelToMysqlMapper mapper) {
        this.strings = strings;
        this.tableName = tableName;
        this.mapper = mapper;
    }

    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) {
        if ("row".equals(qName)) {
            cellMap.clear();
            currentColIndex = -1;
        } else if ("c".equals(qName)) {
            // The "r" reference is optional; without it the cell follows the previous one.
            int referenced = convertColRefToIndex(attributes.getValue("r"));
            currentColIndex = referenced >= 0 ? referenced : currentColIndex + 1;
            nextIsString = "s".equals(attributes.getValue("t"));
        }
        lastContents.setLength(0);
    }

    @Override
    public void characters(char[] ch, int start, int length) {
        lastContents.append(ch, start, length);
    }

    @Override
    public void endElement(String uri, String localName, String qName) {
        if ("v".equals(qName)) {
            cellMap.put(currentColIndex, readCellValue());
        } else if ("row".equals(qName)) {
            if (rowNum == 0) {
                readHeaderRow();
            } else {
                bufferDataRow();
            }
            rowNum++;
        }
    }

    /** Returns the text of the current {@code <v>} element, resolving shared strings. */
    private String readCellValue() {
        String value = lastContents.toString().trim();
        if (nextIsString && !value.isEmpty()) {
            try {
                int idx = Integer.parseInt(value);
                value = strings.getItemAt(idx).toString();
            } catch (Exception e) {
                // Keep the raw index as the value, but make the failure visible.
                log.warn("⚠️ 共享字符串解析失败 [{}]: {}", value, e.getMessage());
            }
            nextIsString = false;
        }
        return value;
    }

    /** Builds the header list from the first row, naming empty columns automatically. */
    private void readHeaderRow() {
        headers.clear();
        int maxIndex = cellMap.keySet().stream().max(Integer::compareTo).orElse(-1);
        for (int i = 0; i <= maxIndex; i++) {
            String header = cellMap.getOrDefault(i, "").trim();
            if (header.isEmpty()) {
                header = "unnamed_col_" + (i + 1);
                log.warn("⚠️ 检测到空列名,第 {} 列自动命名为 {}", i + 1, header);
            }
            headers.add(header);
        }
        log.info("🧭 读取表头(共 {} 列):{}", headers.size(), headers);
    }

    /** Adds the current data row to the batch unless it is completely empty. */
    private void bufferDataRow() {
        if (headers.isEmpty()) return;

        Map<String, String> rowMap = new LinkedHashMap<>();
        boolean hasValue = false;
        for (int i = 0; i < headers.size(); i++) {
            String val = cellMap.getOrDefault(i, "").trim();
            if (val.isEmpty()) {
                // Empty cells are written to the database as null.
                rowMap.put(headers.get(i), null);
            } else {
                rowMap.put(headers.get(i), val);
                hasValue = true;
            }
        }
        if (!hasValue) return;

        batch.add(rowMap);
        if (batch.size() >= BATCH_SIZE) {
            flushBatch();
        }
    }

    /**
     * Writes all buffered rows to the table, one insert per row, then empties the buffer.
     * A row that fails to insert is logged and skipped so the rest of the batch proceeds.
     */
    public void flushBatch() {
        if (batch.isEmpty()) return;

        for (Map<String, String> row : batch) {
            List<ColumData> dataList = new ArrayList<>();
            for (Map.Entry<String, String> entry : row.entrySet()) {
                String colName = toColumnName(entry.getKey());
                String colValue = entry.getValue();

                if (colValue != null && isDateLikeColumn(colName)) {
                    colValue = convertExcelDateSmart(colValue);
                }

                ColumData data = new ColumData();
                data.setColumName(colName);
                data.setColumValue(colValue);
                dataList.add(data);
            }

            if (!dataList.isEmpty()) {
                try {
                    mapper.insertRow(tableName, dataList);
                } catch (Exception e) {
                    log.error("❌ 插入行失败 [{}]: {} | 数据: {}", tableName, e.getMessage(), row);
                }
            }
        }

        log.info("✅ 已插入 {} 行数据到表 [{}]", batch.size(), tableName);
        batch.clear();
    }

    /**
     * Maps a header to its database column name: pinyin, every character outside
     * {@code [a-zA-Z0-9_]} replaced by {@code _}, lower-cased. This mirrors the
     * normalisation the import service applies when it creates the table, so inserts
     * address the columns that actually exist.
     */
    private String toColumnName(String header) {
        return columnNames.computeIfAbsent(header, h ->
                PinyinUtil.toPinyin(h).trim().replaceAll("[^a-zA-Z0-9_]", "_").toLowerCase());
    }

    /**
     * Guesses from the column name whether the column holds dates or times.
     * NOTE(review): purely name-based; a pinyin name that happens to contain one of
     * these fragments is also treated as a date column - confirm this is acceptable.
     */
    private boolean isDateLikeColumn(String colName) {
        return colName.contains("date")
                || colName.contains("time")
                || colName.contains("day")
                || colName.contains("created")
                || colName.contains("updated");
    }

    /**
     * Converts an Excel serial number to text; anything that is not a plain number,
     * or lies beyond Excel's date range, is returned unchanged.
     *
     * <ul>
     *   <li>value &lt; 1 (time only): {@code 1970-01-01 HH:mm:ss}</li>
     *   <li>whole number (date only): {@code yyyy-MM-dd}</li>
     *   <li>otherwise: {@code yyyy-MM-dd HH:mm:ss}</li>
     * </ul>
     * The integer part counts days from 1899-12-30, the fraction is the time of day.
     */
    private String convertExcelDateSmart(String excelVal) {
        if (!PLAIN_NUMBER.matcher(excelVal).matches()) {
            // Already textual, e.g. "2024/10/15 08:00".
            return excelVal;
        }
        try {
            double numeric = Double.parseDouble(excelVal);
            if (numeric >= EXCEL_SERIAL_LIMIT) {
                // Too large to be an Excel date; leave the number as it is.
                return excelVal;
            }

            int days = (int) Math.floor(numeric);
            double fraction = numeric - days;
            int totalSeconds = (int) Math.round(fraction * SECONDS_PER_DAY);

            if (numeric < 1) {
                // A fraction just below 1 can round up to a full day; keep it within the day.
                int secondOfDay = Math.min(totalSeconds, SECONDS_PER_DAY - 1);
                return String.format("1970-01-01 %02d:%02d:%02d",
                        secondOfDay / 3600, (secondOfDay % 3600) / 60, secondOfDay % 60);
            }

            Calendar base = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
            base.set(1899, Calendar.DECEMBER, 30, 0, 0, 0);
            base.set(Calendar.MILLISECOND, 0);
            base.add(Calendar.DATE, days);

            if (fraction == 0) {
                return String.format("%1$tY-%1$tm-%1$td", base);
            }
            base.add(Calendar.SECOND, totalSeconds);
            return String.format("%1$tY-%1$tm-%1$td %1$tH:%1$tM:%1$tS", base);
        } catch (Exception e) {
            log.warn("⚠️ 日期转换失败 [{}]: {}", excelVal, e.getMessage());
            return excelVal;
        }
    }

    /**
     * Converts the column letters of a cell reference such as {@code "AB12"} to a
     * zero-based index; returns -1 when the reference is missing or empty.
     */
    private int convertColRefToIndex(String cellRef) {
        if (cellRef == null || cellRef.isEmpty()) return -1;
        String colRef = cellRef.replaceAll("\\d", "");
        int index = 0;
        for (char c : colRef.toUpperCase().toCharArray()) {
            index = index * 26 + (c - 'A' + 1);
        }
        return index - 1;
    }
}