easyexcel plus init

This commit is contained in:
jipengfei-jpf 2024-11-20 12:29:50 +08:00
parent bb74116b89
commit f7d8ceb246
628 changed files with 46172 additions and 2 deletions

15
.editorconfig Normal file
View File

@ -0,0 +1,15 @@
root = true
[*.{groovy, java, kt, xml}]
#缩进风格:空格
indent_style = space
#缩进大小
indent_size = 4
#换行符lf
end_of_line = lf
#字符集utf-8
charset = utf-8
#是否删除行尾的空格
trim_trailing_whitespace = true
#是否在文件的最后插入一个空行
insert_final_newline = true

View File

@ -0,0 +1,9 @@
# 建议先去看文档
[快速开始](https://easyexcel.opensource.alibaba.com/docs/current/) 、[常见问题](https://easyexcel.opensource.alibaba.com/qa/)
# 异常代码
```java
这里写你的代码
```
# 异常提示
大家尽量把问题一次性描述清楚,然后贴上全部异常,这样方便把问题一次性解决掉。
# 其他描述

20
.github/ISSUE_TEMPLATE/bug.md vendored Normal file
View File

@ -0,0 +1,20 @@
---
name: bug
about: 发现一个新的Bug
title: ''
labels: bug
assignees: zhuangjiaju
---
# 建议先去看文档
[快速开始](https://easyexcel.opensource.alibaba.com/docs/current/) 、[常见问题](https://easyexcel.opensource.alibaba.com/qa/)
# 触发场景描述
# 触发Bug的代码
```java
这里写代码
```
# 提示的异常或者没有达到的效果
大家尽量把问题一次性描述清楚,然后贴上全部异常,这样方便把问题一次性解决掉。
至少大家要符合一个原则就是,能让其他人复现出这个问题,如果无法复现,肯定无法解决。

19
.github/ISSUE_TEMPLATE/question.md vendored Normal file
View File

@ -0,0 +1,19 @@
---
name: question
about: 有使用疑问,请先阅读“快速开始”,上面无法解决再提交问题。处理完请关闭问题。
title: ''
labels: help wanted
assignees: ''
---
# 建议先去看文档
[快速开始](https://easyexcel.opensource.alibaba.com/docs/current/) 、[常见问题](https://easyexcel.opensource.alibaba.com/qa/)
# 异常代码
```java
这里写你的代码
```
# 异常提示
大家尽量把问题一次性描述清楚,然后贴上全部异常,这样方便把问题一次性解决掉。
至少大家要符合一个原则就是,能让其他人复现出这个问题,如果无法复现,肯定无法解决。
# 问题描述

12
.github/ISSUE_TEMPLATE/suggest.md vendored Normal file
View File

@ -0,0 +1,12 @@
---
name: suggest
about: 给出一些建议的能
title: ''
labels: suggest
assignees: ''
---
# 建议先去看文档
[快速开始](https://easyexcel.opensource.alibaba.com/docs/current/) 、[常见问题](https://easyexcel.opensource.alibaba.com/qa/)
# 建议描述

View File

@ -0,0 +1,10 @@
## 关联的ISSUE
TODO 请说明关联的异常
## 问题描述&解决
TODO 描述下出了什么问题必要的话提供excel。在PR合并前审核同学一定会复现问题然后再PR合并的。
## 代码注意点
* 确认已经安装了阿里巴巴代码规约插件:[alibaba-java-coding-guidelines ](https://plugins.jetbrains.com/plugin/10046-alibaba-java-coding-guidelines)
* src/main 下面不要出现中文注释
* src/test/core && src/test/demo 下面要新增测试案例请严格参照其他的,包括注释等等什么都不要错
* 如果只是自己测试 全部加入到 src/test/temp
* 如果改动量较大5+文件)尽量先和作者沟通,这个风险很大

57
.github/workflows/ci.yml vendored Normal file
View File

@ -0,0 +1,57 @@
#
# Copyright 2009-2021 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
name: Java CI
on: [push, pull_request]
jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
java: [ 8, 11, 17, 21]
distribution: [ 'adopt' ]
fail-fast: false
max-parallel: 4
name: Test JDK ${{ matrix.java }}
steps:
- uses: actions/checkout@main
- name: Set up JDK
uses: actions/setup-java@main
with:
java-version: ${{ matrix.java }}
distribution: ${{ matrix.distribution }}
- name: Cache local Maven repository
uses: actions/cache@main
with:
path: ~/.m2/repository
key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-m2
- name: Chmod
run: chmod +x mvnw
- name: Test with Maven
if: ${{ matrix.java == '8' }}
run: ./mvnw test -B -Dmaven.test.skip=false
- name: Test with Maven
if: ${{ matrix.java != '8' }}
run: ./mvnw test -B -Dmaven.test.skip=false -DargLine="--add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/sun.reflect.annotation=ALL-UNNAMED"
- name: Maven Build
run: ./mvnw install -B -V
- name: Java Doc
run: ./mvnw javadoc:javadoc

55
.github/workflows/release.yml vendored Normal file
View File

@ -0,0 +1,55 @@
#
# Copyright 2009-2021 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
name: Publish package to the Maven Central Repository
on:
release:
types: [created]
jobs:
publish:
runs-on: ubuntu-latest
steps:
- name: Check out Git repository
uses: actions/checkout@main
- name: Install Java and Maven
uses: actions/setup-java@main
with:
java-version: 8
distribution: 'adopt'
server-id: ossrh
server-username: MAVEN_USERNAME
server-password: MAVEN_PASSWORD
- name: Cache local Maven repository
uses: actions/cache@main
with:
path: ~/.m2/repository
key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-m2
- id: install-secret-key
name: Install GPG secret key
run: |
cat <(echo -e "${{ secrets.GPG_PRIVATE_KEY }}") | gpg --batch --import
- name: Publish package
run: |
mvn --batch-mode -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} clean deploy -Dmaven.test.skip=true -Dmaven.javadoc.skip=false -Dgpg.skip=false
env:
MAVEN_USERNAME: ${{ secrets.OSSRH_USERNAME }}
MAVEN_PASSWORD: ${{ secrets.OSSRH_TOKEN }}

26
.github/workflows/sync2gitee.yml vendored Normal file
View File

@ -0,0 +1,26 @@
# 通过 Github action 在仓库的每一次 commit 后自动同步到 Gitee 上
name: Mirror the Github organization repos to Gitee
on: [push]
jobs:
repo-sync:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@main
with:
persist-credentials: false
- name: Mirror the Github organization repos to Gitee.
uses: Yikun/hub-mirror-action@master
with:
# 必选,需要同步的 Github 这里记住选择的是仓库 或者账号 而不是具体的项目
src: github/alibaba
# 必选,需要同步到的 Gitee 这里记住选择的是仓库 或者账号 而不是具体的项目
dst: gitee/easyexcel
# 必选Gitee公钥对应的私钥https://gitee.com/profile/sshkeys
dst_key: ${{ secrets.GITEE_PRIVATE_KEY }}
# 必选Gitee对应的用于创建仓库的tokenhttps://gitee.com/profile/personal_access_tokens
dst_token: ${{ secrets.GITEE_TOKEN }}
# 如果是组织,指定组织即可,默认为用户 user
account_type: org
# 需要同步的仓库里面的项目
static_list: "easyexcel"

17
.gitignore vendored Normal file
View File

@ -0,0 +1,17 @@
.idea/
*/.settings/
*.idea
.DS_Store
**/.iml*
*.iml
**/.class
**/.classpath
**/.project
*/target/
target/
*.ipr
*.iws
antx.properties
output/
.flattened-pom.xml
dependency-reduced-pom.xml

BIN
.mvn/wrapper/maven-wrapper.jar vendored Normal file

Binary file not shown.

18
.mvn/wrapper/maven-wrapper.properties vendored Normal file
View File

@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.6.3/apache-maven-3.6.3-bin.zip
wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.1.0/maven-wrapper-3.1.0.jar

9
CONTRIBUTING.md Normal file
View File

@ -0,0 +1,9 @@
## 前言
非常感谢您愿意协助EasyExcel的开发EasyExcel成长离不开大家的贡献。但是为了合作的更有效率希望我们在贡献代码的时候能按照如下约定。
## 提前沟通
尽量把自己的想法和实现思路提前沟通可以通过issue,钉钉,QQ都可以可能很多问题我们内部已经讨论过由于各种原因后续不会支持但是您这边又开发好了这样容易浪费您的时间。
## 代码规范
请先安装阿里巴巴代码规约插件https://plugins.jetbrains.com/plugin/10046-alibaba-java-coding-guidelines
目前代码规范已经集成了自动校验然后源代码尽量不要有中文注释。在新增功能的时候尽量注意补充junit。core代表每次travis-ci都会跑的测试案例然后demo用于对外看到temp里面随便写。
## 提交分支
建议提交到最新的版本号.x上面比如 3.x之类的版本为了方便其他同学阅读源代码所以目前的思路是master和maven center的最新版本代码保持一致然后您提交过来的代码我们可能会稍微做一些修改。所以提交到开发分支会比较好。fork也可以直接fork该分支。

201
LICENSE Normal file
View File

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -1,2 +0,0 @@
# easyexcel-plus
easyexcel作者最新升级版本 快速、简洁、解决大文件内存溢出的java处理Excel工具

157
README_EN.md Normal file
View File

@ -0,0 +1,157 @@
EasyExcel
======================
[![Build Status](https://github.com/alibaba/easyexcel/actions/workflows/ci.yml/badge.svg?branch=master)](https://github.com/alibaba/easyexcel/actions/workflows/ci.yml?query=branch%3Amaster)
[![Maven central](https://maven-badges.herokuapp.com/maven-central/com.alibaba/easyexcel/badge.svg)](https://maven-badges.herokuapp.com/maven-central/com.alibaba/easyexcel)
[![License](http://img.shields.io/:license-apache-brightgreen.svg)](http://www.apache.org/licenses/LICENSE-2.0.html)
[![](https://img.shields.io/badge/EasyExcel-Check%20Your%20Contribution-orange)](https://opensource.alibaba.com/contribution_leaderboard/details?projectValue=easyexcel)
[Communication Group 1 in QQ(Full): 662022184](https://jq.qq.com/?_wv=1027&k=1T21jJxh)
[Communication Group 2 in QQ(Full): 1097936804](https://jq.qq.com/?_wv=1027&k=j5zEy6Xl)
[Communication Group 3 in QQ(Full): 453928496](https://qm.qq.com/cgi-bin/qm/qr?k=e2ULsA5A0GldhV2CXJ8sIbAyu9I6qqs7&jump_from=webapi)
[Communication Group 4 in QQ: 496594404](https://qm.qq.com/cgi-bin/qm/qr?k=e_aVG1Q7gi0PJUBkbrUGAgbeO3kUEInK&jump_from=webapi)
[Communication Group 1 in DingTalkFull: 21960511](https://qr.dingtalk.com/action/joingroup?code=v1,k1,cchz6k12ci9B08NNqhNRFGXocNVHrZtW0kaOtTKg/Rk=&_dt_no_comment=1&origin=11)
[Communication Group 2 in DingTalkFull: 32796397](https://qr.dingtalk.com/action/joingroup?code=v1,k1,jyU9GtEuNU5S0QTyklqYcYJ8qDZtUuTPMM7uPZTS8Hs=&_dt_no_comment=1&origin=11)
[Communication Group 3 in DingTalkFull: 33797247](https://qr.dingtalk.com/action/joingroup?code=v1,k1,3UGlEScTGQaHpW2cIRo+gkxJ9EVZ5fz26M6nW3uFP30=&_dt_no_comment=1&origin=11)
[Communication Group 4 in DingTalkFull: 33491624](https://qr.dingtalk.com/action/joingroup?code=v1,k1,V14Pb65Too70rQkEaJ9ohb6lZBZbtp6jIL/q9EWh9vA=&_dt_no_comment=1&origin=11)
[Communication Group 5 in DingTalk: 32134498](https://h5.dingtalk.com/circle/healthCheckin.html?dtaction=os&corpId=dingb9fa1325d9dccc3ecac589edd02f1650&5233a=71a83&cbdbhh=qwertyuiop)
[Official Website: https://yuque.com/easyexcel](https://www.yuque.com/easyexcel/doc/easyexcel)
[FAQ](https://www.yuque.com/easyexcel/faq)
#### It is recommended to join a DingTalk group
# EasyExcel, a java toolkit for parsing Excel easily
There are several java frameworks or toolkit which can parse and generate Excel, such as Apache POI or jxl. But they all have some difficulties to handle problems like excessive memory usage. Apache POI framework has a set of SAX mode API can fix some memory overflow problems at some extent, but it still has some flaws. For example, the unzipping and the storage of the unzipping of Excel file in version 07 are done in memory, so the memory consumption is still very high. The EasyExcel toolkit rewrites the logic of POI for parsing Excel version 07. One 3 megabytes Excel file parsed with POI still requires about 100M memory, which can be reduced to a few megabyte by using EasyExcel instead. And yes, there is no memory overflow for even larger excel with EasyExcel. EasyExcel version 03 depends on POI SAX model and does model transformation/encapsulation in the upper layer to make it simpler and more convenient for users.
## Using EasyExcel version 3.0.2+, a machine with 64M RAM can read a 75 megabyte Excel file containing 460,000 rows and 25 columns in 20 seconds
Of course, there is also a very fast mode can be faster, but the memory consumption will be a little more than 100M
![img](img/readme/large.png)
## Version support
* EasyExcel version 2+ works on Java7 or Java6
* EasyExcel version 3+ works on Java8 or java8+
### About version upgrade
* It is not recommended upgrading across major versions, especially across 2 major versions.
* There are some incompatibilities in upgrading from version 2+ to version 3+.
* Using a custom interceptor to modify the style can cause problems, even if it does not compile with errors.
* When reading the Excel file, the `invoke` function will throw an exception, there will not be an additional layer of `ExcelAnalysisException` wrapped here, and it will not compile with errors.
* Style and other annotations involving `boolean` or some enumeration values have been changed, adding the default value. The compiler will report an error, just change the annotation.
* It is recommended to re-test the relevant functions after upgrading across major versions.
### Latest Version
```xml
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>easyexcel</artifactId>
<version>3.0.2</version>
</dependency>
```
## Advertising space
### Alibaba New Retail Business Department Recruitment
Alibaba New Retail Business Department sincerely recruit JAVA senior development, technical experts. If you are interested, you can contact us by WeChat, or send your Resume to my email jipengfei.jpf@alibaba-inc.com.
### EasyExcel personnel recruitment
Anyone who wants to participate in this project can apply, mainly responsible for answering questions in the communication group and dealing with the issues. Of course, you can also do some PR.
Since there is no material reward for participating in the open source project, and the current maintainers are also maintaining the project in their spare time. So people who want to join this project need to be persistent, not just on a whim.
The requirements are as follows:
* There are certain java coding skills & good coding habits
* Understand the read&write principles of EasyExcel
* Has passion for open source projects
* Be able to do things consistently for a long time
* Your job is not so busy
## Related Documents
* [Quick Start](https://www.yuque.com/easyexcel/doc/easyexcel)
* [About Us](/abouteasyexcel.md)
* [Update Notes](/update.md)
* [Code Contribution](https://www.yuque.com/easyexcel/doc/contribute)
## Maintainers
姬朋飞(玉霄)、庄家钜、怀宇
## Quick Start
### Read Excel File
DEMO[https://github.com/alibaba/easyexcel/blob/master/src/test/java/com/alibaba/easyexcel/demo/read/ReadTest.java](/src/test/java/com/alibaba/easyexcel/test/demo/read/ReadTest.java)
```java
/**
* The easiest way to read Excel file using EasyExcel toolkit
*
* <p>
* 1. Create an entity object, such as {@link DemoData}, each property of the entity object corresponds to a specific field in any row of Excel.
* 2. When reading each row of an Excel file, create a callback listener for the corresponding row. Refer to{@link DemoDataListener}
* 3. Invoke the read function
* </p>
*/
@Test
public void simpleRead() {
String fileName = TestFileUtil.getPath() + "demo" + File.separator + "demo.xlsx";
// Specify which entity object class to use to read the Excel content. The file stream will close automatically after reading the first sheet of Excel.
EasyExcel.read(fileName, DemoData.class, new DemoDataListener()).sheet().doRead();
}
```
### Write Excel File
DEMO[https://github.com/alibaba/easyexcel/blob/master/src/test/java/com/alibaba/easyexcel/test/demo/write/WriteTest.java](/src/test/java/com/alibaba/easyexcel/test/demo/write/WriteTest.java)
```java
/**
* The easiest way to write Excel file using EasyExcel toolkit
*
* <p>
* 1. Create an entity object, refer to{@link write.demo.plus.easyexcel.test.DemoData}.
* Each property of the entity object corresponds to a specific field of Excel
* 2. Invoke write function
* </p>
*/
@Test
public void simpleWrite() {
String fileName = TestFileUtil.getPath() + "write" + System.currentTimeMillis() + ".xlsx";
// Specify which entity object class to use to write Excel, it will write to the first sheet of Excel with the name template. Then the file stream will be closed automatically.
// With version 03, just pass in the excelType parameter
EasyExcel.write(fileName, DemoData.class).sheet("template").doWrite(data());
}
```
### File Uploading&Downloading
DEMO[https://github.com/alibaba/easyexcel/blob/master/src/test/java/com/alibaba/easyexcel/test/demo/web/WebTest.java](/src/test/java/com/alibaba/easyexcel/test/demo/web/WebTest.java)
```java
/**
* File downloading
*
* Note: returns an Excel with partial data if it fails
*
* <p>
* 1. Create an entity object, refer to{@link DownloadData}.
* Each property of the entity object corresponds to a specific field of Excel
* 2. Specify the returned properties
* 3. Invoke write function, then the OutputStream is automatically closed when it ends.
* </p>
*/
@GetMapping("download")
public void download(HttpServletResponse response) throws IOException {
// Using swagger may cause some problems, please use your browser directly or use postman to invoke this
response.setContentType("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
response.setCharacterEncoding("utf-8");
// URLEncoder.encode function can prevent Chinese garbled code
String fileName = URLEncoder.encode("test", "UTF-8").replaceAll("\\+", "%20");
response.setHeader("Content-disposition", "attachment;filename*=utf-8''" + fileName + ".xlsx");
EasyExcel.write(response.getOutputStream(), DownloadData.class).sheet("template").doWrite(data());
}
/**
* File uploading
*
* <p>
* 1. Create an entity object, refer to{@link UploadData}
* Each property of the entity object corresponds to a specific field of Excel
* 2. When reading each row of an Excel file, create a callback listener for the corresponding row. Refer to{@link UploadDataListener}
* 3. Invoke read function
* </p>
*/
@PostMapping("upload")
@ResponseBody
public String upload(MultipartFile file) throws IOException {
EasyExcel.read(file.getInputStream(), UploadData.class, new UploadDataListener(uploadDAO)).sheet().doRead();
return "success";
}
```
### Contact Us
If you have any questions, Alibaba colleagues can find me in DingTalk, and others can leave messages here. All related questions are well welcomed.

65
docs/API.md Normal file
View File

@ -0,0 +1,65 @@
# 详细参数介绍
## 关于常见类解析
* EasyExcel 入口类,用于构建开始各种操作
* ExcelReaderBuilder ExcelWriterBuilder 构建出一个 ReadWorkbook WriteWorkbook可以理解成一个excel对象一个excel只要构建一个
* ExcelReaderSheetBuilder ExcelWriterSheetBuilder 构建出一个 ReadSheet WriteSheet对象可以理解成excel里面的一页,每一页都要构建一个
* ReadListener 在每一行读取完毕后都会调用ReadListener来处理数据
* WriteHandler 在每一个操作包括创建单元格、创建表格等都会调用WriteHandler来处理数据
* 所有配置都是继承的Workbook的配置会被Sheet继承所以在用EasyExcel设置参数的时候在EasyExcel...sheet()方法之前作用域是整个sheet,之后针对单个sheet
## 读
### 注解
* `ExcelProperty` 指定当前字段对应excel中的那一列。可以根据名字或者Index去匹配。当然也可以不写默认第一个字段就是index=0以此类推。千万注意要么全部不写要么全部用index要么全部用名字去匹配。千万别三个混着用除非你非常了解源代码中三个混着用怎么去排序的。
* `ExcelIgnore` 默认所有字段都会和excel去匹配加了这个注解会忽略该字段
* `DateTimeFormat` 日期转换,用`String`去接收excel日期格式的数据会调用这个注解。里面的`value`参照`java.text.SimpleDateFormat`
* `NumberFormat` 数字转换,用`String`去接收excel数字格式的数据会调用这个注解。里面的`value`参照`java.text.DecimalFormat`
* `ExcelIgnoreUnannotated` 默认不加`ExcelProperty` 的注解的都会参与读写,加了不会参与
### 参数
#### 通用参数
`ReadWorkbook`,`ReadSheet` 都会有的参数,如果为空,默认使用上级。
* `converter` 转换器,默认加载了很多转换器。也可以自定义。
* `readListener` 监听器,在读取数据的过程中会不断的调用监听器。
* `headRowNumber` 需要读的表格有几行头数据。默认有一行头,也就是认为第二行开始起为数据。
* `head``clazz`二选一。读取文件头对应的列表会根据列表匹配数据建议使用class。
* `clazz``head`二选一。读取文件的头对应的class也可以使用注解。如果两个都不指定则会读取全部数据。
* `autoTrim` 字符串、表头等数据自动trim
* `password` 读的时候是否需要使用密码
#### ReadWorkbook理解成excel对象参数
* `excelType` 当前excel的类型 默认会自动判断
* `inputStream``file`二选一。读取文件的流,如果接收到的是流就只用,不用流建议使用`file`参数。因为使用了`inputStream` easyexcel会帮忙创建临时文件最终还是`file`
* `file``inputStream`二选一。读取文件的文件。
* `autoCloseStream` 自动关闭流。
* `readCache` 默认小于5M用 内存超过5M会使用 `EhCache`,这里不建议使用这个参数。
#### ReadSheet就是excel的一个Sheet参数
* `sheetNo` 需要读取Sheet的编码建议使用这个来指定读取哪个Sheet
* `sheetName` 根据名字去匹配Sheet,excel 2003不支持根据名字去匹配
## 写
### 注解
* `ExcelProperty` index 指定写到第几列,默认根据成员变量排序。`value`指定写入的名称,默认成员变量的名字,多个`value`可以参照快速开始中的复杂头
* `ExcelIgnore` 默认所有字段都会写入excel这个注解会忽略这个字段
* `DateTimeFormat` 日期转换,将`Date`写到excel会调用这个注解。里面的`value`参照`java.text.SimpleDateFormat`
* `NumberFormat` 数字转换,用`Number`写excel会调用这个注解。里面的`value`参照`java.text.DecimalFormat`
* `ExcelIgnoreUnannotated` 默认不加`ExcelProperty` 的注解的都会参与读写,加了不会参与
### 参数
#### 通用参数
`WriteWorkbook`,`WriteSheet` ,`WriteTable`都会有的参数,如果为空,默认使用上级。
* `converter` 转换器,默认加载了很多转换器。也可以自定义。
* `writeHandler` 写的处理器。可以实现`WorkbookWriteHandler`,`SheetWriteHandler`,`RowWriteHandler`,`CellWriteHandler`在写入excel的不同阶段会调用
* `relativeHeadRowIndex` 距离多少行后开始。也就是开头空几行
* `needHead` 是否导出头
* `head``clazz`二选一。写入文件的头列表建议使用class。
* `clazz``head`二选一。写入文件的头对应的class也可以使用注解。
* `autoTrim` 字符串、表头等数据自动trim
#### WriteWorkbook理解成excel对象参数
* `excelType` 当前excel的类型 默认`xlsx`
* `outputStream``file`二选一。写入文件的流
* `file``outputStream`二选一。写入的文件
* `templateInputStream` 模板的文件流
* `templateFile` 模板文件
* `autoCloseStream` 自动关闭流。
* `password` 写的时候是否需要使用密码
* `useDefaultStyle` 写的时候是否是使用默认头
#### WriteSheet就是excel的一个Sheet参数
* `sheetNo` 需要写入的编码。默认0
* `sheetName` 需要些的Sheet名称默认同`sheetNo`
#### WriteTable就把excel的一个Sheet,一块区域看一个table参数
* `tableNo` 需要写入的编码。默认0

26
docs/LARGEREAD.md Normal file
View File

@ -0,0 +1,26 @@
# 10M以上文件读取说明
03版没有办法处理相对内存占用大很多。excel 07版本有个共享字符串[共享字符串](https://docs.microsoft.com/zh-cn/office/open-xml/working-with-the-shared-string-table)的概念这个会非常占用内存如果全部读取到内存的话大概是excel文件的大小的3-10倍所以easyexcel用存储文件的然后再反序列化去读取的策略来节约内存。当然需要通过文件反序列化以后效率会降低大概降低30-50%不一定也看命中率可能会超过100%
## 如果对读取效率感觉还能接受就用默认的永久占用单个excel读取整个过程一般不会超过50M(大概率就30M)剩下临时的GC会很快回收
## 默认大文件处理
默认大文件处理会自动判断共享字符串5M以下会使用内存存储大概占用15-50M的内存,超过5M则使用文件存储然后文件存储也要设置多内存M用来存放临时的共享字符串默认20M。除了共享字符串占用内存外其他占用较少所以可以预估10M所以默认大概30M就能读取一个超级大的文件。
## 根据实际需求配置内存
想自定义设置首先要确定你大概愿意花多少内存来读取一个超级大的excel,比如希望读取excel最多占用100M内存是读取过程中永久占用新生代马上回收的不算那就设置使用文件来存储共享字符串的大小判断为20M(小于20M存内存大于存临时文件)然后设置文件存储时临时共享字符串占用内存大小90M差不多
### 如果最大文件条数也就十几二十万然后excel也就是十几二十M而且不会有很高的并发并且内存也较大
```java
// 强制使用内存存储这样大概一个20M的excel使用150M很多临时对象所以100M会一直GC的内存
// 这样效率会比上面的复杂的策略高很多
// 这里再说明下 就是加了个readCache(new MapCache()) 参数而已其他的参照其他demo写 这里没有写全
EasyExcel.read().readCache(new MapCache());
```
### 对并发要求较高,而且都是经常有超级大文件
```java
// 第一个参数的意思是 多少M共享字符串以后 采用文件存储 单位MB 默认5M
// 第二个参数 文件存储时内存存放多少M缓存数据 默认20M
// 比如 你希望用100M内存(这里说的是解析过程中的永久占用,临时对象不算)来解析excel前面算过了 大概是 20M+90M 所以设置参数为:20 和 90
// 这里再说明下 就是加了个readCacheSelector(new SimpleReadCacheSelector(5, 20))参数而已其他的参照其他demo写 这里没有写全
EasyExcel.read().readCacheSelector(new SimpleReadCacheSelector(5, 20));
```
### 关于maxCacheActivateSize 也就是前面第二个参数的详细说明
easyexcel在使用文件存储的时候会把共享字符串拆分成1000条一批然后放到文件存储。然后excel来读取共享字符串大概率是按照顺序的所以默认20M的1000条的数据放在内存命中后直接返回没命中去读文件。所以不能设置太小太小了很难命中一直去读取文件太大了的话会占用过多的内存。
### 如何判断 maxCacheActivateSize是否需要调整
开启debug日志会输出`Already put :4000000` 最后一次输出大概可以得出值为400W,然后看`Cache misses count:4001`得到值为4K400W/4K=1000 这代表已经`maxCacheActivateSize` 已经非常合理了。如果小于500 问题就非常大了500到1000 应该都还行。

View File

@ -0,0 +1,64 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ai.chat2db.excel</groupId>
<artifactId>easyexcel-plus-parent</artifactId>
<version>1.0.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<url>https://github.com/CodePhiliaX/easyexcel-plus</url>
<packaging>jar</packaging>
<artifactId>easyexcel-plus-core</artifactId>
<name>easyexcel-plus-core</name>
<dependencies>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.11.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>5.2.5</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.2.5</version>
</dependency>
<dependency>
<groupId>org.ehcache</groupId>
<artifactId>ehcache</artifactId>
<version>3.9.11</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.16.1</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.36</version>
</dependency>
<!-- provided -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.32</version>
</dependency>
<dependency>
<groupId>ai.chat2db.excel</groupId>
<artifactId>easyexcel-plus-support</artifactId>
<version>1.0.1-SNAPSHOT</version>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,8 @@
package ai.chat2db.excel;
/**
* This is actually {@link EasyExcelFactory}, and short names look better.
*
* @author jipengfei
*/
public class EasyExcel extends EasyExcelFactory {}

View File

@ -0,0 +1,356 @@
package ai.chat2db.excel;
import java.io.File;
import java.io.InputStream;
import java.io.OutputStream;
import ai.chat2db.excel.read.builder.ExcelReaderBuilder;
import ai.chat2db.excel.read.builder.ExcelReaderSheetBuilder;
import ai.chat2db.excel.read.listener.ReadListener;
import ai.chat2db.excel.write.builder.ExcelWriterBuilder;
import ai.chat2db.excel.write.builder.ExcelWriterSheetBuilder;
import ai.chat2db.excel.write.builder.ExcelWriterTableBuilder;
/**
* Reader and writer factory class
*
* @author jipengfei
*/
public class EasyExcelFactory {
/**
* Build excel the write
*
* @return
*/
public static ExcelWriterBuilder write() {
return new ExcelWriterBuilder();
}
/**
* Build excel the write
*
* @param file File to write
* @return Excel writer builder
*/
public static ExcelWriterBuilder write(File file) {
return write(file, null);
}
/**
* Build excel the write
*
* @param file File to write
* @param head Annotate the class for configuration information
* @return Excel writer builder
*/
public static ExcelWriterBuilder write(File file, Class head) {
ExcelWriterBuilder excelWriterBuilder = new ExcelWriterBuilder();
excelWriterBuilder.file(file);
if (head != null) {
excelWriterBuilder.head(head);
}
return excelWriterBuilder;
}
/**
* Build excel the write
*
* @param pathName File path to write
* @return Excel writer builder
*/
public static ExcelWriterBuilder write(String pathName) {
return write(pathName, null);
}
/**
* Build excel the write
*
* @param pathName File path to write
* @param head Annotate the class for configuration information
* @return Excel writer builder
*/
public static ExcelWriterBuilder write(String pathName, Class head) {
ExcelWriterBuilder excelWriterBuilder = new ExcelWriterBuilder();
excelWriterBuilder.file(pathName);
if (head != null) {
excelWriterBuilder.head(head);
}
return excelWriterBuilder;
}
/**
* Build excel the write
*
* @param outputStream Output stream to write
* @return Excel writer builder
*/
public static ExcelWriterBuilder write(OutputStream outputStream) {
return write(outputStream, null);
}
/**
* Build excel the write
*
* @param outputStream Output stream to write
* @param head Annotate the class for configuration information.
* @return Excel writer builder
*/
public static ExcelWriterBuilder write(OutputStream outputStream, Class head) {
ExcelWriterBuilder excelWriterBuilder = new ExcelWriterBuilder();
excelWriterBuilder.file(outputStream);
if (head != null) {
excelWriterBuilder.head(head);
}
return excelWriterBuilder;
}
/**
* Build excel the <code>writerSheet</code>
*
* @return Excel sheet writer builder
*/
public static ExcelWriterSheetBuilder writerSheet() {
return writerSheet(null, null);
}
/**
* Build excel the <code>writerSheet</code>
*
* @param sheetNo Index of sheet,0 base.
* @return Excel sheet writer builder.
*/
public static ExcelWriterSheetBuilder writerSheet(Integer sheetNo) {
return writerSheet(sheetNo, null);
}
/**
* Build excel the 'writerSheet'
*
* @param sheetName The name of sheet.
* @return Excel sheet writer builder.
*/
public static ExcelWriterSheetBuilder writerSheet(String sheetName) {
return writerSheet(null, sheetName);
}
/**
* Build excel the 'writerSheet'
*
* @param sheetNo Index of sheet,0 base.
* @param sheetName The name of sheet.
* @return Excel sheet writer builder.
*/
public static ExcelWriterSheetBuilder writerSheet(Integer sheetNo, String sheetName) {
ExcelWriterSheetBuilder excelWriterSheetBuilder = new ExcelWriterSheetBuilder();
if (sheetNo != null) {
excelWriterSheetBuilder.sheetNo(sheetNo);
}
if (sheetName != null) {
excelWriterSheetBuilder.sheetName(sheetName);
}
return excelWriterSheetBuilder;
}
/**
* Build excel the <code>writerTable</code>
*
* @return Excel table writer builder.
*/
public static ExcelWriterTableBuilder writerTable() {
return writerTable(null);
}
/**
* Build excel the 'writerTable'
*
* @param tableNo Index of table,0 base.
* @return Excel table writer builder.
*/
public static ExcelWriterTableBuilder writerTable(Integer tableNo) {
ExcelWriterTableBuilder excelWriterTableBuilder = new ExcelWriterTableBuilder();
if (tableNo != null) {
excelWriterTableBuilder.tableNo(tableNo);
}
return excelWriterTableBuilder;
}
/**
* Build excel the read
*
* @return Excel reader builder.
*/
public static ExcelReaderBuilder read() {
return new ExcelReaderBuilder();
}
/**
* Build excel the read
*
* @param file File to read.
* @return Excel reader builder.
*/
public static ExcelReaderBuilder read(File file) {
return read(file, null, null);
}
/**
* Build excel the read
*
* @param file File to read.
* @param readListener Read listener.
* @return Excel reader builder.
*/
public static ExcelReaderBuilder read(File file, ReadListener readListener) {
return read(file, null, readListener);
}
/**
* Build excel the read
*
* @param file File to read.
* @param head Annotate the class for configuration information.
* @param readListener Read listener.
* @return Excel reader builder.
*/
public static ExcelReaderBuilder read(File file, Class head, ReadListener readListener) {
ExcelReaderBuilder excelReaderBuilder = new ExcelReaderBuilder();
excelReaderBuilder.file(file);
if (head != null) {
excelReaderBuilder.head(head);
}
if (readListener != null) {
excelReaderBuilder.registerReadListener(readListener);
}
return excelReaderBuilder;
}
/**
* Build excel the read
*
* @param pathName File path to read.
* @return Excel reader builder.
*/
public static ExcelReaderBuilder read(String pathName) {
return read(pathName, null, null);
}
/**
* Build excel the read
*
* @param pathName File path to read.
* @param readListener Read listener.
* @return Excel reader builder.
*/
public static ExcelReaderBuilder read(String pathName, ReadListener readListener) {
return read(pathName, null, readListener);
}
/**
* Build excel the read
*
* @param pathName File path to read.
* @param head Annotate the class for configuration information.
* @param readListener Read listener.
* @return Excel reader builder.
*/
public static ExcelReaderBuilder read(String pathName, Class head, ReadListener readListener) {
ExcelReaderBuilder excelReaderBuilder = new ExcelReaderBuilder();
excelReaderBuilder.file(pathName);
if (head != null) {
excelReaderBuilder.head(head);
}
if (readListener != null) {
excelReaderBuilder.registerReadListener(readListener);
}
return excelReaderBuilder;
}
/**
* Build excel the read
*
* @param inputStream Input stream to read.
* @return Excel reader builder.
*/
public static ExcelReaderBuilder read(InputStream inputStream) {
return read(inputStream, null, null);
}
/**
* Build excel the read
*
* @param inputStream Input stream to read.
* @param readListener Read listener.
* @return Excel reader builder.
*/
public static ExcelReaderBuilder read(InputStream inputStream, ReadListener readListener) {
return read(inputStream, null, readListener);
}
/**
* Build excel the read
*
* @param inputStream Input stream to read.
* @param head Annotate the class for configuration information.
* @param readListener Read listener.
* @return Excel reader builder.
*/
public static ExcelReaderBuilder read(InputStream inputStream, Class head, ReadListener readListener) {
ExcelReaderBuilder excelReaderBuilder = new ExcelReaderBuilder();
excelReaderBuilder.file(inputStream);
if (head != null) {
excelReaderBuilder.head(head);
}
if (readListener != null) {
excelReaderBuilder.registerReadListener(readListener);
}
return excelReaderBuilder;
}
/**
* Build excel the 'readSheet'
*
* @return Excel sheet reader builder.
*/
public static ExcelReaderSheetBuilder readSheet() {
return readSheet(null, null);
}
/**
* Build excel the 'readSheet'
*
* @param sheetNo Index of sheet,0 base.
* @return Excel sheet reader builder.
*/
public static ExcelReaderSheetBuilder readSheet(Integer sheetNo) {
return readSheet(sheetNo, null);
}
/**
* Build excel the 'readSheet'
*
* @param sheetName The name of sheet.
* @return Excel sheet reader builder.
*/
public static ExcelReaderSheetBuilder readSheet(String sheetName) {
return readSheet(null, sheetName);
}
/**
* Build excel the 'readSheet'
*
* @param sheetNo Index of sheet,0 base.
* @param sheetName The name of sheet.
* @return Excel sheet reader builder.
*/
public static ExcelReaderSheetBuilder readSheet(Integer sheetNo, String sheetName) {
ExcelReaderSheetBuilder excelReaderSheetBuilder = new ExcelReaderSheetBuilder();
if (sheetNo != null) {
excelReaderSheetBuilder.sheetNo(sheetNo);
}
if (sheetName != null) {
excelReaderSheetBuilder.sheetName(sheetName);
}
return excelReaderSheetBuilder;
}
}

View File

@ -0,0 +1,123 @@
package ai.chat2db.excel;
import java.io.Closeable;
import java.util.Arrays;
import java.util.List;
import ai.chat2db.excel.read.metadata.ReadSheet;
import ai.chat2db.excel.read.metadata.ReadWorkbook;
import ai.chat2db.excel.analysis.ExcelAnalyser;
import ai.chat2db.excel.analysis.ExcelAnalyserImpl;
import ai.chat2db.excel.analysis.ExcelReadExecutor;
import ai.chat2db.excel.context.AnalysisContext;
import lombok.extern.slf4j.Slf4j;
/**
* Excel readers are all read in event mode.
*
* @author jipengfei
*/
@Slf4j
public class ExcelReader implements Closeable {
/**
* Analyser
*/
private final ExcelAnalyser excelAnalyser;
public ExcelReader(ReadWorkbook readWorkbook) {
excelAnalyser = new ExcelAnalyserImpl(readWorkbook);
}
/**
* Parse all sheet content by default
*
* @deprecated lease use {@link #readAll()}
*/
@Deprecated
public void read() {
readAll();
}
/***
* Parse all sheet content by default
*/
public void readAll() {
excelAnalyser.analysis(null, Boolean.TRUE);
}
/**
* Parse the specified sheetSheetNo start from 0
*
* @param readSheet Read sheet
*/
public ExcelReader read(ReadSheet... readSheet) {
return read(Arrays.asList(readSheet));
}
/**
* Read multiple sheets.
*
* @param readSheetList
* @return
*/
public ExcelReader read(List<ReadSheet> readSheetList) {
excelAnalyser.analysis(readSheetList, Boolean.FALSE);
return this;
}
/**
* Context for the entire execution process
*
* @return
*/
public AnalysisContext analysisContext() {
return excelAnalyser.analysisContext();
}
/**
* Current executor
*
* @return
*/
public ExcelReadExecutor excelExecutor() {
return excelAnalyser.excelExecutor();
}
/**
* @return
* @deprecated please use {@link #analysisContext()}
*/
@Deprecated
public AnalysisContext getAnalysisContext() {
return analysisContext();
}
/**
* Complete the entire read file.Release the cache and close stream.
*/
public void finish() {
if (excelAnalyser != null) {
excelAnalyser.finish();
}
}
@Override
public void close() {
finish();
}
/**
* Prevents calls to {@link #finish} from freeing the cache
*
*/
@Override
protected void finalize() {
try {
finish();
} catch (Throwable e) {
log.warn("Destroy object failed", e);
}
}
}

View File

@ -0,0 +1,172 @@
package ai.chat2db.excel;
import java.io.Closeable;
import java.util.Collection;
import java.util.function.Supplier;
import ai.chat2db.excel.context.WriteContext;
import ai.chat2db.excel.write.ExcelBuilder;
import ai.chat2db.excel.write.ExcelBuilderImpl;
import ai.chat2db.excel.write.metadata.WriteSheet;
import ai.chat2db.excel.write.metadata.WriteTable;
import ai.chat2db.excel.write.metadata.WriteWorkbook;
import ai.chat2db.excel.write.metadata.fill.FillConfig;
import lombok.extern.slf4j.Slf4j;
/**
* Excel Writer This tool is used to write value out to Excel via POI. This object can perform the following two
* functions.
*
* <pre>
* 1. Create a new empty Excel workbook, write the value to the stream after the value is filled.
* 2. Edit existing Excel, write the original Excel file, or write it to other places.}
* </pre>
*
* @author jipengfei
*/
@Slf4j
public class ExcelWriter implements Closeable {
private final ExcelBuilder excelBuilder;
/**
* Create new writer
*
* @param writeWorkbook
*/
public ExcelWriter(WriteWorkbook writeWorkbook) {
excelBuilder = new ExcelBuilderImpl(writeWorkbook);
}
/**
* Write data to a sheet
*
* @param data Data to be written
* @param writeSheet Write to this sheet
* @return this current writer
*/
public ExcelWriter write(Collection<?> data, WriteSheet writeSheet) {
return write(data, writeSheet, null);
}
/**
* Write data to a sheet
*
* @param supplier Data to be written
* @param writeSheet Write to this sheet
* @return this current writer
*/
public ExcelWriter write(Supplier<Collection<?>> supplier, WriteSheet writeSheet) {
return write(supplier.get(), writeSheet, null);
}
/**
* Write value to a sheet
*
* @param data Data to be written
* @param writeSheet Write to this sheet
* @param writeTable Write to this table
* @return this
*/
public ExcelWriter write(Collection<?> data, WriteSheet writeSheet, WriteTable writeTable) {
excelBuilder.addContent(data, writeSheet, writeTable);
return this;
}
/**
* Write value to a sheet
*
* @param supplier Data to be written
* @param writeSheet Write to this sheet
* @param writeTable Write to this table
* @return this
*/
public ExcelWriter write(Supplier<Collection<?>> supplier, WriteSheet writeSheet, WriteTable writeTable) {
excelBuilder.addContent(supplier.get(), writeSheet, writeTable);
return this;
}
/**
* Fill value to a sheet
*
* @param data
* @param writeSheet
* @return
*/
public ExcelWriter fill(Object data, WriteSheet writeSheet) {
return fill(data, null, writeSheet);
}
/**
* Fill value to a sheet
*
* @param data
* @param fillConfig
* @param writeSheet
* @return
*/
public ExcelWriter fill(Object data, FillConfig fillConfig, WriteSheet writeSheet) {
excelBuilder.fill(data, fillConfig, writeSheet);
return this;
}
/**
* Fill value to a sheet
*
* @param supplier
* @param writeSheet
* @return
*/
public ExcelWriter fill(Supplier<Object> supplier, WriteSheet writeSheet) {
return fill(supplier.get(), null, writeSheet);
}
/**
* Fill value to a sheet
*
* @param supplier
* @param fillConfig
* @param writeSheet
* @return
*/
public ExcelWriter fill(Supplier<Object> supplier, FillConfig fillConfig, WriteSheet writeSheet) {
excelBuilder.fill(supplier.get(), fillConfig, writeSheet);
return this;
}
/**
* Close IO
*/
public void finish() {
if (excelBuilder != null) {
excelBuilder.finish(false);
}
}
/**
* The context of the entire writing process
*
* @return
*/
public WriteContext writeContext() {
return excelBuilder.writeContext();
}
@Override
public void close() {
finish();
}
/**
* Prevents calls to {@link #finish} from freeing the cache
*/
@Override
protected void finalize() {
try {
finish();
} catch (Throwable e) {
log.warn("Destroy object failed", e);
}
}
}

View File

@ -0,0 +1,43 @@
package ai.chat2db.excel.analysis;
import java.util.List;
import ai.chat2db.excel.read.metadata.ReadSheet;
import ai.chat2db.excel.context.AnalysisContext;
/**
* Excel file analyser
*
* @author jipengfei
*/
public interface ExcelAnalyser {
/**
* parse the sheet
*
* @param readSheetList
* Which sheets you need to read.
* @param readAll
* The <code>readSheetList</code> parameter is ignored, and all sheets are read.
*/
void analysis(List<ReadSheet> readSheetList, Boolean readAll);
/**
* Complete the entire read file.Release the cache and close stream
*/
void finish();
/**
* Acquisition excel executor
*
* @return Excel file Executor
*/
ExcelReadExecutor excelExecutor();
/**
* get the analysis context.
*
* @return analysis context
*/
AnalysisContext analysisContext();
}

View File

@ -0,0 +1,236 @@
package ai.chat2db.excel.analysis;
import ai.chat2db.excel.analysis.v07.XlsxSaxAnalyser;
import ai.chat2db.excel.exception.ExcelAnalysisException;
import ai.chat2db.excel.exception.ExcelAnalysisStopException;
import ai.chat2db.excel.read.metadata.ReadSheet;
import ai.chat2db.excel.read.metadata.ReadWorkbook;
import ai.chat2db.excel.read.metadata.holder.ReadWorkbookHolder;
import ai.chat2db.excel.read.metadata.holder.csv.CsvReadWorkbookHolder;
import ai.chat2db.excel.read.metadata.holder.xls.XlsReadWorkbookHolder;
import ai.chat2db.excel.read.metadata.holder.xlsx.XlsxReadWorkbookHolder;
import ai.chat2db.excel.analysis.csv.CsvExcelReadExecutor;
import ai.chat2db.excel.analysis.v03.XlsSaxAnalyser;
import ai.chat2db.excel.context.AnalysisContext;
import ai.chat2db.excel.context.csv.CsvReadContext;
import ai.chat2db.excel.context.csv.DefaultCsvReadContext;
import ai.chat2db.excel.context.xls.DefaultXlsReadContext;
import ai.chat2db.excel.context.xls.XlsReadContext;
import ai.chat2db.excel.context.xlsx.DefaultXlsxReadContext;
import ai.chat2db.excel.context.xlsx.XlsxReadContext;
import ai.chat2db.excel.support.ExcelTypeEnum;
import ai.chat2db.excel.util.ClassUtils;
import ai.chat2db.excel.util.DateUtils;
import ai.chat2db.excel.util.FileUtils;
import ai.chat2db.excel.util.NumberDataFormatterUtils;
import ai.chat2db.excel.util.StringUtils;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.InputStream;
import java.util.List;
/**
* @author jipengfei
*/
public class ExcelAnalyserImpl implements ExcelAnalyser {
private static final Logger LOGGER = LoggerFactory.getLogger(ExcelAnalyserImpl.class);
private AnalysisContext analysisContext;
private ExcelReadExecutor excelReadExecutor;
/**
* Prevent multiple shutdowns
*/
private boolean finished = false;
public ExcelAnalyserImpl(ReadWorkbook readWorkbook) {
try {
choiceExcelExecutor(readWorkbook);
} catch (RuntimeException e) {
finish();
throw e;
} catch (Throwable e) {
finish();
throw new ExcelAnalysisException(e);
}
}
private void choiceExcelExecutor(ReadWorkbook readWorkbook) throws Exception {
ExcelTypeEnum excelType = ExcelTypeEnum.valueOf(readWorkbook);
switch (excelType) {
case XLS:
POIFSFileSystem poifsFileSystem;
if (readWorkbook.getFile() != null) {
poifsFileSystem = new POIFSFileSystem(readWorkbook.getFile());
} else {
poifsFileSystem = new POIFSFileSystem(readWorkbook.getInputStream());
}
// So in encrypted excel, it looks like XLS but it's actually XLSX
if (poifsFileSystem.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
InputStream decryptedStream = null;
try {
decryptedStream = DocumentFactoryHelper
.getDecryptedStream(poifsFileSystem.getRoot().getFileSystem(), readWorkbook.getPassword());
XlsxReadContext xlsxReadContext = new DefaultXlsxReadContext(readWorkbook, ExcelTypeEnum.XLSX);
analysisContext = xlsxReadContext;
excelReadExecutor = new XlsxSaxAnalyser(xlsxReadContext, decryptedStream);
return;
} finally {
IOUtils.closeQuietly(decryptedStream);
// as we processed the full stream already, we can close the filesystem here
// otherwise file handles are leaked
poifsFileSystem.close();
}
}
if (readWorkbook.getPassword() != null) {
Biff8EncryptionKey.setCurrentUserPassword(readWorkbook.getPassword());
}
XlsReadContext xlsReadContext = new DefaultXlsReadContext(readWorkbook, ExcelTypeEnum.XLS);
xlsReadContext.xlsReadWorkbookHolder().setPoifsFileSystem(poifsFileSystem);
analysisContext = xlsReadContext;
excelReadExecutor = new XlsSaxAnalyser(xlsReadContext);
break;
case XLSX:
XlsxReadContext xlsxReadContext = new DefaultXlsxReadContext(readWorkbook, ExcelTypeEnum.XLSX);
analysisContext = xlsxReadContext;
excelReadExecutor = new XlsxSaxAnalyser(xlsxReadContext, null);
break;
case CSV:
CsvReadContext csvReadContext = new DefaultCsvReadContext(readWorkbook, ExcelTypeEnum.CSV);
analysisContext = csvReadContext;
excelReadExecutor = new CsvExcelReadExecutor(csvReadContext);
break;
default:
break;
}
}
@Override
public void analysis(List<ReadSheet> readSheetList, Boolean readAll) {
try {
if (!readAll && CollectionUtils.isEmpty(readSheetList)) {
throw new IllegalArgumentException("Specify at least one read sheet.");
}
analysisContext.readWorkbookHolder().setParameterSheetDataList(readSheetList);
analysisContext.readWorkbookHolder().setReadAll(readAll);
try {
excelReadExecutor.execute();
} catch (ExcelAnalysisStopException e) {
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Custom stop!");
}
}
} catch (RuntimeException e) {
finish();
throw e;
} catch (Throwable e) {
finish();
throw new ExcelAnalysisException(e);
}
}
@Override
public void finish() {
if (finished) {
return;
}
finished = true;
if (analysisContext == null || analysisContext.readWorkbookHolder() == null) {
return;
}
ReadWorkbookHolder readWorkbookHolder = analysisContext.readWorkbookHolder();
Throwable throwable = null;
try {
if (readWorkbookHolder.getReadCache() != null) {
readWorkbookHolder.getReadCache().destroy();
}
} catch (Throwable t) {
throwable = t;
}
try {
if ((readWorkbookHolder instanceof XlsxReadWorkbookHolder)
&& ((XlsxReadWorkbookHolder) readWorkbookHolder).getOpcPackage() != null) {
((XlsxReadWorkbookHolder) readWorkbookHolder).getOpcPackage().revert();
}
} catch (Throwable t) {
throwable = t;
}
try {
if ((readWorkbookHolder instanceof XlsReadWorkbookHolder)
&& ((XlsReadWorkbookHolder) readWorkbookHolder).getPoifsFileSystem() != null) {
((XlsReadWorkbookHolder) readWorkbookHolder).getPoifsFileSystem().close();
}
} catch (Throwable t) {
throwable = t;
}
// close csv.
// https://github.com/alibaba/easyexcel/issues/2309
try {
if ((readWorkbookHolder instanceof CsvReadWorkbookHolder)
&& ((CsvReadWorkbookHolder) readWorkbookHolder).getCsvParser() != null
&& analysisContext.readWorkbookHolder().getAutoCloseStream()) {
((CsvReadWorkbookHolder) readWorkbookHolder).getCsvParser().close();
}
} catch (Throwable t) {
throwable = t;
}
try {
if (analysisContext.readWorkbookHolder().getAutoCloseStream()
&& readWorkbookHolder.getInputStream() != null) {
readWorkbookHolder.getInputStream().close();
}
} catch (Throwable t) {
throwable = t;
}
try {
if (readWorkbookHolder.getTempFile() != null) {
FileUtils.delete(readWorkbookHolder.getTempFile());
}
} catch (Throwable t) {
throwable = t;
}
clearEncrypt03();
removeThreadLocalCache();
if (throwable != null) {
throw new ExcelAnalysisException("Can not close IO.", throwable);
}
}
private void removeThreadLocalCache() {
NumberDataFormatterUtils.removeThreadLocalCache();
DateUtils.removeThreadLocalCache();
ClassUtils.removeThreadLocalCache();
}
private void clearEncrypt03() {
if (StringUtils.isEmpty(analysisContext.readWorkbookHolder().getPassword())
|| !ExcelTypeEnum.XLS.equals(analysisContext.readWorkbookHolder().getExcelType())) {
return;
}
Biff8EncryptionKey.setCurrentUserPassword(null);
}
@Override
public ExcelReadExecutor excelExecutor() {
return excelReadExecutor;
}
@Override
public AnalysisContext analysisContext() {
return analysisContext;
}
}

View File

@ -0,0 +1,25 @@
package ai.chat2db.excel.analysis;
import java.util.List;
import ai.chat2db.excel.read.metadata.ReadSheet;
/**
* Excel file Executor
*
* @author Jiaju Zhuang
*/
public interface ExcelReadExecutor {
/**
* Returns the actual sheet in excel
*
* @return Actual sheet in excel
*/
List<ReadSheet> sheetList();
/**
* Read the sheet.
*/
void execute();
}

View File

@ -0,0 +1,147 @@
package ai.chat2db.excel.analysis.csv;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import ai.chat2db.excel.analysis.ExcelReadExecutor;
import ai.chat2db.excel.enums.ByteOrderMarkEnum;
import ai.chat2db.excel.enums.CellDataTypeEnum;
import ai.chat2db.excel.enums.RowTypeEnum;
import ai.chat2db.excel.exception.ExcelAnalysisException;
import ai.chat2db.excel.exception.ExcelAnalysisStopSheetException;
import ai.chat2db.excel.metadata.Cell;
import ai.chat2db.excel.metadata.data.ReadCellData;
import ai.chat2db.excel.read.metadata.ReadSheet;
import ai.chat2db.excel.read.metadata.holder.ReadRowHolder;
import ai.chat2db.excel.read.metadata.holder.csv.CsvReadWorkbookHolder;
import ai.chat2db.excel.util.SheetUtils;
import ai.chat2db.excel.util.StringUtils;
import ai.chat2db.excel.context.csv.CsvReadContext;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.MapUtils;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.input.BOMInputStream;
/**
* read executor
*
* @author zhuangjiaju
*/
@Slf4j
public class CsvExcelReadExecutor implements ExcelReadExecutor {
private final List<ReadSheet> sheetList;
private final CsvReadContext csvReadContext;
public CsvExcelReadExecutor(CsvReadContext csvReadContext) {
this.csvReadContext = csvReadContext;
sheetList = new ArrayList<>();
ReadSheet readSheet = new ReadSheet();
sheetList.add(readSheet);
readSheet.setSheetNo(0);
}
@Override
public List<ReadSheet> sheetList() {
return sheetList;
}
@Override
public void execute() {
CSVParser csvParser;
try {
csvParser = csvParser();
csvReadContext.csvReadWorkbookHolder().setCsvParser(csvParser);
} catch (IOException e) {
throw new ExcelAnalysisException(e);
}
for (ReadSheet readSheet : sheetList) {
readSheet = SheetUtils.match(readSheet, csvReadContext);
if (readSheet == null) {
continue;
}
try {
csvReadContext.currentSheet(readSheet);
int rowIndex = 0;
for (CSVRecord record : csvParser) {
dealRecord(record, rowIndex++);
}
} catch (ExcelAnalysisStopSheetException e) {
if (log.isDebugEnabled()) {
log.debug("Custom stop!", e);
}
}
// The last sheet is read
csvReadContext.analysisEventProcessor().endSheet(csvReadContext);
}
}
private CSVParser csvParser() throws IOException {
CsvReadWorkbookHolder csvReadWorkbookHolder = csvReadContext.csvReadWorkbookHolder();
CSVFormat csvFormat = csvReadWorkbookHolder.getCsvFormat();
ByteOrderMarkEnum byteOrderMark = ByteOrderMarkEnum.valueOfByCharsetName(
csvReadContext.csvReadWorkbookHolder().getCharset().name());
if (csvReadWorkbookHolder.getMandatoryUseInputStream()) {
return buildCsvParser(csvFormat, csvReadWorkbookHolder.getInputStream(), byteOrderMark);
}
if (csvReadWorkbookHolder.getFile() != null) {
return buildCsvParser(csvFormat, Files.newInputStream(csvReadWorkbookHolder.getFile().toPath()),
byteOrderMark);
}
return buildCsvParser(csvFormat, csvReadWorkbookHolder.getInputStream(), byteOrderMark);
}
private CSVParser buildCsvParser(CSVFormat csvFormat, InputStream inputStream, ByteOrderMarkEnum byteOrderMark)
throws IOException {
if (byteOrderMark == null) {
return csvFormat.parse(
new InputStreamReader(inputStream, csvReadContext.csvReadWorkbookHolder().getCharset()));
}
return csvFormat.parse(new InputStreamReader(new BOMInputStream(inputStream, byteOrderMark.getByteOrderMark()),
csvReadContext.csvReadWorkbookHolder().getCharset()));
}
private void dealRecord(CSVRecord record, int rowIndex) {
Map<Integer, Cell> cellMap = new LinkedHashMap<>();
Iterator<String> cellIterator = record.iterator();
int columnIndex = 0;
Boolean autoTrim = csvReadContext.currentReadHolder().globalConfiguration().getAutoTrim();
while (cellIterator.hasNext()) {
String cellString = cellIterator.next();
ReadCellData<String> readCellData = new ReadCellData<>();
readCellData.setRowIndex(rowIndex);
readCellData.setColumnIndex(columnIndex);
// csv is an empty string of whether <code>,,</code> is read or <code>,"",</code>
if (StringUtils.isNotBlank(cellString)) {
readCellData.setType(CellDataTypeEnum.STRING);
readCellData.setStringValue(autoTrim ? cellString.trim() : cellString);
} else {
readCellData.setType(CellDataTypeEnum.EMPTY);
}
cellMap.put(columnIndex++, readCellData);
}
RowTypeEnum rowType = MapUtils.isEmpty(cellMap) ? RowTypeEnum.EMPTY : RowTypeEnum.DATA;
ReadRowHolder readRowHolder = new ReadRowHolder(rowIndex, rowType,
csvReadContext.readWorkbookHolder().getGlobalConfiguration(), cellMap);
csvReadContext.readRowHolder(readRowHolder);
csvReadContext.csvReadSheetHolder().setCellMap(cellMap);
csvReadContext.csvReadSheetHolder().setRowIndex(rowIndex);
csvReadContext.analysisEventProcessor().endRow(csvReadContext);
}
}

View File

@ -0,0 +1,8 @@
package ai.chat2db.excel.analysis.v03;
/**
* Need to ignore the current handler without reading the current sheet.
*
* @author Jiaju Zhuang
*/
public interface IgnorableXlsRecordHandler extends XlsRecordHandler {}

View File

@ -0,0 +1,64 @@
package ai.chat2db.excel.analysis.v03;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import ai.chat2db.excel.exception.ExcelAnalysisException;
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.Record;
import ai.chat2db.excel.analysis.v03.handlers.BofRecordHandler;
import ai.chat2db.excel.analysis.v03.handlers.BoundSheetRecordHandler;
import ai.chat2db.excel.context.xls.XlsReadContext;
/**
* In some cases, you need to know the number of sheets in advance and only read the file once in advance.
*
* @author Jiaju Zhuang
*/
public class XlsListSheetListener implements HSSFListener {
private final XlsReadContext xlsReadContext;
private static final Map<Short, XlsRecordHandler> XLS_RECORD_HANDLER_MAP = new HashMap<Short, XlsRecordHandler>();
static {
XLS_RECORD_HANDLER_MAP.put(BOFRecord.sid, new BofRecordHandler());
XLS_RECORD_HANDLER_MAP.put(BoundSheetRecord.sid, new BoundSheetRecordHandler());
}
public XlsListSheetListener(XlsReadContext xlsReadContext) {
this.xlsReadContext = xlsReadContext;
xlsReadContext.xlsReadWorkbookHolder().setNeedReadSheet(Boolean.FALSE);
}
@Override
public void processRecord(Record record) {
XlsRecordHandler handler = XLS_RECORD_HANDLER_MAP.get(record.getSid());
if (handler == null) {
return;
}
handler.processRecord(xlsReadContext, record);
}
public void execute() {
MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
HSSFListener formatListener = new FormatTrackingHSSFListener(listener);
HSSFEventFactory factory = new HSSFEventFactory();
HSSFRequest request = new HSSFRequest();
EventWorkbookBuilder.SheetRecordCollectingListener workbookBuildingListener =
new EventWorkbookBuilder.SheetRecordCollectingListener(formatListener);
request.addListenerForAllRecords(workbookBuildingListener);
try {
factory.processWorkbookEvents(request, xlsReadContext.xlsReadWorkbookHolder().getPoifsFileSystem());
} catch (IOException e) {
throw new ExcelAnalysisException(e);
}
}
}

View File

@ -0,0 +1,29 @@
package ai.chat2db.excel.analysis.v03;
import org.apache.poi.hssf.record.Record;
import ai.chat2db.excel.context.xls.XlsReadContext;
/**
* Intercepts handle xls reads.
*
* @author Dan Zheng
*/
public interface XlsRecordHandler {
/**
* Whether to support
*
* @param xlsReadContext
* @param record
* @return
*/
boolean support(XlsReadContext xlsReadContext, Record record);
/**
* Processing record
*
* @param xlsReadContext
* @param record
*/
void processRecord(XlsReadContext xlsReadContext, Record record);
}

View File

@ -0,0 +1,176 @@
package ai.chat2db.excel.analysis.v03;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import ai.chat2db.excel.analysis.ExcelReadExecutor;
import ai.chat2db.excel.exception.ExcelAnalysisException;
import ai.chat2db.excel.exception.ExcelAnalysisStopException;
import ai.chat2db.excel.exception.ExcelAnalysisStopSheetException;
import ai.chat2db.excel.read.metadata.ReadSheet;
import ai.chat2db.excel.read.metadata.holder.xls.XlsReadWorkbookHolder;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.EOFRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.HyperlinkRecord;
import org.apache.poi.hssf.record.IndexRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.MergeCellsRecord;
import org.apache.poi.hssf.record.NoteRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.ObjRecord;
import org.apache.poi.hssf.record.RKRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.record.TextObjectRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ai.chat2db.excel.analysis.v03.handlers.BlankRecordHandler;
import ai.chat2db.excel.analysis.v03.handlers.BofRecordHandler;
import ai.chat2db.excel.analysis.v03.handlers.BoolErrRecordHandler;
import ai.chat2db.excel.analysis.v03.handlers.BoundSheetRecordHandler;
import ai.chat2db.excel.analysis.v03.handlers.DummyRecordHandler;
import ai.chat2db.excel.analysis.v03.handlers.EofRecordHandler;
import ai.chat2db.excel.analysis.v03.handlers.FormulaRecordHandler;
import ai.chat2db.excel.analysis.v03.handlers.HyperlinkRecordHandler;
import ai.chat2db.excel.analysis.v03.handlers.IndexRecordHandler;
import ai.chat2db.excel.analysis.v03.handlers.LabelRecordHandler;
import ai.chat2db.excel.analysis.v03.handlers.LabelSstRecordHandler;
import ai.chat2db.excel.analysis.v03.handlers.MergeCellsRecordHandler;
import ai.chat2db.excel.analysis.v03.handlers.NoteRecordHandler;
import ai.chat2db.excel.analysis.v03.handlers.NumberRecordHandler;
import ai.chat2db.excel.analysis.v03.handlers.ObjRecordHandler;
import ai.chat2db.excel.analysis.v03.handlers.RkRecordHandler;
import ai.chat2db.excel.analysis.v03.handlers.SstRecordHandler;
import ai.chat2db.excel.analysis.v03.handlers.StringRecordHandler;
import ai.chat2db.excel.analysis.v03.handlers.TextObjectRecordHandler;
import ai.chat2db.excel.context.xls.XlsReadContext;
/**
* A text extractor for Excel files.
* <p>
* Returns the textual content of the file, suitable for indexing by something like Lucene, but not really intended for
* display to the user.
* </p>
*
* <p>
* To turn an excel file into a CSV or similar, then see the XLS2CSVmra example
* </p>
*
*
* @author jipengfei
* @see <a href="http://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/hssf/eventusermodel/examples/XLS2CSVmra.java">XLS2CSVmra</a>
*/
@Slf4j
public class XlsSaxAnalyser implements HSSFListener, ExcelReadExecutor {
private static final Logger LOGGER = LoggerFactory.getLogger(XlsSaxAnalyser.class);
private static final short DUMMY_RECORD_SID = -1;
private final XlsReadContext xlsReadContext;
private static final Map<Short, XlsRecordHandler> XLS_RECORD_HANDLER_MAP = new HashMap<Short, XlsRecordHandler>(32);
static {
XLS_RECORD_HANDLER_MAP.put(BlankRecord.sid, new BlankRecordHandler());
XLS_RECORD_HANDLER_MAP.put(BOFRecord.sid, new BofRecordHandler());
XLS_RECORD_HANDLER_MAP.put(BoolErrRecord.sid, new BoolErrRecordHandler());
XLS_RECORD_HANDLER_MAP.put(BoundSheetRecord.sid, new BoundSheetRecordHandler());
XLS_RECORD_HANDLER_MAP.put(DUMMY_RECORD_SID, new DummyRecordHandler());
XLS_RECORD_HANDLER_MAP.put(EOFRecord.sid, new EofRecordHandler());
XLS_RECORD_HANDLER_MAP.put(FormulaRecord.sid, new FormulaRecordHandler());
XLS_RECORD_HANDLER_MAP.put(HyperlinkRecord.sid, new HyperlinkRecordHandler());
XLS_RECORD_HANDLER_MAP.put(IndexRecord.sid, new IndexRecordHandler());
XLS_RECORD_HANDLER_MAP.put(LabelRecord.sid, new LabelRecordHandler());
XLS_RECORD_HANDLER_MAP.put(LabelSSTRecord.sid, new LabelSstRecordHandler());
XLS_RECORD_HANDLER_MAP.put(MergeCellsRecord.sid, new MergeCellsRecordHandler());
XLS_RECORD_HANDLER_MAP.put(NoteRecord.sid, new NoteRecordHandler());
XLS_RECORD_HANDLER_MAP.put(NumberRecord.sid, new NumberRecordHandler());
XLS_RECORD_HANDLER_MAP.put(ObjRecord.sid, new ObjRecordHandler());
XLS_RECORD_HANDLER_MAP.put(RKRecord.sid, new RkRecordHandler());
XLS_RECORD_HANDLER_MAP.put(SSTRecord.sid, new SstRecordHandler());
XLS_RECORD_HANDLER_MAP.put(StringRecord.sid, new StringRecordHandler());
XLS_RECORD_HANDLER_MAP.put(TextObjectRecord.sid, new TextObjectRecordHandler());
}
public XlsSaxAnalyser(XlsReadContext xlsReadContext) {
this.xlsReadContext = xlsReadContext;
}
@Override
public List<ReadSheet> sheetList() {
try {
if (xlsReadContext.readWorkbookHolder().getActualSheetDataList() == null) {
new XlsListSheetListener(xlsReadContext).execute();
}
} catch (ExcelAnalysisStopException e) {
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Custom stop!");
}
}
return xlsReadContext.readWorkbookHolder().getActualSheetDataList();
}
@Override
public void execute() {
XlsReadWorkbookHolder xlsReadWorkbookHolder = xlsReadContext.xlsReadWorkbookHolder();
MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
xlsReadWorkbookHolder.setFormatTrackingHSSFListener(new FormatTrackingHSSFListener(listener));
EventWorkbookBuilder.SheetRecordCollectingListener workbookBuildingListener =
new EventWorkbookBuilder.SheetRecordCollectingListener(
xlsReadWorkbookHolder.getFormatTrackingHSSFListener());
xlsReadWorkbookHolder.setHssfWorkbook(workbookBuildingListener.getStubHSSFWorkbook());
HSSFEventFactory factory = new HSSFEventFactory();
HSSFRequest request = new HSSFRequest();
request.addListenerForAllRecords(xlsReadWorkbookHolder.getFormatTrackingHSSFListener());
try {
factory.processWorkbookEvents(request, xlsReadWorkbookHolder.getPoifsFileSystem());
} catch (IOException e) {
throw new ExcelAnalysisException(e);
}
// There are some special xls that do not have the terminator "[EOF]", so an additional
xlsReadContext.analysisEventProcessor().endSheet(xlsReadContext);
}
@Override
public void processRecord(Record record) {
XlsRecordHandler handler = XLS_RECORD_HANDLER_MAP.get(record.getSid());
if (handler == null) {
return;
}
boolean ignoreRecord =
(handler instanceof IgnorableXlsRecordHandler) && xlsReadContext.xlsReadWorkbookHolder().getIgnoreRecord();
if (ignoreRecord) {
// No need to read the current sheet
return;
}
if (!handler.support(xlsReadContext, record)) {
return;
}
try {
handler.processRecord(xlsReadContext, record);
} catch (ExcelAnalysisStopSheetException e) {
if (log.isDebugEnabled()) {
log.debug("Custom stop!", e);
}
xlsReadContext.xlsReadWorkbookHolder().setIgnoreRecord(Boolean.TRUE);
xlsReadContext.xlsReadWorkbookHolder().setCurrentSheetStopped(Boolean.TRUE);
}
}
}

View File

@ -0,0 +1,19 @@
package ai.chat2db.excel.analysis.v03.handlers;
import ai.chat2db.excel.analysis.v03.XlsRecordHandler;
import org.apache.poi.hssf.record.Record;
import ai.chat2db.excel.context.xls.XlsReadContext;
/**
* Abstract xls record handler
*
* @author Jiaju Zhuang
**/
public abstract class AbstractXlsRecordHandler implements XlsRecordHandler {
@Override
public boolean support(XlsReadContext xlsReadContext, Record record) {
return true;
}
}

View File

@ -0,0 +1,23 @@
package ai.chat2db.excel.analysis.v03.handlers;
import ai.chat2db.excel.analysis.v03.IgnorableXlsRecordHandler;
import ai.chat2db.excel.metadata.data.ReadCellData;
import ai.chat2db.excel.context.xls.XlsReadContext;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.Record;
/**
* Record handler
*
* @author Dan Zheng
*/
public class BlankRecordHandler extends AbstractXlsRecordHandler implements IgnorableXlsRecordHandler {
@Override
public void processRecord(XlsReadContext xlsReadContext, Record record) {
BlankRecord br = (BlankRecord)record;
xlsReadContext.xlsReadSheetHolder().getCellMap().put((int)br.getColumn(),
ReadCellData.newEmptyInstance(br.getRow(), (int)br.getColumn()));
}
}

View File

@ -0,0 +1,75 @@
package ai.chat2db.excel.analysis.v03.handlers;
import java.util.ArrayList;
import java.util.List;
import ai.chat2db.excel.exception.ExcelAnalysisStopException;
import ai.chat2db.excel.read.metadata.ReadSheet;
import ai.chat2db.excel.read.metadata.holder.xls.XlsReadWorkbookHolder;
import ai.chat2db.excel.util.SheetUtils;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.Record;
import ai.chat2db.excel.context.xls.XlsReadContext;
/**
* Record handler
*
* @author Dan Zheng
*/
public class BofRecordHandler extends AbstractXlsRecordHandler {
@Override
public void processRecord(XlsReadContext xlsReadContext, Record record) {
BOFRecord br = (BOFRecord) record;
XlsReadWorkbookHolder xlsReadWorkbookHolder = xlsReadContext.xlsReadWorkbookHolder();
if (br.getType() == BOFRecord.TYPE_WORKBOOK) {
xlsReadWorkbookHolder.setReadSheetIndex(null);
xlsReadWorkbookHolder.setIgnoreRecord(Boolean.FALSE);
return;
}
if (br.getType() != BOFRecord.TYPE_WORKSHEET) {
return;
}
// Init read sheet Data
initReadSheetDataList(xlsReadWorkbookHolder);
Integer readSheetIndex = xlsReadWorkbookHolder.getReadSheetIndex();
if (readSheetIndex == null) {
readSheetIndex = 0;
xlsReadWorkbookHolder.setReadSheetIndex(readSheetIndex);
}
ReadSheet actualReadSheet = xlsReadWorkbookHolder.getActualSheetDataList().get(readSheetIndex);
assert actualReadSheet != null : "Can't find the sheet.";
// Copy the parameter to the current sheet
ReadSheet readSheet = SheetUtils.match(actualReadSheet, xlsReadContext);
if (readSheet != null) {
xlsReadContext.currentSheet(readSheet);
xlsReadContext.xlsReadWorkbookHolder().setIgnoreRecord(Boolean.FALSE);
} else {
xlsReadContext.xlsReadWorkbookHolder().setIgnoreRecord(Boolean.TRUE);
}
xlsReadContext.xlsReadWorkbookHolder().setCurrentSheetStopped(Boolean.FALSE);
// Go read the next one
xlsReadWorkbookHolder.setReadSheetIndex(xlsReadWorkbookHolder.getReadSheetIndex() + 1);
}
private void initReadSheetDataList(XlsReadWorkbookHolder xlsReadWorkbookHolder) {
if (xlsReadWorkbookHolder.getActualSheetDataList() != null) {
return;
}
BoundSheetRecord[] boundSheetRecords =
BoundSheetRecord.orderByBofPosition(xlsReadWorkbookHolder.getBoundSheetRecordList());
List<ReadSheet> readSheetDataList = new ArrayList<ReadSheet>();
for (int i = 0; i < boundSheetRecords.length; i++) {
BoundSheetRecord boundSheetRecord = boundSheetRecords[i];
ReadSheet readSheet = new ReadSheet(i, boundSheetRecord.getSheetname());
readSheetDataList.add(readSheet);
}
xlsReadWorkbookHolder.setActualSheetDataList(readSheetDataList);
// Just need to get the list of sheets
if (!xlsReadWorkbookHolder.getNeedReadSheet()) {
throw new ExcelAnalysisStopException("Just need to get the list of sheets.");
}
}
}

View File

@ -0,0 +1,25 @@
package ai.chat2db.excel.analysis.v03.handlers;
import ai.chat2db.excel.analysis.v03.IgnorableXlsRecordHandler;
import ai.chat2db.excel.enums.RowTypeEnum;
import ai.chat2db.excel.metadata.data.ReadCellData;
import ai.chat2db.excel.context.xls.XlsReadContext;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.Record;
/**
* Record handler
*
* @author Dan Zheng
*/
public class BoolErrRecordHandler extends AbstractXlsRecordHandler implements IgnorableXlsRecordHandler {
@Override
public void processRecord(XlsReadContext xlsReadContext, Record record) {
BoolErrRecord ber = (BoolErrRecord)record;
xlsReadContext.xlsReadSheetHolder().getCellMap().put((int)ber.getColumn(),
ReadCellData.newInstance(ber.getBooleanValue(), ber.getRow(), (int)ber.getColumn()));
xlsReadContext.xlsReadSheetHolder().setTempRowType(RowTypeEnum.DATA);
}
}

View File

@ -0,0 +1,21 @@
package ai.chat2db.excel.analysis.v03.handlers;
import ai.chat2db.excel.analysis.v03.IgnorableXlsRecordHandler;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.Record;
import ai.chat2db.excel.context.xls.XlsReadContext;
/**
* Record handler
*
* @author Dan Zheng
*/
public class BoundSheetRecordHandler extends AbstractXlsRecordHandler implements IgnorableXlsRecordHandler {
@Override
public void processRecord(XlsReadContext xlsReadContext, Record record) {
BoundSheetRecord bsr = (BoundSheetRecord)record;
xlsReadContext.xlsReadWorkbookHolder().getBoundSheetRecordList().add(bsr);
}
}

View File

@ -0,0 +1,44 @@
package ai.chat2db.excel.analysis.v03.handlers;
import java.util.LinkedHashMap;
import ai.chat2db.excel.analysis.v03.IgnorableXlsRecordHandler;
import ai.chat2db.excel.enums.RowTypeEnum;
import ai.chat2db.excel.metadata.Cell;
import ai.chat2db.excel.metadata.data.ReadCellData;
import ai.chat2db.excel.read.metadata.holder.ReadRowHolder;
import ai.chat2db.excel.read.metadata.holder.xls.XlsReadSheetHolder;
import ai.chat2db.excel.context.xls.XlsReadContext;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.record.Record;
/**
* Record handler
*
* @author Dan Zheng
*/
public class DummyRecordHandler extends AbstractXlsRecordHandler implements IgnorableXlsRecordHandler {
@Override
public void processRecord(XlsReadContext xlsReadContext, Record record) {
XlsReadSheetHolder xlsReadSheetHolder = xlsReadContext.xlsReadSheetHolder();
if (record instanceof LastCellOfRowDummyRecord) {
// End of this row
LastCellOfRowDummyRecord lcrdr = (LastCellOfRowDummyRecord)record;
xlsReadSheetHolder.setRowIndex(lcrdr.getRow());
xlsReadContext.readRowHolder(new ReadRowHolder(lcrdr.getRow(), xlsReadSheetHolder.getTempRowType(),
xlsReadContext.readSheetHolder().getGlobalConfiguration(), xlsReadSheetHolder.getCellMap()));
xlsReadContext.analysisEventProcessor().endRow(xlsReadContext);
xlsReadSheetHolder.setCellMap(new LinkedHashMap<Integer, Cell>());
xlsReadSheetHolder.setTempRowType(RowTypeEnum.EMPTY);
} else if (record instanceof MissingCellDummyRecord) {
MissingCellDummyRecord mcdr = (MissingCellDummyRecord)record;
// https://github.com/alibaba/easyexcel/issues/2236
// Some abnormal XLS, in the case of data already exist, or there will be a "MissingCellDummyRecord"
// records, so if the existing data, empty data is ignored
xlsReadSheetHolder.getCellMap().putIfAbsent(mcdr.getColumn(),
ReadCellData.newEmptyInstance(mcdr.getRow(), mcdr.getColumn()));
}
}
}

View File

@ -0,0 +1,50 @@
package ai.chat2db.excel.analysis.v03.handlers;
import java.util.LinkedHashMap;
import ai.chat2db.excel.enums.RowTypeEnum;
import ai.chat2db.excel.metadata.Cell;
import ai.chat2db.excel.read.metadata.holder.ReadRowHolder;
import ai.chat2db.excel.read.metadata.holder.xls.XlsReadSheetHolder;
import ai.chat2db.excel.util.BooleanUtils;
import org.apache.poi.hssf.record.Record;
import ai.chat2db.excel.context.xls.XlsReadContext;
/**
* Record handler
*
* @author Dan Zheng
*/
public class EofRecordHandler extends AbstractXlsRecordHandler {
@Override
public void processRecord(XlsReadContext xlsReadContext, Record record) {
if (xlsReadContext.readSheetHolder() == null) {
return;
}
//Represents the current sheet does not need to be read or the user manually stopped reading the sheet.
if (BooleanUtils.isTrue(xlsReadContext.xlsReadWorkbookHolder().getIgnoreRecord())) {
// When the user manually stops reading the sheet, the method to end the sheet needs to be called.
if (BooleanUtils.isTrue(xlsReadContext.xlsReadWorkbookHolder().getCurrentSheetStopped())) {
xlsReadContext.analysisEventProcessor().endSheet(xlsReadContext);
}
return;
}
// Sometimes tables lack the end record of the last column
if (!xlsReadContext.xlsReadSheetHolder().getCellMap().isEmpty()) {
XlsReadSheetHolder xlsReadSheetHolder = xlsReadContext.xlsReadSheetHolder();
// Forge a termination data
xlsReadContext.readRowHolder(new ReadRowHolder(xlsReadContext.xlsReadSheetHolder().getRowIndex() + 1,
xlsReadSheetHolder.getTempRowType(),
xlsReadContext.readSheetHolder().getGlobalConfiguration(), xlsReadSheetHolder.getCellMap()));
xlsReadContext.analysisEventProcessor().endRow(xlsReadContext);
xlsReadSheetHolder.setCellMap(new LinkedHashMap<Integer, Cell>());
xlsReadSheetHolder.setTempRowType(RowTypeEnum.EMPTY);
}
xlsReadContext.analysisEventProcessor().endSheet(xlsReadContext);
}
}

View File

@ -0,0 +1,89 @@
package ai.chat2db.excel.analysis.v03.handlers;
import java.math.BigDecimal;
import java.util.Map;
import ai.chat2db.excel.analysis.v03.IgnorableXlsRecordHandler;
import ai.chat2db.excel.constant.BuiltinFormats;
import ai.chat2db.excel.constant.EasyExcelConstants;
import ai.chat2db.excel.enums.CellDataTypeEnum;
import ai.chat2db.excel.enums.RowTypeEnum;
import ai.chat2db.excel.metadata.Cell;
import ai.chat2db.excel.metadata.data.DataFormatData;
import ai.chat2db.excel.metadata.data.FormulaData;
import ai.chat2db.excel.metadata.data.ReadCellData;
import ai.chat2db.excel.context.xls.XlsReadContext;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.ss.usermodel.CellType;
/**
* Record handler
*
* @author Dan Zheng
*/
@Slf4j
public class FormulaRecordHandler extends AbstractXlsRecordHandler implements IgnorableXlsRecordHandler {
private static final String ERROR = "#VALUE!";
@Override
public void processRecord(XlsReadContext xlsReadContext, Record record) {
FormulaRecord frec = (FormulaRecord)record;
Map<Integer, Cell> cellMap = xlsReadContext.xlsReadSheetHolder().getCellMap();
ReadCellData<?> tempCellData = new ReadCellData<>();
tempCellData.setRowIndex(frec.getRow());
tempCellData.setColumnIndex((int)frec.getColumn());
CellType cellType = CellType.forInt(frec.getCachedResultType());
String formulaValue = null;
try {
formulaValue = HSSFFormulaParser.toFormulaString(xlsReadContext.xlsReadWorkbookHolder().getHssfWorkbook(),
frec.getParsedExpression());
} catch (Exception e) {
log.debug("Get formula value error.", e);
}
FormulaData formulaData = new FormulaData();
formulaData.setFormulaValue(formulaValue);
tempCellData.setFormulaData(formulaData);
xlsReadContext.xlsReadSheetHolder().setTempRowType(RowTypeEnum.DATA);
switch (cellType) {
case STRING:
// Formula result is a string
// This is stored in the next record
tempCellData.setType(CellDataTypeEnum.STRING);
xlsReadContext.xlsReadSheetHolder().setTempCellData(tempCellData);
break;
case NUMERIC:
tempCellData.setType(CellDataTypeEnum.NUMBER);
tempCellData.setOriginalNumberValue(BigDecimal.valueOf(frec.getValue()));
tempCellData.setNumberValue(
tempCellData.getOriginalNumberValue().round(EasyExcelConstants.EXCEL_MATH_CONTEXT));
int dataFormat =
xlsReadContext.xlsReadWorkbookHolder().getFormatTrackingHSSFListener().getFormatIndex(frec);
DataFormatData dataFormatData = new DataFormatData();
dataFormatData.setIndex((short)dataFormat);
dataFormatData.setFormat(BuiltinFormats.getBuiltinFormat(dataFormatData.getIndex(),
xlsReadContext.xlsReadWorkbookHolder().getFormatTrackingHSSFListener().getFormatString(frec),
xlsReadContext.readSheetHolder().getGlobalConfiguration().getLocale()));
tempCellData.setDataFormatData(dataFormatData);
cellMap.put((int)frec.getColumn(), tempCellData);
break;
case ERROR:
tempCellData.setType(CellDataTypeEnum.ERROR);
tempCellData.setStringValue(ERROR);
cellMap.put((int)frec.getColumn(), tempCellData);
break;
case BOOLEAN:
tempCellData.setType(CellDataTypeEnum.BOOLEAN);
tempCellData.setBooleanValue(frec.getCachedBooleanValue());
cellMap.put((int)frec.getColumn(), tempCellData);
break;
default:
tempCellData.setType(CellDataTypeEnum.EMPTY);
cellMap.put((int)frec.getColumn(), tempCellData);
break;
}
}
}

View File

@ -0,0 +1,30 @@
package ai.chat2db.excel.analysis.v03.handlers;
import ai.chat2db.excel.analysis.v03.IgnorableXlsRecordHandler;
import ai.chat2db.excel.enums.CellExtraTypeEnum;
import ai.chat2db.excel.metadata.CellExtra;
import org.apache.poi.hssf.record.HyperlinkRecord;
import org.apache.poi.hssf.record.Record;
import ai.chat2db.excel.context.xls.XlsReadContext;
/**
* Record handler
*
* @author Dan Zheng
*/
public class HyperlinkRecordHandler extends AbstractXlsRecordHandler implements IgnorableXlsRecordHandler {
@Override
public boolean support(XlsReadContext xlsReadContext, Record record) {
return xlsReadContext.readWorkbookHolder().getExtraReadSet().contains(CellExtraTypeEnum.HYPERLINK);
}
@Override
public void processRecord(XlsReadContext xlsReadContext, Record record) {
HyperlinkRecord hr = (HyperlinkRecord)record;
CellExtra cellExtra = new CellExtra(CellExtraTypeEnum.HYPERLINK, hr.getAddress(), hr.getFirstRow(),
hr.getLastRow(), hr.getFirstColumn(), hr.getLastColumn());
xlsReadContext.xlsReadSheetHolder().setCellExtra(cellExtra);
xlsReadContext.analysisEventProcessor().extra(xlsReadContext);
}
}

View File

@ -0,0 +1,22 @@
package ai.chat2db.excel.analysis.v03.handlers;
import ai.chat2db.excel.analysis.v03.IgnorableXlsRecordHandler;
import org.apache.poi.hssf.record.IndexRecord;
import org.apache.poi.hssf.record.Record;
import ai.chat2db.excel.context.xls.XlsReadContext;
/**
* Record handler
*
* @author Jiaju Zhuang
*/
public class IndexRecordHandler extends AbstractXlsRecordHandler implements IgnorableXlsRecordHandler {
@Override
public void processRecord(XlsReadContext xlsReadContext, Record record) {
if (xlsReadContext.readSheetHolder() == null) {
return;
}
xlsReadContext.readSheetHolder().setApproximateTotalRowNumber(((IndexRecord)record).getLastRowAdd1());
}
}

View File

@ -0,0 +1,28 @@
package ai.chat2db.excel.analysis.v03.handlers;
import ai.chat2db.excel.analysis.v03.IgnorableXlsRecordHandler;
import ai.chat2db.excel.enums.RowTypeEnum;
import ai.chat2db.excel.metadata.data.ReadCellData;
import ai.chat2db.excel.context.xls.XlsReadContext;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.Record;
/**
* Record handler
*
* @author Dan Zheng
*/
public class LabelRecordHandler extends AbstractXlsRecordHandler implements IgnorableXlsRecordHandler {
@Override
public void processRecord(XlsReadContext xlsReadContext, Record record) {
LabelRecord lrec = (LabelRecord)record;
String data = lrec.getValue();
if (data != null && xlsReadContext.currentReadHolder().globalConfiguration().getAutoTrim()) {
data = data.trim();
}
xlsReadContext.xlsReadSheetHolder().getCellMap().put((int)lrec.getColumn(),
ReadCellData.newInstance(data, lrec.getRow(), (int)lrec.getColumn()));
xlsReadContext.xlsReadSheetHolder().setTempRowType(RowTypeEnum.DATA);
}
}

View File

@ -0,0 +1,42 @@
package ai.chat2db.excel.analysis.v03.handlers;
import java.util.Map;
import ai.chat2db.excel.analysis.v03.IgnorableXlsRecordHandler;
import ai.chat2db.excel.cache.ReadCache;
import ai.chat2db.excel.enums.RowTypeEnum;
import ai.chat2db.excel.metadata.Cell;
import ai.chat2db.excel.metadata.data.ReadCellData;
import ai.chat2db.excel.context.xls.XlsReadContext;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.Record;
/**
* Record handler
*
* @author Dan Zheng
*/
public class LabelSstRecordHandler extends AbstractXlsRecordHandler implements IgnorableXlsRecordHandler {
@Override
public void processRecord(XlsReadContext xlsReadContext, Record record) {
LabelSSTRecord lsrec = (LabelSSTRecord)record;
ReadCache readCache = xlsReadContext.readWorkbookHolder().getReadCache();
Map<Integer, Cell> cellMap = xlsReadContext.xlsReadSheetHolder().getCellMap();
if (readCache == null) {
cellMap.put((int)lsrec.getColumn(), ReadCellData.newEmptyInstance(lsrec.getRow(), (int)lsrec.getColumn()));
return;
}
String data = readCache.get(lsrec.getSSTIndex());
if (data == null) {
cellMap.put((int)lsrec.getColumn(), ReadCellData.newEmptyInstance(lsrec.getRow(), (int)lsrec.getColumn()));
return;
}
if (xlsReadContext.currentReadHolder().globalConfiguration().getAutoTrim()) {
data = data.trim();
}
cellMap.put((int)lsrec.getColumn(), ReadCellData.newInstance(data, lsrec.getRow(), (int)lsrec.getColumn()));
xlsReadContext.xlsReadSheetHolder().setTempRowType(RowTypeEnum.DATA);
}
}

View File

@ -0,0 +1,35 @@
package ai.chat2db.excel.analysis.v03.handlers;
import ai.chat2db.excel.analysis.v03.IgnorableXlsRecordHandler;
import ai.chat2db.excel.enums.CellExtraTypeEnum;
import ai.chat2db.excel.metadata.CellExtra;
import org.apache.poi.hssf.record.MergeCellsRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.ss.util.CellRangeAddress;
import ai.chat2db.excel.context.xls.XlsReadContext;
/**
* Record handler
*
* @author Dan Zheng
*/
public class MergeCellsRecordHandler extends AbstractXlsRecordHandler implements IgnorableXlsRecordHandler {
@Override
public boolean support(XlsReadContext xlsReadContext, Record record) {
return xlsReadContext.readWorkbookHolder().getExtraReadSet().contains(CellExtraTypeEnum.MERGE);
}
@Override
public void processRecord(XlsReadContext xlsReadContext, Record record) {
MergeCellsRecord mcr = (MergeCellsRecord)record;
for (int i = 0; i < mcr.getNumAreas(); i++) {
CellRangeAddress cellRangeAddress = mcr.getAreaAt(i);
CellExtra cellExtra = new CellExtra(CellExtraTypeEnum.MERGE, null, cellRangeAddress.getFirstRow(),
cellRangeAddress.getLastRow(), cellRangeAddress.getFirstColumn(), cellRangeAddress.getLastColumn());
xlsReadContext.xlsReadSheetHolder().setCellExtra(cellExtra);
xlsReadContext.analysisEventProcessor().extra(xlsReadContext);
}
}
}

View File

@ -0,0 +1,31 @@
package ai.chat2db.excel.analysis.v03.handlers;
import ai.chat2db.excel.analysis.v03.IgnorableXlsRecordHandler;
import ai.chat2db.excel.enums.CellExtraTypeEnum;
import ai.chat2db.excel.metadata.CellExtra;
import org.apache.poi.hssf.record.NoteRecord;
import org.apache.poi.hssf.record.Record;
import ai.chat2db.excel.context.xls.XlsReadContext;
/**
* Record handler
*
* @author Dan Zheng
*/
public class NoteRecordHandler extends AbstractXlsRecordHandler implements IgnorableXlsRecordHandler {
@Override
public boolean support(XlsReadContext xlsReadContext, Record record) {
return xlsReadContext.readWorkbookHolder().getExtraReadSet().contains(CellExtraTypeEnum.COMMENT);
}
@Override
public void processRecord(XlsReadContext xlsReadContext, Record record) {
NoteRecord nr = (NoteRecord)record;
String text = xlsReadContext.xlsReadSheetHolder().getObjectCacheMap().get(nr.getShapeId());
CellExtra cellExtra = new CellExtra(CellExtraTypeEnum.COMMENT, text, nr.getRow(), nr.getColumn());
xlsReadContext.xlsReadSheetHolder().setCellExtra(cellExtra);
xlsReadContext.analysisEventProcessor().extra(xlsReadContext);
}
}

View File

@ -0,0 +1,38 @@
package ai.chat2db.excel.analysis.v03.handlers;
import java.math.BigDecimal;
import ai.chat2db.excel.analysis.v03.IgnorableXlsRecordHandler;
import ai.chat2db.excel.constant.BuiltinFormats;
import ai.chat2db.excel.enums.RowTypeEnum;
import ai.chat2db.excel.metadata.data.DataFormatData;
import ai.chat2db.excel.metadata.data.ReadCellData;
import ai.chat2db.excel.context.xls.XlsReadContext;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.Record;
/**
* Record handler
*
* @author Dan Zheng
*/
public class NumberRecordHandler extends AbstractXlsRecordHandler implements IgnorableXlsRecordHandler {
@Override
public void processRecord(XlsReadContext xlsReadContext, Record record) {
NumberRecord nr = (NumberRecord)record;
ReadCellData<?> cellData = ReadCellData.newInstanceOriginal(BigDecimal.valueOf(nr.getValue()), nr.getRow(),
(int)nr.getColumn());
short dataFormat = (short)xlsReadContext.xlsReadWorkbookHolder().getFormatTrackingHSSFListener().getFormatIndex(
nr);
DataFormatData dataFormatData = new DataFormatData();
dataFormatData.setIndex(dataFormat);
dataFormatData.setFormat(BuiltinFormats.getBuiltinFormat(dataFormat,
xlsReadContext.xlsReadWorkbookHolder().getFormatTrackingHSSFListener().getFormatString(nr),
xlsReadContext.readSheetHolder().getGlobalConfiguration().getLocale()));
cellData.setDataFormatData(dataFormatData);
xlsReadContext.xlsReadSheetHolder().getCellMap().put((int)nr.getColumn(), cellData);
xlsReadContext.xlsReadSheetHolder().setTempRowType(RowTypeEnum.DATA);
}
}

View File

@ -0,0 +1,30 @@
package ai.chat2db.excel.analysis.v03.handlers;
import ai.chat2db.excel.analysis.v03.IgnorableXlsRecordHandler;
import org.apache.poi.hssf.record.CommonObjectDataSubRecord;
import org.apache.poi.hssf.record.ObjRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SubRecord;
import ai.chat2db.excel.context.xls.XlsReadContext;
/**
* Record handler
*
* @author Jiaju Zhuang
*/
public class ObjRecordHandler extends AbstractXlsRecordHandler implements IgnorableXlsRecordHandler {
@Override
public void processRecord(XlsReadContext xlsReadContext, Record record) {
ObjRecord or = (ObjRecord)record;
for (SubRecord subRecord : or.getSubRecords()) {
if (subRecord instanceof CommonObjectDataSubRecord) {
CommonObjectDataSubRecord codsr = (CommonObjectDataSubRecord)subRecord;
if (CommonObjectDataSubRecord.OBJECT_TYPE_COMMENT == codsr.getObjectType()) {
xlsReadContext.xlsReadSheetHolder().setTempObjectIndex(codsr.getObjectId());
}
break;
}
}
}
}

View File

@ -0,0 +1,23 @@
package ai.chat2db.excel.analysis.v03.handlers;
import ai.chat2db.excel.analysis.v03.IgnorableXlsRecordHandler;
import ai.chat2db.excel.metadata.data.ReadCellData;
import ai.chat2db.excel.context.xls.XlsReadContext;
import org.apache.poi.hssf.record.RKRecord;
import org.apache.poi.hssf.record.Record;
/**
* Record handler
*
* @author Dan Zheng
*/
public class RkRecordHandler extends AbstractXlsRecordHandler implements IgnorableXlsRecordHandler {
@Override
public void processRecord(XlsReadContext xlsReadContext, Record record) {
RKRecord re = (RKRecord)record;
xlsReadContext.xlsReadSheetHolder().getCellMap().put((int)re.getColumn(),
ReadCellData.newEmptyInstance(re.getRow(), (int)re.getColumn()));
}
}

View File

@ -0,0 +1,20 @@
package ai.chat2db.excel.analysis.v03.handlers;
import ai.chat2db.excel.analysis.v03.IgnorableXlsRecordHandler;
import ai.chat2db.excel.cache.XlsCache;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import ai.chat2db.excel.context.xls.XlsReadContext;
/**
* Record handler
*
* @author Dan Zheng
*/
public class SstRecordHandler extends AbstractXlsRecordHandler implements IgnorableXlsRecordHandler {
@Override
public void processRecord(XlsReadContext xlsReadContext, Record record) {
xlsReadContext.readWorkbookHolder().setReadCache(new XlsCache((SSTRecord)record));
}
}

View File

@ -0,0 +1,35 @@
package ai.chat2db.excel.analysis.v03.handlers;
import ai.chat2db.excel.analysis.v03.IgnorableXlsRecordHandler;
import ai.chat2db.excel.metadata.data.CellData;
import ai.chat2db.excel.read.metadata.holder.xls.XlsReadSheetHolder;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.StringRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ai.chat2db.excel.context.xls.XlsReadContext;
/**
* Record handler
*
* @author Dan Zheng
*/
public class StringRecordHandler extends AbstractXlsRecordHandler implements IgnorableXlsRecordHandler {
private static final Logger LOGGER = LoggerFactory.getLogger(StringRecordHandler.class);
@Override
public void processRecord(XlsReadContext xlsReadContext, Record record) {
// String for formula
StringRecord srec = (StringRecord)record;
XlsReadSheetHolder xlsReadSheetHolder = xlsReadContext.xlsReadSheetHolder();
CellData<?> tempCellData = xlsReadSheetHolder.getTempCellData();
if (tempCellData == null) {
LOGGER.warn("String type formula but no value found.");
return;
}
tempCellData.setStringValue(srec.getString());
xlsReadSheetHolder.getCellMap().put(tempCellData.getColumnIndex(), tempCellData);
xlsReadSheetHolder.setTempCellData(null);
}
}

View File

@ -0,0 +1,38 @@
package ai.chat2db.excel.analysis.v03.handlers;
import ai.chat2db.excel.analysis.v03.IgnorableXlsRecordHandler;
import ai.chat2db.excel.enums.CellExtraTypeEnum;
import ai.chat2db.excel.read.metadata.holder.xls.XlsReadSheetHolder;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.TextObjectRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ai.chat2db.excel.context.xls.XlsReadContext;
/**
* Record handler
*
* @author Jiaju Zhuang
*/
public class TextObjectRecordHandler extends AbstractXlsRecordHandler implements IgnorableXlsRecordHandler {
private static final Logger LOGGER = LoggerFactory.getLogger(TextObjectRecordHandler.class);
@Override
public boolean support(XlsReadContext xlsReadContext, Record record) {
return xlsReadContext.readWorkbookHolder().getExtraReadSet().contains(CellExtraTypeEnum.COMMENT);
}
@Override
public void processRecord(XlsReadContext xlsReadContext, Record record) {
TextObjectRecord tor = (TextObjectRecord)record;
XlsReadSheetHolder xlsReadSheetHolder = xlsReadContext.xlsReadSheetHolder();
Integer tempObjectIndex = xlsReadSheetHolder.getTempObjectIndex();
if (tempObjectIndex == null) {
LOGGER.debug("tempObjectIndex is null.");
return;
}
xlsReadSheetHolder.getObjectCacheMap().put(tempObjectIndex, tor.getStr().getString());
xlsReadSheetHolder.setTempObjectIndex(null);
}
}

View File

@ -0,0 +1,292 @@
package ai.chat2db.excel.analysis.v07;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import ai.chat2db.excel.cache.ReadCache;
import ai.chat2db.excel.enums.CellExtraTypeEnum;
import ai.chat2db.excel.exception.ExcelAnalysisException;
import ai.chat2db.excel.exception.ExcelAnalysisStopSheetException;
import ai.chat2db.excel.metadata.CellExtra;
import ai.chat2db.excel.read.metadata.ReadSheet;
import ai.chat2db.excel.read.metadata.holder.xlsx.XlsxReadWorkbookHolder;
import ai.chat2db.excel.util.FileUtils;
import ai.chat2db.excel.util.MapUtils;
import ai.chat2db.excel.util.SheetUtils;
import ai.chat2db.excel.util.StringUtils;
import ai.chat2db.excel.analysis.ExcelReadExecutor;
import ai.chat2db.excel.analysis.v07.handlers.sax.SharedStringsTableHandler;
import ai.chat2db.excel.analysis.v07.handlers.sax.XlsxRowHandler;
import ai.chat2db.excel.context.xlsx.XlsxReadContext;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackagePartName;
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.Comments;
import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbookPr;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
/**
* @author jipengfei
*/
@Slf4j
public class XlsxSaxAnalyser implements ExcelReadExecutor {
/**
* Storage sheet SharedStrings
*/
public static final PackagePartName SHARED_STRINGS_PART_NAME;
static {
try {
SHARED_STRINGS_PART_NAME = PackagingURIHelper.createPartName("/xl/sharedStrings.xml");
} catch (InvalidFormatException e) {
log.error("Initialize the XlsxSaxAnalyser failure", e);
throw new ExcelAnalysisException("Initialize the XlsxSaxAnalyser failure", e);
}
}
private final XlsxReadContext xlsxReadContext;
private final List<ReadSheet> sheetList;
private final Map<Integer, InputStream> sheetMap;
/**
* excel comments key: sheetNo value: CommentsTable
*/
private final Map<Integer, CommentsTable> commentsTableMap;
public XlsxSaxAnalyser(XlsxReadContext xlsxReadContext, InputStream decryptedStream) throws Exception {
this.xlsxReadContext = xlsxReadContext;
// Initialize cache
XlsxReadWorkbookHolder xlsxReadWorkbookHolder = xlsxReadContext.xlsxReadWorkbookHolder();
OPCPackage pkg = readOpcPackage(xlsxReadWorkbookHolder, decryptedStream);
xlsxReadWorkbookHolder.setOpcPackage(pkg);
// Read the Shared information Strings
PackagePart sharedStringsTablePackagePart = pkg.getPart(SHARED_STRINGS_PART_NAME);
if (sharedStringsTablePackagePart != null) {
// Specify default cache
defaultReadCache(xlsxReadWorkbookHolder, sharedStringsTablePackagePart);
// Analysis sharedStringsTable.xml
analysisSharedStringsTable(sharedStringsTablePackagePart.getInputStream(), xlsxReadWorkbookHolder);
}
XSSFReader xssfReader = new XSSFReader(pkg);
analysisUse1904WindowDate(xssfReader, xlsxReadWorkbookHolder);
// set style table
setStylesTable(xlsxReadWorkbookHolder, xssfReader);
sheetList = new ArrayList<>();
sheetMap = new HashMap<>();
commentsTableMap = new HashMap<>();
Map<Integer, PackageRelationshipCollection> packageRelationshipCollectionMap = MapUtils.newHashMap();
xlsxReadWorkbookHolder.setPackageRelationshipCollectionMap(packageRelationshipCollectionMap);
XSSFReader.SheetIterator ite = (XSSFReader.SheetIterator)xssfReader.getSheetsData();
int index = 0;
if (!ite.hasNext()) {
throw new ExcelAnalysisException("Can not find any sheet!");
}
while (ite.hasNext()) {
InputStream inputStream = ite.next();
sheetList.add(new ReadSheet(index, ite.getSheetName()));
sheetMap.put(index, inputStream);
if (xlsxReadContext.readWorkbookHolder().getExtraReadSet().contains(CellExtraTypeEnum.COMMENT)) {
Comments comments = ite.getSheetComments();
if (comments instanceof CommentsTable) {
commentsTableMap.put(index, (CommentsTable) comments);
}
}
if (xlsxReadContext.readWorkbookHolder().getExtraReadSet().contains(CellExtraTypeEnum.HYPERLINK)) {
PackageRelationshipCollection packageRelationshipCollection = Optional.ofNullable(ite.getSheetPart())
.map(packagePart -> {
try {
return packagePart.getRelationships();
} catch (InvalidFormatException e) {
log.warn("Reading the Relationship failed", e);
return null;
}
}).orElse(null);
if (packageRelationshipCollection != null) {
packageRelationshipCollectionMap.put(index, packageRelationshipCollection);
}
}
index++;
}
}
private void setStylesTable(XlsxReadWorkbookHolder xlsxReadWorkbookHolder, XSSFReader xssfReader) {
try {
xlsxReadWorkbookHolder.setStylesTable(xssfReader.getStylesTable());
} catch (Exception e) {
log.warn(
"Currently excel cannot get style information, but it doesn't affect the data analysis.You can try to"
+ " save the file with office again or ignore the current error.",
e);
}
}
private void defaultReadCache(XlsxReadWorkbookHolder xlsxReadWorkbookHolder,
PackagePart sharedStringsTablePackagePart) {
ReadCache readCache = xlsxReadWorkbookHolder.getReadCacheSelector().readCache(sharedStringsTablePackagePart);
xlsxReadWorkbookHolder.setReadCache(readCache);
readCache.init(xlsxReadContext);
}
private void analysisUse1904WindowDate(XSSFReader xssfReader, XlsxReadWorkbookHolder xlsxReadWorkbookHolder)
throws Exception {
if (xlsxReadWorkbookHolder.globalConfiguration().getUse1904windowing() != null) {
return;
}
InputStream workbookXml = xssfReader.getWorkbookData();
WorkbookDocument ctWorkbook = WorkbookDocument.Factory.parse(workbookXml);
CTWorkbook wb = ctWorkbook.getWorkbook();
CTWorkbookPr prefix = wb.getWorkbookPr();
if (prefix != null && prefix.getDate1904()) {
xlsxReadWorkbookHolder.getGlobalConfiguration().setUse1904windowing(Boolean.TRUE);
} else {
xlsxReadWorkbookHolder.getGlobalConfiguration().setUse1904windowing(Boolean.FALSE);
}
}
private void analysisSharedStringsTable(InputStream sharedStringsTableInputStream,
XlsxReadWorkbookHolder xlsxReadWorkbookHolder) {
ContentHandler handler = new SharedStringsTableHandler(xlsxReadWorkbookHolder.getReadCache());
parseXmlSource(sharedStringsTableInputStream, handler);
xlsxReadWorkbookHolder.getReadCache().putFinished();
}
private OPCPackage readOpcPackage(XlsxReadWorkbookHolder xlsxReadWorkbookHolder, InputStream decryptedStream)
throws Exception {
if (decryptedStream == null && xlsxReadWorkbookHolder.getFile() != null) {
return OPCPackage.open(xlsxReadWorkbookHolder.getFile());
}
if (xlsxReadWorkbookHolder.getMandatoryUseInputStream()) {
if (decryptedStream != null) {
return OPCPackage.open(decryptedStream);
} else {
return OPCPackage.open(xlsxReadWorkbookHolder.getInputStream());
}
}
File readTempFile = FileUtils.createCacheTmpFile();
xlsxReadWorkbookHolder.setTempFile(readTempFile);
File tempFile = new File(readTempFile.getPath(), UUID.randomUUID() + ".xlsx");
if (decryptedStream != null) {
FileUtils.writeToFile(tempFile, decryptedStream, false);
} else {
FileUtils.writeToFile(tempFile, xlsxReadWorkbookHolder.getInputStream(),
xlsxReadWorkbookHolder.getAutoCloseStream());
}
return OPCPackage.open(tempFile, PackageAccess.READ);
}
@Override
public List<ReadSheet> sheetList() {
return sheetList;
}
private void parseXmlSource(InputStream inputStream, ContentHandler handler) {
InputSource inputSource = new InputSource(inputStream);
try {
SAXParserFactory saxFactory;
String xlsxSAXParserFactoryName = xlsxReadContext.xlsxReadWorkbookHolder().getSaxParserFactoryName();
if (StringUtils.isEmpty(xlsxSAXParserFactoryName)) {
saxFactory = SAXParserFactory.newInstance();
} else {
saxFactory = SAXParserFactory.newInstance(xlsxSAXParserFactoryName, null);
}
try {
saxFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
} catch (Throwable ignore) {}
try {
saxFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
} catch (Throwable ignore) {}
try {
saxFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
} catch (Throwable ignore) {}
SAXParser saxParser = saxFactory.newSAXParser();
XMLReader xmlReader = saxParser.getXMLReader();
xmlReader.setContentHandler(handler);
xmlReader.parse(inputSource);
inputStream.close();
} catch (IOException | ParserConfigurationException | SAXException e) {
throw new ExcelAnalysisException(e);
} finally {
if (inputStream != null) {
try {
inputStream.close();
} catch (IOException e) {
throw new ExcelAnalysisException("Can not close 'inputStream'!");
}
}
}
}
@Override
public void execute() {
for (ReadSheet readSheet : sheetList) {
readSheet = SheetUtils.match(readSheet, xlsxReadContext);
if (readSheet != null) {
try {
xlsxReadContext.currentSheet(readSheet);
parseXmlSource(sheetMap.get(readSheet.getSheetNo()), new XlsxRowHandler(xlsxReadContext));
// Read comments
readComments(readSheet);
} catch (ExcelAnalysisStopSheetException e) {
if (log.isDebugEnabled()) {
log.debug("Custom stop!", e);
}
}
// The last sheet is read
xlsxReadContext.analysisEventProcessor().endSheet(xlsxReadContext);
}
}
}
private void readComments(ReadSheet readSheet) {
if (!xlsxReadContext.readWorkbookHolder().getExtraReadSet().contains(CellExtraTypeEnum.COMMENT)) {
return;
}
CommentsTable commentsTable = commentsTableMap.get(readSheet.getSheetNo());
if (commentsTable == null) {
return;
}
Iterator<CellAddress> cellAddresses = commentsTable.getCellAddresses();
while (cellAddresses.hasNext()) {
CellAddress cellAddress = cellAddresses.next();
XSSFComment cellComment = commentsTable.findCellComment(cellAddress);
CellExtra cellExtra = new CellExtra(CellExtraTypeEnum.COMMENT, cellComment.getString().toString(),
cellAddress.getRow(), cellAddress.getColumn());
xlsxReadContext.readSheetHolder().setCellExtra(cellExtra);
xlsxReadContext.analysisEventProcessor().extra(xlsxReadContext);
}
}
}

View File

@ -0,0 +1,17 @@
package ai.chat2db.excel.analysis.v07.handlers;
import ai.chat2db.excel.context.xlsx.XlsxReadContext;
/**
* Cell Value Handler
*
* @author jipengfei
*/
public abstract class AbstractCellValueTagHandler extends AbstractXlsxTagHandler {
@Override
public void characters(XlsxReadContext xlsxReadContext, char[] ch, int start, int length) {
xlsxReadContext.xlsxReadSheetHolder().getTempData().append(ch, start, length);
}
}

View File

@ -0,0 +1,32 @@
package ai.chat2db.excel.analysis.v07.handlers;
import org.xml.sax.Attributes;
import ai.chat2db.excel.context.xlsx.XlsxReadContext;
/**
* Abstract tag handler
*
* @author Jiaju Zhuang
*/
public abstract class AbstractXlsxTagHandler implements XlsxTagHandler {
@Override
public boolean support(XlsxReadContext xlsxReadContext) {
return true;
}
@Override
public void startElement(XlsxReadContext xlsxReadContext, String name, Attributes attributes) {
}
@Override
public void endElement(XlsxReadContext xlsxReadContext, String name) {
}
@Override
public void characters(XlsxReadContext xlsxReadContext, char[] ch, int start, int length) {
}
}

View File

@ -0,0 +1,34 @@
package ai.chat2db.excel.analysis.v07.handlers;
import ai.chat2db.excel.metadata.data.FormulaData;
import ai.chat2db.excel.read.metadata.holder.xlsx.XlsxReadSheetHolder;
import ai.chat2db.excel.context.xlsx.XlsxReadContext;
import org.xml.sax.Attributes;
/**
* Cell Handler
*
* @author jipengfei
*/
public class CellFormulaTagHandler extends AbstractXlsxTagHandler {
@Override
public void startElement(XlsxReadContext xlsxReadContext, String name, Attributes attributes) {
XlsxReadSheetHolder xlsxReadSheetHolder = xlsxReadContext.xlsxReadSheetHolder();
xlsxReadSheetHolder.setTempFormula(new StringBuilder());
}
@Override
public void endElement(XlsxReadContext xlsxReadContext, String name) {
XlsxReadSheetHolder xlsxReadSheetHolder = xlsxReadContext.xlsxReadSheetHolder();
FormulaData formulaData = new FormulaData();
formulaData.setFormulaValue(xlsxReadSheetHolder.getTempFormula().toString());
xlsxReadSheetHolder.getTempCellData().setFormulaData(formulaData);
}
@Override
public void characters(XlsxReadContext xlsxReadContext, char[] ch, int start, int length) {
xlsxReadContext.xlsxReadSheetHolder().getTempFormula().append(ch, start, length);
}
}

View File

@ -0,0 +1,10 @@
package ai.chat2db.excel.analysis.v07.handlers;
/**
* Cell inline string value handler
*
* @author jipengfei
*/
public class CellInlineStringValueTagHandler extends AbstractCellValueTagHandler {
}

View File

@ -0,0 +1,110 @@
package ai.chat2db.excel.analysis.v07.handlers;
import java.math.BigDecimal;
import ai.chat2db.excel.constant.EasyExcelConstants;
import ai.chat2db.excel.constant.ExcelXmlConstants;
import ai.chat2db.excel.enums.CellDataTypeEnum;
import ai.chat2db.excel.metadata.data.ReadCellData;
import ai.chat2db.excel.read.metadata.holder.xlsx.XlsxReadSheetHolder;
import ai.chat2db.excel.util.BooleanUtils;
import ai.chat2db.excel.util.PositionUtils;
import ai.chat2db.excel.util.StringUtils;
import ai.chat2db.excel.context.xlsx.XlsxReadContext;
import org.xml.sax.Attributes;
/**
* Cell Handler
*
* @author jipengfei
*/
public class CellTagHandler extends AbstractXlsxTagHandler {
private static final int DEFAULT_FORMAT_INDEX = 0;
@Override
public void startElement(XlsxReadContext xlsxReadContext, String name, Attributes attributes) {
XlsxReadSheetHolder xlsxReadSheetHolder = xlsxReadContext.xlsxReadSheetHolder();
xlsxReadSheetHolder.setColumnIndex(PositionUtils.getCol(attributes.getValue(ExcelXmlConstants.ATTRIBUTE_R),
xlsxReadSheetHolder.getColumnIndex()));
// t="s" ,it means String
// t="str" ,it means String,but does not need to be read in the 'sharedStrings.xml'
// t="inlineStr" ,it means String,but does not need to be read in the 'sharedStrings.xml'
// t="b" ,it means Boolean
// t="e" ,it means Error
// t="n" ,it means Number
// t is null ,it means Empty or Number
CellDataTypeEnum type = CellDataTypeEnum.buildFromCellType(attributes.getValue(ExcelXmlConstants.ATTRIBUTE_T));
xlsxReadSheetHolder.setTempCellData(new ReadCellData<>(type));
xlsxReadSheetHolder.setTempData(new StringBuilder());
// Put in data transformation information
String dateFormatIndex = attributes.getValue(ExcelXmlConstants.ATTRIBUTE_S);
int dateFormatIndexInteger;
if (StringUtils.isEmpty(dateFormatIndex)) {
dateFormatIndexInteger = DEFAULT_FORMAT_INDEX;
} else {
dateFormatIndexInteger = Integer.parseInt(dateFormatIndex);
}
xlsxReadSheetHolder.getTempCellData().setDataFormatData(
xlsxReadContext.xlsxReadWorkbookHolder().dataFormatData(dateFormatIndexInteger));
}
@Override
public void endElement(XlsxReadContext xlsxReadContext, String name) {
XlsxReadSheetHolder xlsxReadSheetHolder = xlsxReadContext.xlsxReadSheetHolder();
ReadCellData<?> tempCellData = xlsxReadSheetHolder.getTempCellData();
StringBuilder tempData = xlsxReadSheetHolder.getTempData();
String tempDataString = tempData.toString();
CellDataTypeEnum oldType = tempCellData.getType();
switch (oldType) {
case STRING:
// In some cases, although cell type is a string, it may be an empty tag
if (StringUtils.isEmpty(tempDataString)) {
break;
}
String stringValue = xlsxReadContext.readWorkbookHolder().getReadCache().get(
Integer.valueOf(tempDataString));
tempCellData.setStringValue(stringValue);
break;
case DIRECT_STRING:
case ERROR:
tempCellData.setStringValue(tempDataString);
tempCellData.setType(CellDataTypeEnum.STRING);
break;
case BOOLEAN:
if (StringUtils.isEmpty(tempDataString)) {
tempCellData.setType(CellDataTypeEnum.EMPTY);
break;
}
tempCellData.setBooleanValue(BooleanUtils.valueOf(tempData.toString()));
break;
case NUMBER:
case EMPTY:
if (StringUtils.isEmpty(tempDataString)) {
tempCellData.setType(CellDataTypeEnum.EMPTY);
break;
}
tempCellData.setType(CellDataTypeEnum.NUMBER);
tempCellData.setOriginalNumberValue(new BigDecimal(tempDataString));
tempCellData.setNumberValue(
tempCellData.getOriginalNumberValue().round(EasyExcelConstants.EXCEL_MATH_CONTEXT));
break;
default:
throw new IllegalStateException("Cannot set values now");
}
if (tempCellData.getStringValue() != null
&& xlsxReadContext.currentReadHolder().globalConfiguration().getAutoTrim()) {
tempCellData.setStringValue(tempCellData.getStringValue().trim());
}
tempCellData.checkEmpty();
tempCellData.setRowIndex(xlsxReadSheetHolder.getRowIndex());
tempCellData.setColumnIndex(xlsxReadSheetHolder.getColumnIndex());
xlsxReadSheetHolder.getCellMap().put(xlsxReadSheetHolder.getColumnIndex(), tempCellData);
}
}

View File

@ -0,0 +1,10 @@
package ai.chat2db.excel.analysis.v07.handlers;
/**
* Cell Value Handler
*
* @author jipengfei
*/
public class CellValueTagHandler extends AbstractCellValueTagHandler {
}

View File

@ -0,0 +1,23 @@
package ai.chat2db.excel.analysis.v07.handlers;
import ai.chat2db.excel.constant.ExcelXmlConstants;
import ai.chat2db.excel.util.PositionUtils;
import ai.chat2db.excel.context.xlsx.XlsxReadContext;
import org.xml.sax.Attributes;
/**
* Cell Handler
*
* @author jipengfei
*/
public class CountTagHandler extends AbstractXlsxTagHandler {
@Override
public void startElement(XlsxReadContext xlsxReadContext, String name, Attributes attributes) {
String d = attributes.getValue(ExcelXmlConstants.ATTRIBUTE_REF);
String totalStr = d.substring(d.indexOf(":") + 1);
xlsxReadContext.readSheetHolder().setApproximateTotalRowNumber(PositionUtils.getRow(totalStr) + 1);
}
}

View File

@ -0,0 +1,58 @@
package ai.chat2db.excel.analysis.v07.handlers;
import java.util.Optional;
import ai.chat2db.excel.enums.CellExtraTypeEnum;
import ai.chat2db.excel.metadata.CellExtra;
import ai.chat2db.excel.util.StringUtils;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
import org.xml.sax.Attributes;
import ai.chat2db.excel.constant.ExcelXmlConstants;
import ai.chat2db.excel.context.xlsx.XlsxReadContext;
/**
* Cell Handler
*
* @author Jiaju Zhuang
*/
public class HyperlinkTagHandler extends AbstractXlsxTagHandler {
@Override
public boolean support(XlsxReadContext xlsxReadContext) {
return xlsxReadContext.readWorkbookHolder().getExtraReadSet().contains(CellExtraTypeEnum.HYPERLINK);
}
@Override
public void startElement(XlsxReadContext xlsxReadContext, String name, Attributes attributes) {
String ref = attributes.getValue(ExcelXmlConstants.ATTRIBUTE_REF);
if (StringUtils.isEmpty(ref)) {
return;
}
// Hyperlink has 2 case:
// case 1In the 'location' tag
String location = attributes.getValue(ExcelXmlConstants.ATTRIBUTE_LOCATION);
if (location != null) {
CellExtra cellExtra = new CellExtra(CellExtraTypeEnum.HYPERLINK, location, ref);
xlsxReadContext.readSheetHolder().setCellExtra(cellExtra);
xlsxReadContext.analysisEventProcessor().extra(xlsxReadContext);
return;
}
// case 2, In the 'r:id' tag, Then go to 'PackageRelationshipCollection' to get inside
String rId = attributes.getValue(ExcelXmlConstants.ATTRIBUTE_RID);
PackageRelationshipCollection packageRelationshipCollection = xlsxReadContext.xlsxReadSheetHolder()
.getPackageRelationshipCollection();
if (rId == null || packageRelationshipCollection == null) {
return;
}
Optional.ofNullable(packageRelationshipCollection.getRelationshipByID(rId))
.map(PackageRelationship::getTargetURI)
.ifPresent(uri -> {
CellExtra cellExtra = new CellExtra(CellExtraTypeEnum.HYPERLINK, uri.toString(), ref);
xlsxReadContext.readSheetHolder().setCellExtra(cellExtra);
xlsxReadContext.analysisEventProcessor().extra(xlsxReadContext);
});
}
}

View File

@ -0,0 +1,34 @@
package ai.chat2db.excel.analysis.v07.handlers;
import ai.chat2db.excel.enums.CellExtraTypeEnum;
import ai.chat2db.excel.metadata.CellExtra;
import ai.chat2db.excel.util.StringUtils;
import org.xml.sax.Attributes;
import ai.chat2db.excel.constant.ExcelXmlConstants;
import ai.chat2db.excel.context.xlsx.XlsxReadContext;
/**
* Cell Handler
*
* @author Jiaju Zhuang
*/
public class MergeCellTagHandler extends AbstractXlsxTagHandler {
@Override
public boolean support(XlsxReadContext xlsxReadContext) {
return xlsxReadContext.readWorkbookHolder().getExtraReadSet().contains(CellExtraTypeEnum.MERGE);
}
@Override
public void startElement(XlsxReadContext xlsxReadContext, String name, Attributes attributes) {
String ref = attributes.getValue(ExcelXmlConstants.ATTRIBUTE_REF);
if (StringUtils.isEmpty(ref)) {
return;
}
CellExtra cellExtra = new CellExtra(CellExtraTypeEnum.MERGE, null, ref);
xlsxReadContext.readSheetHolder().setCellExtra(cellExtra);
xlsxReadContext.analysisEventProcessor().extra(xlsxReadContext);
}
}

View File

@ -0,0 +1,71 @@
package ai.chat2db.excel.analysis.v07.handlers;
import java.util.LinkedHashMap;
import ai.chat2db.excel.constant.ExcelXmlConstants;
import ai.chat2db.excel.enums.CellDataTypeEnum;
import ai.chat2db.excel.enums.RowTypeEnum;
import ai.chat2db.excel.metadata.Cell;
import ai.chat2db.excel.metadata.data.ReadCellData;
import ai.chat2db.excel.read.metadata.holder.ReadRowHolder;
import ai.chat2db.excel.read.metadata.holder.xlsx.XlsxReadSheetHolder;
import ai.chat2db.excel.util.PositionUtils;
import ai.chat2db.excel.context.xlsx.XlsxReadContext;
import org.apache.commons.collections4.MapUtils;
import org.xml.sax.Attributes;
/**
* Cell Handler
*
* @author jipengfei
*/
public class RowTagHandler extends AbstractXlsxTagHandler {
@Override
public void startElement(XlsxReadContext xlsxReadContext, String name, Attributes attributes) {
XlsxReadSheetHolder xlsxReadSheetHolder = xlsxReadContext.xlsxReadSheetHolder();
int rowIndex = PositionUtils.getRowByRowTagt(attributes.getValue(ExcelXmlConstants.ATTRIBUTE_R),
xlsxReadSheetHolder.getRowIndex());
Integer lastRowIndex = xlsxReadContext.readSheetHolder().getRowIndex();
while (lastRowIndex + 1 < rowIndex) {
xlsxReadContext.readRowHolder(new ReadRowHolder(lastRowIndex + 1, RowTypeEnum.EMPTY,
xlsxReadSheetHolder.getGlobalConfiguration(), new LinkedHashMap<Integer, Cell>()));
xlsxReadContext.analysisEventProcessor().endRow(xlsxReadContext);
xlsxReadSheetHolder.setColumnIndex(null);
xlsxReadSheetHolder.setCellMap(new LinkedHashMap<Integer, Cell>());
lastRowIndex++;
}
xlsxReadSheetHolder.setRowIndex(rowIndex);
}
@Override
public void endElement(XlsxReadContext xlsxReadContext, String name) {
XlsxReadSheetHolder xlsxReadSheetHolder = xlsxReadContext.xlsxReadSheetHolder();
RowTypeEnum rowType = MapUtils.isEmpty(xlsxReadSheetHolder.getCellMap()) ? RowTypeEnum.EMPTY : RowTypeEnum.DATA;
// It's possible that all of the cells in the row are empty
if (rowType == RowTypeEnum.DATA) {
boolean hasData = false;
for (Cell cell : xlsxReadSheetHolder.getCellMap().values()) {
if (!(cell instanceof ReadCellData)) {
hasData = true;
break;
}
ReadCellData<?> readCellData = (ReadCellData<?>)cell;
if (readCellData.getType() != CellDataTypeEnum.EMPTY) {
hasData = true;
break;
}
}
if (!hasData) {
rowType = RowTypeEnum.EMPTY;
}
}
xlsxReadContext.readRowHolder(new ReadRowHolder(xlsxReadSheetHolder.getRowIndex(), rowType,
xlsxReadSheetHolder.getGlobalConfiguration(), xlsxReadSheetHolder.getCellMap()));
xlsxReadContext.analysisEventProcessor().endRow(xlsxReadContext);
xlsxReadSheetHolder.setColumnIndex(null);
xlsxReadSheetHolder.setCellMap(new LinkedHashMap<>());
}
}

View File

@ -0,0 +1,54 @@
package ai.chat2db.excel.analysis.v07.handlers;
import org.xml.sax.Attributes;
import ai.chat2db.excel.context.xlsx.XlsxReadContext;
/**
* Tag handler
*
* @author Dan Zheng
*/
public interface XlsxTagHandler {
/**
* Whether to support
*
* @param xlsxReadContext
* @return
*/
boolean support(XlsxReadContext xlsxReadContext);
/**
* Start handle
*
* @param xlsxReadContext
* xlsxReadContext
* @param name
* Tag name
* @param attributes
* Tag attributes
*/
void startElement(XlsxReadContext xlsxReadContext, String name, Attributes attributes);
/**
* End handle
*
* @param xlsxReadContext
* xlsxReadContext
* @param name
* Tag name
*/
void endElement(XlsxReadContext xlsxReadContext, String name);
/**
* Read data
*
* @param xlsxReadContext
* @param ch
* @param start
* @param length
*/
void characters(XlsxReadContext xlsxReadContext, char[] ch, int start, int length);
}

View File

@ -0,0 +1,181 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package ai.chat2db.excel.analysis.v07.handlers.sax;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import ai.chat2db.excel.cache.ReadCache;
import ai.chat2db.excel.constant.ExcelXmlConstants;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.DefaultHandler;
/**
* Sax read sharedStringsTable.xml
*
* @author Jiaju Zhuang
*/
public class SharedStringsTableHandler extends DefaultHandler {
private static final Pattern UTF_PATTTERN = Pattern.compile("_x([0-9A-Fa-f]{4})_");
/**
* The final piece of data
*/
private StringBuilder currentData;
/**
* Current element data
*/
private StringBuilder currentElementData;
private final ReadCache readCache;
/**
* Some fields in the T tag need to be ignored
*/
private boolean ignoreTagt = false;
/**
* The only time you need to read the characters in the T tag is when it is used
*/
private boolean isTagt = false;
public SharedStringsTableHandler(ReadCache readCache) {
this.readCache = readCache;
}
@Override
public void startElement(String uri, String localName, String name, Attributes attributes) {
if (name == null) {
return;
}
switch (name) {
case ExcelXmlConstants.SHAREDSTRINGS_T_TAG:
case ExcelXmlConstants.SHAREDSTRINGS_X_T_TAG:
case ExcelXmlConstants.SHAREDSTRINGS_NS2_T_TAG:
currentElementData = null;
isTagt = true;
break;
case ExcelXmlConstants.SHAREDSTRINGS_SI_TAG:
case ExcelXmlConstants.SHAREDSTRINGS_X_SI_TAG:
case ExcelXmlConstants.SHAREDSTRINGS_NS2_SI_TAG:
currentData = null;
break;
case ExcelXmlConstants.SHAREDSTRINGS_RPH_TAG:
case ExcelXmlConstants.SHAREDSTRINGS_X_RPH_TAG:
case ExcelXmlConstants.SHAREDSTRINGS_NS2_RPH_TAG:
ignoreTagt = true;
break;
default:
// ignore
}
}
@Override
public void endElement(String uri, String localName, String name) {
if (name == null) {
return;
}
switch (name) {
case ExcelXmlConstants.SHAREDSTRINGS_T_TAG:
case ExcelXmlConstants.SHAREDSTRINGS_X_T_TAG:
case ExcelXmlConstants.SHAREDSTRINGS_NS2_T_TAG:
if (currentElementData != null) {
if (currentData == null) {
currentData = new StringBuilder();
}
currentData.append(currentElementData);
}
isTagt = false;
break;
case ExcelXmlConstants.SHAREDSTRINGS_SI_TAG:
case ExcelXmlConstants.SHAREDSTRINGS_X_SI_TAG:
case ExcelXmlConstants.SHAREDSTRINGS_NS2_SI_TAG:
if (currentData == null) {
readCache.put(null);
} else {
readCache.put(utfDecode(currentData.toString()));
}
break;
case ExcelXmlConstants.SHAREDSTRINGS_RPH_TAG:
case ExcelXmlConstants.SHAREDSTRINGS_X_RPH_TAG:
case ExcelXmlConstants.SHAREDSTRINGS_NS2_RPH_TAG:
ignoreTagt = false;
break;
default:
// ignore
}
}
@Override
public void characters(char[] ch, int start, int length) {
if (!isTagt || ignoreTagt) {
return;
}
if (currentElementData == null) {
currentElementData = new StringBuilder();
}
currentElementData.append(ch, start, length);
}
/**
* from poi XSSFRichTextString
*
* @param value the string to decode
* @return the decoded string or null if the input string is null
* <p>
* For all characters which cannot be represented in XML as defined by the XML 1.0 specification,
* the characters are escaped using the Unicode numerical character representation escape character
* format _xHHHH_, where H represents a hexadecimal character in the character's value.
* <p>
* Example: The Unicode character 0D is invalid in an XML 1.0 document,
* so it shall be escaped as <code>_x000D_</code>.
* </p>
* See section 3.18.9 in the OOXML spec.
* @see org.apache.poi.xssf.usermodel.XSSFRichTextString#utfDecode(String)
*/
static String utfDecode(String value) {
if (value == null || !value.contains("_x")) {
return value;
}
StringBuilder buf = new StringBuilder();
Matcher m = UTF_PATTTERN.matcher(value);
int idx = 0;
while (m.find()) {
int pos = m.start();
if (pos > idx) {
buf.append(value, idx, pos);
}
String code = m.group(1);
int icode = Integer.decode("0x" + code);
buf.append((char)icode);
idx = m.end();
}
// small optimization: don't go via StringBuilder if not necessary,
// the encodings are very rare, so we should almost always go via this shortcut.
if (idx == 0) {
return value;
}
buf.append(value.substring(idx));
return buf.toString();
}
}

View File

@ -0,0 +1,103 @@
package ai.chat2db.excel.analysis.v07.handlers.sax;
import java.util.HashMap;
import java.util.Map;
import ai.chat2db.excel.constant.ExcelXmlConstants;
import ai.chat2db.excel.analysis.v07.handlers.CellFormulaTagHandler;
import ai.chat2db.excel.analysis.v07.handlers.CellInlineStringValueTagHandler;
import ai.chat2db.excel.analysis.v07.handlers.CellTagHandler;
import ai.chat2db.excel.analysis.v07.handlers.CellValueTagHandler;
import ai.chat2db.excel.analysis.v07.handlers.CountTagHandler;
import ai.chat2db.excel.analysis.v07.handlers.HyperlinkTagHandler;
import ai.chat2db.excel.analysis.v07.handlers.MergeCellTagHandler;
import ai.chat2db.excel.analysis.v07.handlers.RowTagHandler;
import ai.chat2db.excel.analysis.v07.handlers.XlsxTagHandler;
import ai.chat2db.excel.context.xlsx.XlsxReadContext;
import lombok.extern.slf4j.Slf4j;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
* @author jipengfei
*/
@Slf4j
public class XlsxRowHandler extends DefaultHandler {
private final XlsxReadContext xlsxReadContext;
private static final Map<String, XlsxTagHandler> XLSX_CELL_HANDLER_MAP = new HashMap<>(64);
static {
CellFormulaTagHandler cellFormulaTagHandler = new CellFormulaTagHandler();
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.CELL_FORMULA_TAG, cellFormulaTagHandler);
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.X_CELL_FORMULA_TAG, cellFormulaTagHandler);
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.NS2_CELL_FORMULA_TAG, cellFormulaTagHandler);
CellInlineStringValueTagHandler cellInlineStringValueTagHandler = new CellInlineStringValueTagHandler();
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.CELL_INLINE_STRING_VALUE_TAG, cellInlineStringValueTagHandler);
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.X_CELL_INLINE_STRING_VALUE_TAG, cellInlineStringValueTagHandler);
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.NS2_CELL_INLINE_STRING_VALUE_TAG, cellInlineStringValueTagHandler);
CellTagHandler cellTagHandler = new CellTagHandler();
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.CELL_TAG, cellTagHandler);
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.X_CELL_TAG, cellTagHandler);
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.NS2_CELL_TAG, cellTagHandler);
CellValueTagHandler cellValueTagHandler = new CellValueTagHandler();
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.CELL_VALUE_TAG, cellValueTagHandler);
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.X_CELL_VALUE_TAG, cellValueTagHandler);
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.NS2_CELL_VALUE_TAG, cellValueTagHandler);
CountTagHandler countTagHandler = new CountTagHandler();
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.DIMENSION_TAG, countTagHandler);
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.X_DIMENSION_TAG, countTagHandler);
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.NS2_DIMENSION_TAG, countTagHandler);
HyperlinkTagHandler hyperlinkTagHandler = new HyperlinkTagHandler();
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.HYPERLINK_TAG, hyperlinkTagHandler);
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.X_HYPERLINK_TAG, hyperlinkTagHandler);
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.NS2_HYPERLINK_TAG, hyperlinkTagHandler);
MergeCellTagHandler mergeCellTagHandler = new MergeCellTagHandler();
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.MERGE_CELL_TAG, mergeCellTagHandler);
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.X_MERGE_CELL_TAG, mergeCellTagHandler);
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.NS2_MERGE_CELL_TAG, mergeCellTagHandler);
RowTagHandler rowTagHandler = new RowTagHandler();
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.ROW_TAG, rowTagHandler);
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.X_ROW_TAG, rowTagHandler);
XLSX_CELL_HANDLER_MAP.put(ExcelXmlConstants.NS2_ROW_TAG, rowTagHandler);
}
public XlsxRowHandler(XlsxReadContext xlsxReadContext) {
this.xlsxReadContext = xlsxReadContext;
}
@Override
public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
XlsxTagHandler handler = XLSX_CELL_HANDLER_MAP.get(name);
if (handler == null || !handler.support(xlsxReadContext)) {
return;
}
xlsxReadContext.xlsxReadSheetHolder().getTagDeque().push(name);
handler.startElement(xlsxReadContext, name, attributes);
}
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
String currentTag = xlsxReadContext.xlsxReadSheetHolder().getTagDeque().peek();
if (currentTag == null) {
return;
}
XlsxTagHandler handler = XLSX_CELL_HANDLER_MAP.get(currentTag);
if (handler == null || !handler.support(xlsxReadContext)) {
return;
}
handler.characters(xlsxReadContext, ch, start, length);
}
@Override
public void endElement(String uri, String localName, String name) throws SAXException {
XlsxTagHandler handler = XLSX_CELL_HANDLER_MAP.get(name);
if (handler == null || !handler.support(xlsxReadContext)) {
return;
}
handler.endElement(xlsxReadContext, name);
xlsxReadContext.xlsxReadSheetHolder().getTagDeque().pop();
}
}

View File

@ -0,0 +1,17 @@
package ai.chat2db.excel.annotation;
import java.lang.annotation.ElementType;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* Ignore convert excel
*
* @author Jiaju Zhuang
*/
@Target(ElementType.FIELD)
@Retention(RetentionPolicy.RUNTIME)
@Inherited
public @interface ExcelIgnore {}

View File

@ -0,0 +1,18 @@
package ai.chat2db.excel.annotation;
import java.lang.annotation.ElementType;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* Ignore all unannotated fields.
*
* @author Jiaju Zhuang
*/
@Target(ElementType.TYPE)
@Retention(RetentionPolicy.RUNTIME)
@Inherited
public @interface ExcelIgnoreUnannotated {
}

View File

@ -0,0 +1,69 @@
package ai.chat2db.excel.annotation;
import java.lang.annotation.ElementType;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import ai.chat2db.excel.converters.AutoConverter;
import ai.chat2db.excel.converters.Converter;
import ai.chat2db.excel.annotation.format.DateTimeFormat;
/**
* @author jipengfei
*/
@Target(ElementType.FIELD)
@Retention(RetentionPolicy.RUNTIME)
@Inherited
public @interface ExcelProperty {
/**
* The name of the sheet header.
*
* <p>
* write: It automatically merges when you have more than one head
* <p>
* read: When you have multiple heads, take the last one
*
* @return The name of the sheet header
*/
String[] value() default {""};
/**
* Index of column
*
* Read or write it on the index of column, If it's equal to -1, it's sorted by Java class.
*
* priority: index &gt; order &gt; default sort
*
* @return Index of column
*/
int index() default -1;
/**
* Defines the sort order for an column.
*
* priority: index &gt; order &gt; default sort
*
* @return Order of column
*/
int order() default Integer.MAX_VALUE;
/**
* Force the current field to use this converter.
*
* @return Converter
*/
Class<? extends Converter<?>> converter() default AutoConverter.class;
/**
*
* default @see com.alibaba.excel.util.TypeUtil if default is not meet you can set format
*
* @return Format string
* @deprecated please use {@link DateTimeFormat}
*/
@Deprecated
String format() default "";
}

View File

@ -0,0 +1,40 @@
package ai.chat2db.excel.annotation.format;
import java.lang.annotation.ElementType;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import ai.chat2db.excel.enums.BooleanEnum;
/**
* Convert date format.
*
* <p>
* write: It can be used on classes {@link java.util.Date}
* <p>
* read: It can be used on classes {@link String}
*
* @author Jiaju Zhuang
*/
@Target(ElementType.FIELD)
@Retention(RetentionPolicy.RUNTIME)
@Inherited
public @interface DateTimeFormat {
/**
*
* Specific format reference {@link java.text.SimpleDateFormat}
*
* @return Format pattern
*/
String value() default "";
/**
* True if date uses 1904 windowing, or false if using 1900 date windowing.
*
* @return True if date uses 1904 windowing, or false if using 1900 date windowing.
*/
BooleanEnum use1904windowing() default BooleanEnum.DEFAULT;
}

View File

@ -0,0 +1,39 @@
package ai.chat2db.excel.annotation.format;
import java.lang.annotation.ElementType;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import java.math.RoundingMode;
/**
* Convert number format.
*
* <p>
* write: It can be used on classes that inherit {@link Number}
* <p>
* read: It can be used on classes {@link String}
*
* @author Jiaju Zhuang
*/
@Target(ElementType.FIELD)
@Retention(RetentionPolicy.RUNTIME)
@Inherited
public @interface NumberFormat {
/**
*
* Specific format reference {@link java.text.DecimalFormat}
*
* @return Format pattern
*/
String value() default "";
/**
* Rounded by default
*
* @return RoundingMode
*/
RoundingMode roundingMode() default RoundingMode.HALF_UP;
}

View File

@ -0,0 +1,27 @@
package ai.chat2db.excel.annotation.write.style;
import java.lang.annotation.ElementType;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* Set the width of the table
*
* @author Jiaju Zhuang
*/
@Target({ElementType.FIELD, ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
@Inherited
public @interface ColumnWidth {
/**
* Column width
* <p>
* -1 means the default column width is used
*
* @return Column width
*/
int value() default -1;
}

View File

@ -0,0 +1,91 @@
package ai.chat2db.excel.annotation.write.style;
import java.lang.annotation.ElementType;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import ai.chat2db.excel.enums.BooleanEnum;
import org.apache.poi.common.usermodel.fonts.FontCharset;
import org.apache.poi.hssf.usermodel.HSSFPalette;
import org.apache.poi.ss.usermodel.Font;
import org.apache.poi.ss.usermodel.IndexedColors;
/**
* Custom content styles.
*
* @author Jiaju Zhuang
*/
@Target({ElementType.FIELD, ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
@Inherited
public @interface ContentFontStyle {
/**
* The name for the font (i.e. Arial)
*/
String fontName() default "";
/**
* Height in the familiar unit of measure - points
*/
short fontHeightInPoints() default -1;
/**
* Whether to use italics or not
*/
BooleanEnum italic() default BooleanEnum.DEFAULT;
/**
* Whether to use a strikeout horizontal line through the text or not
*/
BooleanEnum strikeout() default BooleanEnum.DEFAULT;
/**
* The color for the font
*
* @see Font#COLOR_NORMAL
* @see Font#COLOR_RED
* @see HSSFPalette#getColor(short)
* @see IndexedColors
*/
short color() default -1;
/**
* Set normal, super or subscript.
*
* @see Font#SS_NONE
* @see Font#SS_SUPER
* @see Font#SS_SUB
*/
short typeOffset() default -1;
/**
* set type of text underlining to use
*
* @see Font#U_NONE
* @see Font#U_SINGLE
* @see Font#U_DOUBLE
* @see Font#U_SINGLE_ACCOUNTING
* @see Font#U_DOUBLE_ACCOUNTING
*/
byte underline() default -1;
/**
* Set character-set to use.
*
* @see FontCharset
* @see Font#ANSI_CHARSET
* @see Font#DEFAULT_CHARSET
* @see Font#SYMBOL_CHARSET
*/
int charset() default -1;
/**
* Bold
*/
BooleanEnum bold() default BooleanEnum.DEFAULT;
}

View File

@ -0,0 +1,31 @@
package ai.chat2db.excel.annotation.write.style;
import java.lang.annotation.ElementType;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* The regions of the loop merge
*
* @author Jiaju Zhuang
*/
@Target({ElementType.FIELD})
@Retention(RetentionPolicy.RUNTIME)
@Inherited
public @interface ContentLoopMerge {
/**
* Each row
*
* @return
*/
int eachRow() default 1;
/**
* Extend column
*
* @return
*/
int columnExtend() default 1;
}

View File

@ -0,0 +1,27 @@
package ai.chat2db.excel.annotation.write.style;
import java.lang.annotation.ElementType;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* Set the height of each table
*
* @author Jiaju Zhuang
*/
@Target({ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
@Inherited
public @interface ContentRowHeight {
/**
* Set the content height
* <p>
* -1 mean the auto set height
*
* @return Content height
*/
short value() default -1;
}

View File

@ -0,0 +1,162 @@
package ai.chat2db.excel.annotation.write.style;
import java.lang.annotation.ElementType;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import ai.chat2db.excel.enums.BooleanEnum;
import ai.chat2db.excel.enums.poi.BorderStyleEnum;
import ai.chat2db.excel.enums.poi.FillPatternTypeEnum;
import ai.chat2db.excel.enums.poi.HorizontalAlignmentEnum;
import ai.chat2db.excel.enums.poi.VerticalAlignmentEnum;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.FillPatternType;
import org.apache.poi.ss.usermodel.IgnoredErrorType;
import org.apache.poi.ss.usermodel.IndexedColors;
/**
* Custom content styles
*
* @author Jiaju Zhuang
*/
@Target({ElementType.FIELD, ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
@Inherited
public @interface ContentStyle {
/**
* Set the data format (must be a valid format). Built in formats are defined at {@link BuiltinFormats}.
*/
short dataFormat() default -1;
/**
* Set the cell's using this style to be hidden
*/
BooleanEnum hidden() default BooleanEnum.DEFAULT;
/**
* Set the cell's using this style to be locked
*/
BooleanEnum locked() default BooleanEnum.DEFAULT;
/**
* Turn on or off "Quote Prefix" or "123 Prefix" for the style, which is used to tell Excel that the thing which
* looks like a number or a formula shouldn't be treated as on. Turning this on is somewhat (but not completely, see
* {@link IgnoredErrorType}) like prefixing the cell value with a ' in Excel
*/
BooleanEnum quotePrefix() default BooleanEnum.DEFAULT;
/**
* Set the type of horizontal alignment for the cell
*/
HorizontalAlignmentEnum horizontalAlignment() default HorizontalAlignmentEnum.DEFAULT;
/**
* Set whether the text should be wrapped. Setting this flag to <code>true</code> make all content visible within a
* cell by displaying it on multiple lines
*
*/
BooleanEnum wrapped() default BooleanEnum.DEFAULT;
/**
* Set the type of vertical alignment for the cell
*/
VerticalAlignmentEnum verticalAlignment() default VerticalAlignmentEnum.DEFAULT;
/**
* Set the degree of rotation for the text in the cell.
*
* Note: HSSF uses values from -90 to 90 degrees, whereas XSSF uses values from 0 to 180 degrees. The
* implementations of this method will map between these two value-ranges accordingly, however the corresponding
* getter is returning values in the range mandated by the current type of Excel file-format that this CellStyle is
* applied to.
*/
short rotation() default -1;
/**
* Set the number of spaces to indent the text in the cell
*/
short indent() default -1;
/**
* Set the type of border to use for the left border of the cell
*/
BorderStyleEnum borderLeft() default BorderStyleEnum.DEFAULT;
/**
* Set the type of border to use for the right border of the cell
*/
BorderStyleEnum borderRight() default BorderStyleEnum.DEFAULT;
/**
* Set the type of border to use for the top border of the cell
*/
BorderStyleEnum borderTop() default BorderStyleEnum.DEFAULT;
/**
* Set the type of border to use for the bottom border of the cell
*/
BorderStyleEnum borderBottom() default BorderStyleEnum.DEFAULT;
/**
* Set the color to use for the left border
*
* @see IndexedColors
*/
short leftBorderColor() default -1;
/**
* Set the color to use for the right border
*
* @see IndexedColors
*
*/
short rightBorderColor() default -1;
/**
* Set the color to use for the top border
*
* @see IndexedColors
*
*/
short topBorderColor() default -1;
/**
* Set the color to use for the bottom border
*
* @see IndexedColors
*
*/
short bottomBorderColor() default -1;
/**
* Setting to one fills the cell with the foreground color... No idea about other values
*
* @see FillPatternType#SOLID_FOREGROUND
*/
FillPatternTypeEnum fillPatternType() default FillPatternTypeEnum.DEFAULT;
/**
* Set the background fill color.
*
* @see IndexedColors
*
*/
short fillBackgroundColor() default -1;
/**
* Set the foreground fill color <i>Note: Ensure Foreground color is set prior to background color.</i>
*
* @see IndexedColors
*
*/
short fillForegroundColor() default -1;
/**
* Controls if the Cell should be auto-sized to shrink to fit if the text is too long
*/
BooleanEnum shrinkToFit() default BooleanEnum.DEFAULT;
}

View File

@ -0,0 +1,91 @@
package ai.chat2db.excel.annotation.write.style;
import java.lang.annotation.ElementType;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import ai.chat2db.excel.enums.BooleanEnum;
import org.apache.poi.common.usermodel.fonts.FontCharset;
import org.apache.poi.hssf.usermodel.HSSFPalette;
import org.apache.poi.ss.usermodel.Font;
import org.apache.poi.ss.usermodel.IndexedColors;
/**
* Custom header styles.
*
* @author Jiaju Zhuang
*/
@Target({ElementType.FIELD, ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
@Inherited
public @interface HeadFontStyle {
/**
* The name for the font (i.e. Arial)
*/
String fontName() default "";
/**
* Height in the familiar unit of measure - points
*/
short fontHeightInPoints() default -1;
/**
* Whether to use italics or not
*/
BooleanEnum italic() default BooleanEnum.DEFAULT;
/**
* Whether to use a strikeout horizontal line through the text or not
*/
BooleanEnum strikeout() default BooleanEnum.DEFAULT;
/**
* The color for the font
*
* @see Font#COLOR_NORMAL
* @see Font#COLOR_RED
* @see HSSFPalette#getColor(short)
* @see IndexedColors
*/
short color() default -1;
/**
* Set normal, super or subscript.
*
* @see Font#SS_NONE
* @see Font#SS_SUPER
* @see Font#SS_SUB
*/
short typeOffset() default -1;
/**
* set type of text underlining to use
*
* @see Font#U_NONE
* @see Font#U_SINGLE
* @see Font#U_DOUBLE
* @see Font#U_SINGLE_ACCOUNTING
* @see Font#U_DOUBLE_ACCOUNTING
*/
byte underline() default -1;
/**
* Set character-set to use.
*
* @see FontCharset
* @see Font#ANSI_CHARSET
* @see Font#DEFAULT_CHARSET
* @see Font#SYMBOL_CHARSET
*/
int charset() default -1;
/**
* Bold
*/
BooleanEnum bold() default BooleanEnum.DEFAULT;
}

View File

@ -0,0 +1,26 @@
package ai.chat2db.excel.annotation.write.style;
import java.lang.annotation.ElementType;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* Set the height of each table
*
* @author Jiaju Zhuang
*/
@Target({ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
@Inherited
public @interface HeadRowHeight {
/**
* Set the header height
* <p>
* -1 mean the auto set height
*
* @return Header height
*/
short value() default -1;
}

View File

@ -0,0 +1,156 @@
package ai.chat2db.excel.annotation.write.style;
import java.lang.annotation.ElementType;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import ai.chat2db.excel.enums.BooleanEnum;
import ai.chat2db.excel.enums.poi.BorderStyleEnum;
import ai.chat2db.excel.enums.poi.FillPatternTypeEnum;
import ai.chat2db.excel.enums.poi.HorizontalAlignmentEnum;
import ai.chat2db.excel.enums.poi.VerticalAlignmentEnum;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.FillPatternType;
import org.apache.poi.ss.usermodel.IgnoredErrorType;
import org.apache.poi.ss.usermodel.IndexedColors;
/**
* Custom header styles
*
* @author Jiaju Zhuang
*/
@Target({ElementType.FIELD, ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
@Inherited
public @interface HeadStyle {
/**
* Set the data format (must be a valid format). Built in formats are defined at {@link BuiltinFormats}.
*/
short dataFormat() default -1;
/**
* Set the cell's using this style to be hidden
*/
BooleanEnum hidden() default BooleanEnum.DEFAULT;
/**
* Set the cell's using this style to be locked
*/
BooleanEnum locked() default BooleanEnum.DEFAULT;
/**
* Turn on or off "Quote Prefix" or "123 Prefix" for the style, which is used to tell Excel that the thing which
* looks like a number or a formula shouldn't be treated as on. Turning this on is somewhat (but not completely, see
* {@link IgnoredErrorType}) like prefixing the cell value with a ' in Excel
*/
BooleanEnum quotePrefix() default BooleanEnum.DEFAULT;
/**
* Set the type of horizontal alignment for the cell
*/
HorizontalAlignmentEnum horizontalAlignment() default HorizontalAlignmentEnum.DEFAULT;
/**
* Set whether the text should be wrapped. Setting this flag to <code>true</code> make all content visible within a
* cell by displaying it on multiple lines
*/
BooleanEnum wrapped() default BooleanEnum.DEFAULT;
/**
* Set the type of vertical alignment for the cell
*/
VerticalAlignmentEnum verticalAlignment() default VerticalAlignmentEnum.DEFAULT;
/**
* Set the degree of rotation for the text in the cell.
*
* Note: HSSF uses values from -90 to 90 degrees, whereas XSSF uses values from 0 to 180 degrees. The
* implementations of this method will map between these two value-ranges accordingly, however the corresponding
* getter is returning values in the range mandated by the current type of Excel file-format that this CellStyle is
* applied to.
*/
short rotation() default -1;
/**
* Set the number of spaces to indent the text in the cell
*/
short indent() default -1;
/**
* Set the type of border to use for the left border of the cell
*/
BorderStyleEnum borderLeft() default BorderStyleEnum.DEFAULT;
/**
* Set the type of border to use for the right border of the cell
*/
BorderStyleEnum borderRight() default BorderStyleEnum.DEFAULT;
/**
* Set the type of border to use for the top border of the cell
*/
BorderStyleEnum borderTop() default BorderStyleEnum.DEFAULT;
/**
* Set the type of border to use for the bottom border of the cell
*/
BorderStyleEnum borderBottom() default BorderStyleEnum.DEFAULT;
/**
* Set the color to use for the left border
*
* @see IndexedColors
*/
short leftBorderColor() default -1;
/**
* Set the color to use for the right border
*
* @see IndexedColors
*/
short rightBorderColor() default -1;
/**
* Set the color to use for the top border
*
* @see IndexedColors
*/
short topBorderColor() default -1;
/**
* Set the color to use for the bottom border
*
* @see IndexedColors
*/
short bottomBorderColor() default -1;
/**
* Setting to one fills the cell with the foreground color... No idea about other values
*
* @see FillPatternType#SOLID_FOREGROUND
*/
FillPatternTypeEnum fillPatternType() default FillPatternTypeEnum.DEFAULT;
/**
* Set the background fill color.
*
* @see IndexedColors
*/
short fillBackgroundColor() default -1;
/**
* Set the foreground fill color <i>Note: Ensure Foreground color is set prior to background color.</i>
*
* @see IndexedColors
*/
short fillForegroundColor() default -1;
/**
* Controls if the Cell should be auto-sized to shrink to fit if the text is too long
*/
BooleanEnum shrinkToFit() default BooleanEnum.DEFAULT;
}

View File

@ -0,0 +1,45 @@
package ai.chat2db.excel.annotation.write.style;
import java.lang.annotation.ElementType;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* Merge the cells once
*
* @author Jiaju Zhuang
*/
@Target({ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
@Inherited
public @interface OnceAbsoluteMerge {
/**
* First row
*
* @return
*/
int firstRowIndex() default -1;
/**
* Last row
*
* @return
*/
int lastRowIndex() default -1;
/**
* First column
*
* @return
*/
int firstColumnIndex() default -1;
/**
* Last row
*
* @return
*/
int lastColumnIndex() default -1;
}

View File

@ -0,0 +1,163 @@
package ai.chat2db.excel.cache;
import java.io.File;
import java.util.ArrayList;
import java.util.UUID;
import ai.chat2db.excel.context.AnalysisContext;
import ai.chat2db.excel.util.FileUtils;
import ai.chat2db.excel.util.ListUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.ehcache.CacheManager;
import org.ehcache.config.CacheConfiguration;
import org.ehcache.config.builders.CacheConfigurationBuilder;
import org.ehcache.config.builders.CacheManagerBuilder;
import org.ehcache.config.builders.ResourcePoolsBuilder;
import org.ehcache.config.units.EntryUnit;
import org.ehcache.config.units.MemoryUnit;
/**
* Default cache
*
* @author Jiaju Zhuang
*/
@Slf4j
public class Ehcache implements ReadCache {
public static final int BATCH_COUNT = 100;
/**
* Key index
*/
private int activeIndex = 0;
public static final int DEBUG_CACHE_MISS_SIZE = 1000;
public static final int DEBUG_WRITE_SIZE = 100 * 10000;
private ArrayList<String> dataList = ListUtils.newArrayListWithExpectedSize(BATCH_COUNT);
private static final CacheManager FILE_CACHE_MANAGER;
private static final CacheConfiguration<Integer, ArrayList> FILE_CACHE_CONFIGURATION;
private static final CacheManager ACTIVE_CACHE_MANAGER;
private static final File CACHE_PATH_FILE;
private final CacheConfiguration<Integer, ArrayList> activeCacheConfiguration;
/**
* Bulk storage data
*/
private org.ehcache.Cache<Integer, ArrayList> fileCache;
/**
* Currently active cache
*/
private org.ehcache.Cache<Integer, ArrayList> activeCache;
private String cacheAlias;
/**
* Count the number of cache misses
*/
private int cacheMiss = 0;
@Deprecated
public Ehcache(Integer maxCacheActivateSize) {
this(maxCacheActivateSize, null);
}
public Ehcache(Integer maxCacheActivateSize, Integer maxCacheActivateBatchCount) {
// In order to be compatible with the code
// If the user set up `maxCacheActivateSize`, then continue using it
if (maxCacheActivateSize != null) {
this.activeCacheConfiguration = CacheConfigurationBuilder
.newCacheConfigurationBuilder(Integer.class, ArrayList.class,
ResourcePoolsBuilder.newResourcePoolsBuilder()
.heap(maxCacheActivateSize, MemoryUnit.MB))
.build();
} else {
this.activeCacheConfiguration = CacheConfigurationBuilder
.newCacheConfigurationBuilder(Integer.class, ArrayList.class,
ResourcePoolsBuilder.newResourcePoolsBuilder()
.heap(maxCacheActivateBatchCount, EntryUnit.ENTRIES))
.build();
}
}
static {
CACHE_PATH_FILE = FileUtils.createCacheTmpFile();
FILE_CACHE_MANAGER =
CacheManagerBuilder.newCacheManagerBuilder().with(CacheManagerBuilder.persistence(CACHE_PATH_FILE)).build(
true);
ACTIVE_CACHE_MANAGER = CacheManagerBuilder.newCacheManagerBuilder().build(true);
FILE_CACHE_CONFIGURATION = CacheConfigurationBuilder
.newCacheConfigurationBuilder(Integer.class, ArrayList.class, ResourcePoolsBuilder.newResourcePoolsBuilder()
.disk(20, MemoryUnit.GB)).build();
}
@Override
public void init(AnalysisContext analysisContext) {
cacheAlias = UUID.randomUUID().toString();
try {
fileCache = FILE_CACHE_MANAGER.createCache(cacheAlias, FILE_CACHE_CONFIGURATION);
} catch (IllegalStateException e) {
//fix Issue #2693,Temporary files may be deleted if there is no operation for a long time, so they need
// to be recreated.
if (CACHE_PATH_FILE.exists()) {
throw e;
}
synchronized (Ehcache.class) {
if (!CACHE_PATH_FILE.exists()) {
if (log.isDebugEnabled()) {
log.debug("cache file dir is not exist retry create");
}
FileUtils.createDirectory(CACHE_PATH_FILE);
}
}
fileCache = FILE_CACHE_MANAGER.createCache(cacheAlias, FILE_CACHE_CONFIGURATION);
}
activeCache = ACTIVE_CACHE_MANAGER.createCache(cacheAlias, activeCacheConfiguration);
}
@Override
public void put(String value) {
dataList.add(value);
if (dataList.size() >= BATCH_COUNT) {
fileCache.put(activeIndex, dataList);
activeIndex++;
dataList = ListUtils.newArrayListWithExpectedSize(BATCH_COUNT);
}
if (log.isDebugEnabled()) {
int alreadyPut = activeIndex * BATCH_COUNT + dataList.size();
if (alreadyPut % DEBUG_WRITE_SIZE == 0) {
log.debug("Already put :{}", alreadyPut);
}
}
}
@Override
public String get(Integer key) {
if (key == null || key < 0) {
return null;
}
int route = key / BATCH_COUNT;
ArrayList<String> dataList = activeCache.get(route);
if (dataList == null) {
dataList = fileCache.get(route);
activeCache.put(route, dataList);
if (log.isDebugEnabled()) {
if (cacheMiss++ % DEBUG_CACHE_MISS_SIZE == 0) {
log.debug("Cache misses count:{}", cacheMiss);
}
}
}
return dataList.get(key % BATCH_COUNT);
}
@Override
public void putFinished() {
if (CollectionUtils.isEmpty(dataList)) {
return;
}
fileCache.put(activeIndex, dataList);
}
@Override
public void destroy() {
FILE_CACHE_MANAGER.removeCache(cacheAlias);
ACTIVE_CACHE_MANAGER.removeCache(cacheAlias);
}
}

View File

@ -0,0 +1,38 @@
package ai.chat2db.excel.cache;
import java.util.ArrayList;
import java.util.List;
import ai.chat2db.excel.context.AnalysisContext;
/**
* Putting temporary data directly into a map is a little more efficient but very memory intensive
*
* @author Jiaju Zhuang
*/
public class MapCache implements ReadCache {
private final List<String> cache = new ArrayList<>();
@Override
public void init(AnalysisContext analysisContext) {}
@Override
public void put(String value) {
cache.add(value);
}
@Override
public String get(Integer key) {
if (key == null || key < 0) {
return null;
}
return cache.get(key);
}
@Override
public void putFinished() {}
@Override
public void destroy() {}
}

View File

@ -0,0 +1,47 @@
package ai.chat2db.excel.cache;
import ai.chat2db.excel.context.AnalysisContext;
/**
* Read cache
*
* @author Jiaju Zhuang
*/
public interface ReadCache {
/**
* Initialize cache
*
* @param analysisContext
* A context is the main anchorage point of a excel reader.
*/
void init(AnalysisContext analysisContext);
/**
* Automatically generate the key and put it in the cache.Key start from 0
*
* @param value
* Cache value
*/
void put(String value);
/**
* Get value
*
* @param key
* Index
* @return Value
*/
String get(Integer key);
/**
* It's called when all the values are put in
*/
void putFinished();
/**
* Called when the excel read is complete
*/
void destroy();
}

View File

@ -0,0 +1,37 @@
package ai.chat2db.excel.cache;
import org.apache.poi.hssf.record.SSTRecord;
import ai.chat2db.excel.context.AnalysisContext;
/**
*
* Use SSTRecord.
*
* @author Jiaju Zhuang
*/
public class XlsCache implements ReadCache {
private final SSTRecord sstRecord;
public XlsCache(SSTRecord sstRecord) {
this.sstRecord = sstRecord;
}
@Override
public void init(AnalysisContext analysisContext) {}
@Override
public void put(String value) {}
@Override
public String get(Integer key) {
return sstRecord.getString(key).toString();
}
@Override
public void putFinished() {}
@Override
public void destroy() {}
}

View File

@ -0,0 +1,22 @@
package ai.chat2db.excel.cache.selector;
import ai.chat2db.excel.cache.ReadCache;
import org.apache.poi.openxml4j.opc.PackagePart;
/**
* Choose a eternal cache
*
* @author Jiaju Zhuang
**/
public class EternalReadCacheSelector implements ReadCacheSelector {
private ReadCache readCache;
public EternalReadCacheSelector(ReadCache readCache) {
this.readCache = readCache;
}
@Override
public ReadCache readCache(PackagePart sharedStringsTablePackagePart) {
return readCache;
}
}

View File

@ -0,0 +1,20 @@
package ai.chat2db.excel.cache.selector;
import ai.chat2db.excel.cache.ReadCache;
import org.apache.poi.openxml4j.opc.PackagePart;
/**
* Select the cache
*
* @author Jiaju Zhuang
**/
public interface ReadCacheSelector {
/**
* Select a cache
*
* @param sharedStringsTablePackagePart
* @return
*/
ReadCache readCache(PackagePart sharedStringsTablePackagePart);
}

View File

@ -0,0 +1,110 @@
package ai.chat2db.excel.cache.selector;
import java.io.IOException;
import ai.chat2db.excel.cache.Ehcache;
import ai.chat2db.excel.cache.MapCache;
import ai.chat2db.excel.cache.ReadCache;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.Setter;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Simple cache selector
*
* @author Jiaju Zhuang
**/
@Getter
@Setter
@EqualsAndHashCode
public class SimpleReadCacheSelector implements ReadCacheSelector {
private static final Logger LOGGER = LoggerFactory.getLogger(SimpleReadCacheSelector.class);
/**
* Convert bytes to megabytes
*/
private static final long B2M = 1000 * 1000L;
/**
* If it's less than 5M, use map cache, or use ehcache.unit MB.
*/
private static final long DEFAULT_MAX_USE_MAP_CACHE_SIZE = 5;
/**
* Maximum batch of `SharedStrings` stored in memory.
* The batch size is 100.{@link Ehcache#BATCH_COUNT}
*/
private static final int DEFAULT_MAX_EHCACHE_ACTIVATE_BATCH_COUNT = 20;
/**
* Shared strings exceeding this value will use {@link Ehcache},or use {@link MapCache}.unit MB.
*/
private Long maxUseMapCacheSize;
/**
* Maximum size of cache activation.unit MB.
*
* @deprecated Please use maxCacheActivateBatchCount to control the size of the occupied memory
*/
@Deprecated
private Integer maxCacheActivateSize;
/**
* Maximum batch of `SharedStrings` stored in memory.
* The batch size is 100.{@link Ehcache#BATCH_COUNT}
*/
private Integer maxCacheActivateBatchCount;
public SimpleReadCacheSelector() {
}
/**
* Parameter maxCacheActivateSize has already been abandoned
*
* @param maxUseMapCacheSize
* @param maxCacheActivateSize
*/
@Deprecated
public SimpleReadCacheSelector(Long maxUseMapCacheSize, Integer maxCacheActivateSize) {
this.maxUseMapCacheSize = maxUseMapCacheSize;
this.maxCacheActivateSize = maxCacheActivateSize;
}
@Override
public ReadCache readCache(PackagePart sharedStringsTablePackagePart) {
long size = sharedStringsTablePackagePart.getSize();
if (size < 0) {
try {
size = sharedStringsTablePackagePart.getInputStream().available();
} catch (IOException e) {
LOGGER.warn("Unable to get file size, default used MapCache");
return new MapCache();
}
}
if (maxUseMapCacheSize == null) {
maxUseMapCacheSize = DEFAULT_MAX_USE_MAP_CACHE_SIZE;
}
if (size < maxUseMapCacheSize * B2M) {
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Use map cache.size:{}", size);
}
return new MapCache();
}
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Use ehcache.size:{}", size);
}
// In order to be compatible with the code
// If the user set up `maxCacheActivateSize`, then continue using it
if (maxCacheActivateSize != null) {
return new Ehcache(maxCacheActivateSize, maxCacheActivateBatchCount);
} else {
if (maxCacheActivateBatchCount == null) {
maxCacheActivateBatchCount = DEFAULT_MAX_EHCACHE_ACTIVATE_BATCH_COUNT;
}
return new Ehcache(maxCacheActivateSize, maxCacheActivateBatchCount);
}
}
}

View File

@ -0,0 +1,530 @@
package ai.chat2db.excel.constant;
import java.util.Locale;
import java.util.Map;
import ai.chat2db.excel.util.MapUtils;
import ai.chat2db.excel.util.StringUtils;
/**
* Excel's built-in format conversion.Currently only supports Chinese.
*
* <p>
* If it is not Chinese, it is recommended to directly modify the builtinFormats, which will better support
* internationalization in the future.
*
* <p>
* Specific correspondence please see:
* https://docs.microsoft.com/en-us/dotnet/api/documentformat.openxml.spreadsheet.numberingformat?view=openxml-2.8.1
*
* @author Jiaju Zhuang
**/
public class BuiltinFormats {
private static final String RESERVED = "reserved-";
public static short GENERAL = 0;
public static final String[] BUILTIN_FORMATS_ALL_LANGUAGES = {
// 0
"General",
// 1
"0",
// 2
"0.00",
// 3
"#,##0",
// 4
"#,##0.00",
// 5
"\"\"#,##0_);(\"\"#,##0)",
// 6
"\"\"#,##0_);[Red](\"\"#,##0)",
// 7
"\"\"#,##0.00_);(\"\"#,##0.00)",
// 8
"\"\"#,##0.00_);[Red](\"\"#,##0.00)",
// 9
"0%",
// 10
"0.00%",
// 11
"0.00E+00",
// 12
"# ?/?",
// 13
"# ??/??",
// 14
// The official documentation shows "m/d/yy", but the actual test is "yyyy/m/d".
"yyyy/m/d",
// 15
"d-mmm-yy",
// 16
"d-mmm",
// 17
"mmm-yy",
// 18
"h:mm AM/PM",
// 19
"h:mm:ss AM/PM",
// 20
"h:mm",
// 21
"h:mm:ss",
// 22
// The official documentation shows "m/d/yy h:mm", but the actual test is "yyyy-m-d h:mm".
"yyyy-m-d h:mm",
// 23-36 No specific correspondence found in the official documentation.
// 23
null,
// 24
null,
// 25
null,
// 26
null,
// 27
null,
// 28
null,
// 29
null,
// 30
null,
// 31
null,
// 32
null,
// 33
null,
// 34
null,
// 35
null,
// 36
null,
// 37
"#,##0_);(#,##0)",
// 38
"#,##0_);[Red](#,##0)",
// 39
"#,##0.00_);(#,##0.00)",
// 40
"#,##0.00_);[Red](#,##0.00)",
// 41
"_(* #,##0_);_(* (#,##0);_(* \"-\"_);_(@_)",
// 42
"_(\"\"* #,##0_);_(\"\"* (#,##0);_(\"\"* \"-\"_);_(@_)",
// 43
"_(* #,##0.00_);_(* (#,##0.00);_(* \"-\"??_);_(@_)",
// 44
"_(\"\"* #,##0.00_);_(\"\"* (#,##0.00);_(\"\"* \"-\"??_);_(@_)",
// 45
"mm:ss",
// 46
"[h]:mm:ss",
// 47
"mm:ss.0",
// 48
"##0.0E+0",
// 49
"@",
};
public static final String[] BUILTIN_FORMATS_CN = {
// 0
"General",
// 1
"0",
// 2
"0.00",
// 3
"#,##0",
// 4
"#,##0.00",
// 5
"\"\"#,##0_);(\"\"#,##0)",
// 6
"\"\"#,##0_);[Red](\"\"#,##0)",
// 7
"\"\"#,##0.00_);(\"\"#,##0.00)",
// 8
"\"\"#,##0.00_);[Red](\"\"#,##0.00)",
// 9
"0%",
// 10
"0.00%",
// 11
"0.00E+00",
// 12
"# ?/?",
// 13
"# ??/??",
// 14
// The official documentation shows "m/d/yy", but the actual test is "yyyy/m/d".
"yyyy/m/d",
// 15
"d-mmm-yy",
// 16
"d-mmm",
// 17
"mmm-yy",
// 18
"h:mm AM/PM",
// 19
"h:mm:ss AM/PM",
// 20
"h:mm",
// 21
"h:mm:ss",
// 22
// The official documentation shows "m/d/yy h:mm", but the actual test is "yyyy-m-d h:mm".
"yyyy-m-d h:mm",
// 23-26 No specific correspondence found in the official documentation.
// 23
null,
// 24
null,
// 25
null,
// 26
null,
// 27
"yyyy\"\"m\"\"",
// 28
"m\"\"d\"\"",
// 29
"m\"\"d\"\"",
// 30
"m-d-yy",
// 31
"yyyy\"\"m\"\"d\"\"",
// 32
"h\"\"mm\"\"",
// 33
"h\"\"mm\"\"ss\"\"",
// 34
"上午/下午h\"\"mm\"\"",
// 35
"上午/下午h\"\"mm\"\"ss\"\"",
// 36
"yyyy\"\"m\"\"",
// 37
"#,##0_);(#,##0)",
// 38
"#,##0_);[Red](#,##0)",
// 39
"#,##0.00_);(#,##0.00)",
// 40
"#,##0.00_);[Red](#,##0.00)",
// 41
"_(* #,##0_);_(* (#,##0);_(* \"-\"_);_(@_)",
// 42
"_(\"\"* #,##0_);_(\"\"* (#,##0);_(\"\"* \"-\"_);_(@_)",
// 43
"_(* #,##0.00_);_(* (#,##0.00);_(* \"-\"??_);_(@_)",
// 44
"_(\"\"* #,##0.00_);_(\"\"* (#,##0.00);_(\"\"* \"-\"??_);_(@_)",
// 45
"mm:ss",
// 46
"[h]:mm:ss",
// 47
"mm:ss.0",
// 48
"##0.0E+0",
// 49
"@",
// 50
"yyyy\"\"m\"\"",
// 51
"m\"\"d\"\"",
// 52
"yyyy\"\"m\"\"",
// 53
"m\"\"d\"\"",
// 54
"m\"\"d\"\"",
// 55
"上午/下午h\"\"mm\"\"",
// 56
"上午/下午h\"\"mm\"\"ss\"\"",
// 57
"yyyy\"\"m\"\"",
// 58
"m\"\"d\"\"",
// 59
"t0",
// 60
"t0.00",
// 61
"t#,##0",
// 62
"t#,##0.00",
// 63-66 No specific correspondence found in the official documentation.
// 63
null,
// 64
null,
// 65
null,
// 66
null,
// 67
"t0%",
// 68
"t0.00%",
// 69
"t# ?/?",
// 70
"t# ??/??",
// 71
"ว/ด/ปปปป",
// 72
"ว-ดดด-ปป",
// 73
"ว-ดดด",
// 74
"ดดด-ปป",
// 75
"ช:นน",
// 76
"ช:นน:ทท",
// 77
"ว/ด/ปปปป ช:นน",
// 78
"นน:ทท",
// 79
"[ช]:นน:ทท",
// 80
"นน:ทท.0",
// 81
"d/m/bb",
// end
};
public static final String[] BUILTIN_FORMATS_US = {
// 0
"General",
// 1
"0",
// 2
"0.00",
// 3
"#,##0",
// 4
"#,##0.00",
// 5
"\"$\"#,##0_);(\"$\"#,##0)",
// 6
"\"$\"#,##0_);[Red](\"$\"#,##0)",
// 7
"\"$\"#,##0.00_);(\"$\"#,##0.00)",
// 8
"\"$\"#,##0.00_);[Red](\"$\"#,##0.00)",
// 9
"0%",
// 10
"0.00%",
// 11
"0.00E+00",
// 12
"# ?/?",
// 13
"# ??/??",
// 14
// The official documentation shows "m/d/yy", but the actual test is "yyyy/m/d".
"yyyy/m/d",
// 15
"d-mmm-yy",
// 16
"d-mmm",
// 17
"mmm-yy",
// 18
"h:mm AM/PM",
// 19
"h:mm:ss AM/PM",
// 20
"h:mm",
// 21
"h:mm:ss",
// 22
// The official documentation shows "m/d/yy h:mm", but the actual test is "yyyy-m-d h:mm".
"yyyy-m-d h:mm",
// 23-26 No specific correspondence found in the official documentation.
// 23
null,
// 24
null,
// 25
null,
// 26
null,
// 27
"yyyy\"\"m\"\"",
// 28
"m\"\"d\"\"",
// 29
"m\"\"d\"\"",
// 30
"m-d-yy",
// 31
"yyyy\"\"m\"\"d\"\"",
// 32
"h\"\"mm\"\"",
// 33
"h\"\"mm\"\"ss\"\"",
// 34
"上午/下午h\"\"mm\"\"",
// 35
"上午/下午h\"\"mm\"\"ss\"\"",
// 36
"yyyy\"\"m\"\"",
// 37
"#,##0_);(#,##0)",
// 38
"#,##0_);[Red](#,##0)",
// 39
"#,##0.00_);(#,##0.00)",
// 40
"#,##0.00_);[Red](#,##0.00)",
// 41
"_(* #,##0_);_(* (#,##0);_(* \"-\"_);_(@_)",
// 42
"_(\"$\"* #,##0_);_(\"$\"* (#,##0);_(\"$\"* \"-\"_);_(@_)",
// 43
"_(* #,##0.00_);_(* (#,##0.00);_(* \"-\"??_);_(@_)",
// 44
"_(\"$\"* #,##0.00_);_(\"$\"* (#,##0.00);_(\"$\"* \"-\"??_);_(@_)",
// 45
"mm:ss",
// 46
"[h]:mm:ss",
// 47
"mm:ss.0",
// 48
"##0.0E+0",
// 49
"@",
// 50
"yyyy\"\"m\"\"",
// 51
"m\"\"d\"\"",
// 52
"yyyy\"\"m\"\"",
// 53
"m\"\"d\"\"",
// 54
"m\"\"d\"\"",
// 55
"上午/下午h\"\"mm\"\"",
// 56
"上午/下午h\"\"mm\"\"ss\"\"",
// 57
"yyyy\"\"m\"\"",
// 58
"m\"\"d\"\"",
// 59
"t0",
// 60
"t0.00",
// 61
"t#,##0",
// 62
"t#,##0.00",
// 63-66 No specific correspondence found in the official documentation.
// 63
null,
// 64
null,
// 65
null,
// 66
null,
// 67
"t0%",
// 68
"t0.00%",
// 69
"t# ?/?",
// 70
"t# ??/??",
// 71
"ว/ด/ปปปป",
// 72
"ว-ดดด-ปป",
// 73
"ว-ดดด",
// 74
"ดดด-ปป",
// 75
"ช:นน",
// 76
"ช:นน:ทท",
// 77
"ว/ด/ปปปป ช:นน",
// 78
"นน:ทท",
// 79
"[ช]:นน:ทท",
// 80
"นน:ทท.0",
// 81
"d/m/bb",
// end
};
public static final Map<String, Short> BUILTIN_FORMATS_MAP_CN = buildMap(BUILTIN_FORMATS_CN);
public static final Map<String, Short> BUILTIN_FORMATS_MAP_US = buildMap(BUILTIN_FORMATS_US);
public static final short MIN_CUSTOM_DATA_FORMAT_INDEX = 82;
public static String getBuiltinFormat(Short index, String defaultFormat, Locale locale) {
if (index == null || index <= 0) {
return defaultFormat;
}
// Give priority to checking if it is the default value for all languages
if (index < BUILTIN_FORMATS_ALL_LANGUAGES.length) {
String format = BUILTIN_FORMATS_ALL_LANGUAGES[index];
if (format != null) {
return format;
}
}
// In other cases, give priority to using the externally provided format
if (!StringUtils.isEmpty(defaultFormat) && !defaultFormat.startsWith(RESERVED)) {
return defaultFormat;
}
// Finally, try using the built-in format
String[] builtinFormat = switchBuiltinFormats(locale);
if (index >= builtinFormat.length) {
return defaultFormat;
}
return builtinFormat[index];
}
public static String[] switchBuiltinFormats(Locale locale) {
if (locale != null && Locale.US.getCountry().equals(locale.getCountry())) {
return BUILTIN_FORMATS_US;
}
return BUILTIN_FORMATS_CN;
}
public static Map<String, Short> switchBuiltinFormatsMap(Locale locale) {
if (locale != null && Locale.US.getCountry().equals(locale.getCountry())) {
return BUILTIN_FORMATS_MAP_US;
}
return BUILTIN_FORMATS_MAP_CN;
}
private static Map<String, Short> buildMap(String[] builtinFormats) {
Map<String, Short> map = MapUtils.newHashMapWithExpectedSize(builtinFormats.length);
for (int i = 0; i < builtinFormats.length; i++) {
map.put(builtinFormats[i], (short)i);
}
return map;
}
}

View File

@ -0,0 +1,19 @@
package ai.chat2db.excel.constant;
import java.math.MathContext;
import java.math.RoundingMode;
/**
* Used to store constant
*
* @author Jiaju Zhuang
*/
public class EasyExcelConstants {
/**
* Excel by default with 15 to store Numbers, and the double in Java can use to store number 17, led to the accuracy
* will be a problem. So you need to set up 15 to deal with precision
*/
public static final MathContext EXCEL_MATH_CONTEXT = new MathContext(15, RoundingMode.HALF_UP);
}

View File

@ -0,0 +1,99 @@
package ai.chat2db.excel.constant;
/**
* @author jipengfei
*/
public class ExcelXmlConstants {
public static final String DIMENSION_TAG = "dimension";
public static final String ROW_TAG = "row";
public static final String CELL_FORMULA_TAG = "f";
public static final String CELL_VALUE_TAG = "v";
/**
* When the data is "inlineStr" his tag is "t"
*/
public static final String CELL_INLINE_STRING_VALUE_TAG = "t";
public static final String CELL_TAG = "c";
public static final String MERGE_CELL_TAG = "mergeCell";
public static final String HYPERLINK_TAG = "hyperlink";
public static final String X_DIMENSION_TAG = "x:dimension";
public static final String NS2_DIMENSION_TAG = "ns2:dimension";
public static final String X_ROW_TAG = "x:row";
public static final String NS2_ROW_TAG = "ns2:row";
public static final String X_CELL_FORMULA_TAG = "x:f";
public static final String NS2_CELL_FORMULA_TAG = "ns2:f";
public static final String X_CELL_VALUE_TAG = "x:v";
public static final String NS2_CELL_VALUE_TAG = "ns2:v";
/**
* When the data is "inlineStr" his tag is "t"
*/
public static final String X_CELL_INLINE_STRING_VALUE_TAG = "x:t";
public static final String NS2_CELL_INLINE_STRING_VALUE_TAG = "ns2:t";
public static final String X_CELL_TAG = "x:c";
public static final String NS2_CELL_TAG = "ns2:c";
public static final String X_MERGE_CELL_TAG = "x:mergeCell";
public static final String NS2_MERGE_CELL_TAG = "ns2:mergeCell";
public static final String X_HYPERLINK_TAG = "x:hyperlink";
public static final String NS2_HYPERLINK_TAG = "ns2:hyperlink";
/**
* s attribute
*/
public static final String ATTRIBUTE_S = "s";
/**
* ref attribute
*/
public static final String ATTRIBUTE_REF = "ref";
/**
* r attribute
*/
public static final String ATTRIBUTE_R = "r";
/**
* t attribute
*/
public static final String ATTRIBUTE_T = "t";
/**
* location attribute
*/
public static final String ATTRIBUTE_LOCATION = "location";
/**
* rId attribute
*/
public static final String ATTRIBUTE_RID = "r:id";
/**
* Cell range split
*/
public static final String CELL_RANGE_SPLIT = ":";
// The following is a constant read the `SharedStrings.xml`
/**
* text
*/
public static final String SHAREDSTRINGS_T_TAG = "t";
public static final String SHAREDSTRINGS_X_T_TAG = "x:t";
public static final String SHAREDSTRINGS_NS2_T_TAG = "ns2:t";
/**
* SharedStringItem
*/
public static final String SHAREDSTRINGS_SI_TAG = "si";
public static final String SHAREDSTRINGS_X_SI_TAG = "x:si";
public static final String SHAREDSTRINGS_NS2_SI_TAG = "ns2:si";
/**
* Mac 2016 2017 will have this extra field to ignore
*/
public static final String SHAREDSTRINGS_RPH_TAG = "rPh";
public static final String SHAREDSTRINGS_X_RPH_TAG = "x:rPh";
public static final String SHAREDSTRINGS_NS2_RPH_TAG = "ns2:rPh";
}

View File

@ -0,0 +1,34 @@
package ai.chat2db.excel.constant;
/**
* Order constant.
*
* @author Jiaju Zhuang
*/
public class OrderConstant {
/**
* The system's own style
*/
public static int DEFAULT_DEFINE_STYLE = -70000;
/**
* Annotation style definition
*/
public static int ANNOTATION_DEFINE_STYLE = -60000;
/**
* Define style.
*/
public static final int DEFINE_STYLE = -50000;
/**
* default order.
*/
public static int DEFAULT_ORDER = 0;
/**
* Sorting of styles written to cells.
*/
public static int FILL_STYLE = 50000;
}

View File

@ -0,0 +1,147 @@
package ai.chat2db.excel.context;
import java.io.InputStream;
import java.util.List;
import ai.chat2db.excel.event.AnalysisEventListener;
import ai.chat2db.excel.read.metadata.ReadSheet;
import ai.chat2db.excel.read.metadata.holder.ReadHolder;
import ai.chat2db.excel.read.metadata.holder.ReadRowHolder;
import ai.chat2db.excel.read.metadata.holder.ReadSheetHolder;
import ai.chat2db.excel.read.metadata.holder.ReadWorkbookHolder;
import ai.chat2db.excel.read.processor.AnalysisEventProcessor;
import ai.chat2db.excel.support.ExcelTypeEnum;
/**
*
* A context is the main anchorage point of a excel reader.
*
* @author jipengfei
*/
public interface AnalysisContext {
/**
* Select the current table
*
* @param readSheet
* sheet to read
*/
void currentSheet(ReadSheet readSheet);
/**
* All information about the workbook you are currently working on
*
* @return Current workbook holder
*/
ReadWorkbookHolder readWorkbookHolder();
/**
* All information about the sheet you are currently working on
*
* @return Current sheet holder
*/
ReadSheetHolder readSheetHolder();
/**
* Set row of currently operated cell
*
* @param readRowHolder
* Current row holder
*/
void readRowHolder(ReadRowHolder readRowHolder);
/**
* Row of currently operated cell
*
* @return Current row holder
*/
ReadRowHolder readRowHolder();
/**
* The current read operation corresponds to the <code>readSheetHolder</code> or <code>readWorkbookHolder</code>
*
* @return Current holder
*/
ReadHolder currentReadHolder();
/**
* Custom attribute
*
* @return
*/
Object getCustom();
/**
* Event processor
*
* @return
*/
AnalysisEventProcessor analysisEventProcessor();
/**
* Data that the customer needs to read
*
* @return
*/
List<ReadSheet> readSheetList();
/**
* Data that the customer needs to read
*
* @param readSheetList
*/
void readSheetList(List<ReadSheet> readSheetList);
/**
*
* get excel type
*
* @return excel type
* @deprecated please use {@link #readWorkbookHolder()}
*/
@Deprecated
ExcelTypeEnum getExcelType();
/**
* get in io
*
* @return file io
* @deprecated please use {@link #readWorkbookHolder()}
*/
@Deprecated
InputStream getInputStream();
/**
* get current row
*
* @return
* @deprecated please use {@link #readRowHolder()}
*/
@Deprecated
Integer getCurrentRowNum();
/**
* get total row ,Data may be inaccurate
*
* @return
* @deprecated please use {@link #readRowHolder()}
*/
@Deprecated
Integer getTotalCount();
/**
* get current result
*
* @return get current result
* @deprecated please use {@link #readRowHolder()}
*/
@Deprecated
Object getCurrentRowAnalysisResult();
/**
* Interrupt execution
*
* @deprecated please use {@link AnalysisEventListener#hasNext(AnalysisContext)}
*/
@Deprecated
void interrupt();
}

View File

@ -0,0 +1,174 @@
package ai.chat2db.excel.context;
import java.io.InputStream;
import java.util.List;
import ai.chat2db.excel.exception.ExcelAnalysisException;
import ai.chat2db.excel.read.metadata.ReadSheet;
import ai.chat2db.excel.read.metadata.ReadWorkbook;
import ai.chat2db.excel.read.metadata.holder.ReadHolder;
import ai.chat2db.excel.read.metadata.holder.ReadRowHolder;
import ai.chat2db.excel.read.metadata.holder.ReadSheetHolder;
import ai.chat2db.excel.read.metadata.holder.ReadWorkbookHolder;
import ai.chat2db.excel.read.metadata.holder.csv.CsvReadSheetHolder;
import ai.chat2db.excel.read.metadata.holder.csv.CsvReadWorkbookHolder;
import ai.chat2db.excel.read.metadata.holder.xls.XlsReadSheetHolder;
import ai.chat2db.excel.read.metadata.holder.xls.XlsReadWorkbookHolder;
import ai.chat2db.excel.read.metadata.holder.xlsx.XlsxReadSheetHolder;
import ai.chat2db.excel.read.metadata.holder.xlsx.XlsxReadWorkbookHolder;
import ai.chat2db.excel.read.processor.AnalysisEventProcessor;
import ai.chat2db.excel.read.processor.DefaultAnalysisEventProcessor;
import ai.chat2db.excel.support.ExcelTypeEnum;
import lombok.extern.slf4j.Slf4j;
/**
* @author jipengfei
*/
@Slf4j
public class AnalysisContextImpl implements AnalysisContext {
/**
* The Workbook currently written
*/
private ReadWorkbookHolder readWorkbookHolder;
/**
* Current sheet holder
*/
private ReadSheetHolder readSheetHolder;
/**
* Current row holder
*/
private ReadRowHolder readRowHolder;
/**
* Configuration of currently operated cell
*/
private ReadHolder currentReadHolder;
/**
* Event processor
*/
private final AnalysisEventProcessor analysisEventProcessor;
public AnalysisContextImpl(ReadWorkbook readWorkbook, ExcelTypeEnum actualExcelType) {
if (readWorkbook == null) {
throw new IllegalArgumentException("Workbook argument cannot be null");
}
switch (actualExcelType) {
case XLS:
readWorkbookHolder = new XlsReadWorkbookHolder(readWorkbook);
break;
case XLSX:
readWorkbookHolder = new XlsxReadWorkbookHolder(readWorkbook);
break;
case CSV:
readWorkbookHolder = new CsvReadWorkbookHolder(readWorkbook);
break;
default:
break;
}
currentReadHolder = readWorkbookHolder;
analysisEventProcessor = new DefaultAnalysisEventProcessor();
if (log.isDebugEnabled()) {
log.debug("Initialization 'AnalysisContextImpl' complete");
}
}
@Override
public void currentSheet(ReadSheet readSheet) {
switch (readWorkbookHolder.getExcelType()) {
case XLS:
readSheetHolder = new XlsReadSheetHolder(readSheet, readWorkbookHolder);
break;
case XLSX:
readSheetHolder = new XlsxReadSheetHolder(readSheet, readWorkbookHolder);
break;
case CSV:
readSheetHolder = new CsvReadSheetHolder(readSheet, readWorkbookHolder);
break;
default:
break;
}
currentReadHolder = readSheetHolder;
if (readWorkbookHolder.getHasReadSheet().contains(readSheetHolder.getSheetNo())) {
throw new ExcelAnalysisException("Cannot read sheet repeatedly.");
}
readWorkbookHolder.getHasReadSheet().add(readSheetHolder.getSheetNo());
if (log.isDebugEnabled()) {
log.debug("Began to read{}", readSheetHolder);
}
}
@Override
public ReadWorkbookHolder readWorkbookHolder() {
return readWorkbookHolder;
}
@Override
public ReadSheetHolder readSheetHolder() {
return readSheetHolder;
}
@Override
public ReadRowHolder readRowHolder() {
return readRowHolder;
}
@Override
public void readRowHolder(ReadRowHolder readRowHolder) {
this.readRowHolder = readRowHolder;
}
@Override
public ReadHolder currentReadHolder() {
return currentReadHolder;
}
@Override
public Object getCustom() {
return readWorkbookHolder.getCustomObject();
}
@Override
public AnalysisEventProcessor analysisEventProcessor() {
return analysisEventProcessor;
}
@Override
public List<ReadSheet> readSheetList() {
return null;
}
@Override
public void readSheetList(List<ReadSheet> readSheetList) {
}
@Override
public ExcelTypeEnum getExcelType() {
return readWorkbookHolder.getExcelType();
}
@Override
public InputStream getInputStream() {
return readWorkbookHolder.getInputStream();
}
@Override
public Integer getCurrentRowNum() {
return readRowHolder.getRowIndex();
}
@Override
public Integer getTotalCount() {
return readSheetHolder.getTotal();
}
@Override
public Object getCurrentRowAnalysisResult() {
return readRowHolder.getCurrentRowAnalysisResult();
}
@Override
public void interrupt() {
throw new ExcelAnalysisException("interrupt error");
}
}

View File

@ -0,0 +1,110 @@
package ai.chat2db.excel.context;
import java.io.OutputStream;
import ai.chat2db.excel.enums.WriteTypeEnum;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import ai.chat2db.excel.write.metadata.WriteSheet;
import ai.chat2db.excel.write.metadata.WriteTable;
import ai.chat2db.excel.write.metadata.holder.WriteHolder;
import ai.chat2db.excel.write.metadata.holder.WriteSheetHolder;
import ai.chat2db.excel.write.metadata.holder.WriteTableHolder;
import ai.chat2db.excel.write.metadata.holder.WriteWorkbookHolder;
/**
* Write context
*
* @author jipengfei
*/
public interface WriteContext {
/**
* If the current sheet already exists, select it; if not, create it
*
* @param writeSheet
* Current sheet
* @param writeType
*/
void currentSheet(WriteSheet writeSheet, WriteTypeEnum writeType);
/**
* If the current table already exists, select it; if not, create it
*
* @param writeTable
*/
void currentTable(WriteTable writeTable);
/**
* All information about the workbook you are currently working on
*
* @return
*/
WriteWorkbookHolder writeWorkbookHolder();
/**
* All information about the sheet you are currently working on
*
* @return
*/
WriteSheetHolder writeSheetHolder();
/**
* All information about the table you are currently working on
*
* @return
*/
WriteTableHolder writeTableHolder();
/**
* Configuration of currently operated cell. May be 'writeSheetHolder' or 'writeTableHolder' or
* 'writeWorkbookHolder'
*
* @return
*/
WriteHolder currentWriteHolder();
/**
* close
*
* @param onException
*/
void finish(boolean onException);
/**
* Current sheet
*
* @return
* @deprecated please us e{@link #writeSheetHolder()}
*/
@Deprecated
Sheet getCurrentSheet();
/**
* Need head
*
* @return
* @deprecated please us e{@link #writeSheetHolder()}
*/
@Deprecated
boolean needHead();
/**
* Get outputStream
*
* @return
* @deprecated please us e{@link #writeWorkbookHolder()} ()}
*/
@Deprecated
OutputStream getOutputStream();
/**
* Get workbook
*
* @return
* @deprecated please us e{@link #writeWorkbookHolder()} ()}
*/
@Deprecated
Workbook getWorkbook();
}

View File

@ -0,0 +1,525 @@
package ai.chat2db.excel.context;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.Map;
import java.util.UUID;
import ai.chat2db.excel.enums.WriteTypeEnum;
import ai.chat2db.excel.exception.ExcelGenerateException;
import ai.chat2db.excel.metadata.CellRange;
import ai.chat2db.excel.metadata.Head;
import ai.chat2db.excel.metadata.data.WriteCellData;
import ai.chat2db.excel.metadata.property.ExcelContentProperty;
import ai.chat2db.excel.write.handler.context.CellWriteHandlerContext;
import ai.chat2db.excel.write.handler.context.RowWriteHandlerContext;
import ai.chat2db.excel.write.handler.context.SheetWriteHandlerContext;
import ai.chat2db.excel.write.handler.context.WorkbookWriteHandlerContext;
import ai.chat2db.excel.support.ExcelTypeEnum;
import ai.chat2db.excel.util.ClassUtils;
import ai.chat2db.excel.util.DateUtils;
import ai.chat2db.excel.util.FileUtils;
import ai.chat2db.excel.util.ListUtils;
import ai.chat2db.excel.util.NumberDataFormatterUtils;
import ai.chat2db.excel.util.StringUtils;
import ai.chat2db.excel.util.WorkBookUtil;
import ai.chat2db.excel.util.WriteHandlerUtils;
import ai.chat2db.excel.write.metadata.WriteSheet;
import ai.chat2db.excel.write.metadata.WriteTable;
import ai.chat2db.excel.write.metadata.WriteWorkbook;
import ai.chat2db.excel.write.metadata.holder.WriteHolder;
import ai.chat2db.excel.write.metadata.holder.WriteSheetHolder;
import ai.chat2db.excel.write.metadata.holder.WriteTableHolder;
import ai.chat2db.excel.write.metadata.holder.WriteWorkbookHolder;
import ai.chat2db.excel.write.property.ExcelWriteHeadProperty;
import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.crypt.EncryptionMode;
import org.apache.poi.poifs.crypt.Encryptor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A context is the main anchorage point of a excel writer.
*
* @author jipengfei
*/
public class WriteContextImpl implements WriteContext {
private static final Logger LOGGER = LoggerFactory.getLogger(WriteContextImpl.class);
private static final String NO_SHEETS = "no sheets";
/**
* The Workbook currently written
*/
private WriteWorkbookHolder writeWorkbookHolder;
/**
* Current sheet holder
*/
private WriteSheetHolder writeSheetHolder;
/**
* The table currently written
*/
private WriteTableHolder writeTableHolder;
/**
* Configuration of currently operated cell
*/
private WriteHolder currentWriteHolder;
/**
* Prevent multiple shutdowns
*/
private boolean finished = false;
public WriteContextImpl(WriteWorkbook writeWorkbook) {
if (writeWorkbook == null) {
throw new IllegalArgumentException("Workbook argument cannot be null");
}
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Begin to Initialization 'WriteContextImpl'");
}
initCurrentWorkbookHolder(writeWorkbook);
WorkbookWriteHandlerContext workbookWriteHandlerContext = WriteHandlerUtils.createWorkbookWriteHandlerContext(
this);
WriteHandlerUtils.beforeWorkbookCreate(workbookWriteHandlerContext);
try {
WorkBookUtil.createWorkBook(writeWorkbookHolder);
} catch (Exception e) {
throw new ExcelGenerateException("Create workbook failure", e);
}
WriteHandlerUtils.afterWorkbookCreate(workbookWriteHandlerContext);
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Initialization 'WriteContextImpl' complete");
}
}
private void initCurrentWorkbookHolder(WriteWorkbook writeWorkbook) {
writeWorkbookHolder = new WriteWorkbookHolder(writeWorkbook);
currentWriteHolder = writeWorkbookHolder;
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("CurrentConfiguration is writeWorkbookHolder");
}
}
/**
* @param writeSheet
*/
@Override
public void currentSheet(WriteSheet writeSheet, WriteTypeEnum writeType) {
if (writeSheet == null) {
throw new IllegalArgumentException("Sheet argument cannot be null");
}
if (selectSheetFromCache(writeSheet)) {
return;
}
initCurrentSheetHolder(writeSheet);
// Workbook handler need to supplementary execution
WorkbookWriteHandlerContext workbookWriteHandlerContext = WriteHandlerUtils.createWorkbookWriteHandlerContext(
this);
WriteHandlerUtils.beforeWorkbookCreate(workbookWriteHandlerContext, true);
WriteHandlerUtils.afterWorkbookCreate(workbookWriteHandlerContext, true);
// Initialization current sheet
initSheet(writeType);
}
private boolean selectSheetFromCache(WriteSheet writeSheet) {
writeSheetHolder = null;
Integer sheetNo = writeSheet.getSheetNo();
if (sheetNo == null && StringUtils.isEmpty(writeSheet.getSheetName())) {
sheetNo = 0;
}
if (sheetNo != null) {
writeSheetHolder = writeWorkbookHolder.getHasBeenInitializedSheetIndexMap().get(sheetNo);
}
if (writeSheetHolder == null && !StringUtils.isEmpty(writeSheet.getSheetName())) {
writeSheetHolder = writeWorkbookHolder.getHasBeenInitializedSheetNameMap().get(writeSheet.getSheetName());
}
if (writeSheetHolder == null) {
return false;
}
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Sheet:{},{} is already existed", writeSheet.getSheetNo(), writeSheet.getSheetName());
}
writeSheetHolder.setNewInitialization(Boolean.FALSE);
writeTableHolder = null;
currentWriteHolder = writeSheetHolder;
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("CurrentConfiguration is writeSheetHolder");
}
return true;
}
private void initCurrentSheetHolder(WriteSheet writeSheet) {
writeSheetHolder = new WriteSheetHolder(writeSheet, writeWorkbookHolder);
writeTableHolder = null;
currentWriteHolder = writeSheetHolder;
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("CurrentConfiguration is writeSheetHolder");
}
}
private void initSheet(WriteTypeEnum writeType) {
SheetWriteHandlerContext sheetWriteHandlerContext = WriteHandlerUtils.createSheetWriteHandlerContext(this);
WriteHandlerUtils.beforeSheetCreate(sheetWriteHandlerContext);
Sheet currentSheet;
try {
if (writeSheetHolder.getSheetNo() != null) {
// When the add default sort order of appearance
if (WriteTypeEnum.ADD.equals(writeType) && writeWorkbookHolder.getTempTemplateInputStream() == null) {
currentSheet = createSheet();
} else {
currentSheet = writeWorkbookHolder.getWorkbook().getSheetAt(writeSheetHolder.getSheetNo());
writeSheetHolder
.setCachedSheet(
writeWorkbookHolder.getCachedWorkbook().getSheetAt(writeSheetHolder.getSheetNo()));
}
} else {
// sheet name must not null
currentSheet = writeWorkbookHolder.getWorkbook().getSheet(writeSheetHolder.getSheetName());
writeSheetHolder
.setCachedSheet(writeWorkbookHolder.getCachedWorkbook().getSheet(writeSheetHolder.getSheetName()));
}
} catch (IllegalArgumentException e) {
if (e.getMessage() != null && e.getMessage().contains(NO_SHEETS)) {
currentSheet = createSheet();
} else {
throw e;
}
}
if (currentSheet == null) {
currentSheet = createSheet();
}
writeSheetHolder.setSheet(currentSheet);
WriteHandlerUtils.afterSheetCreate(sheetWriteHandlerContext);
if (WriteTypeEnum.ADD.equals(writeType)) {
// Initialization head
initHead(writeSheetHolder.excelWriteHeadProperty());
}
writeWorkbookHolder.getHasBeenInitializedSheetIndexMap().put(writeSheetHolder.getSheetNo(), writeSheetHolder);
writeWorkbookHolder.getHasBeenInitializedSheetNameMap().put(writeSheetHolder.getSheetName(), writeSheetHolder);
}
private Sheet createSheet() {
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Can not find sheet:{} ,now create it", writeSheetHolder.getSheetNo());
}
if (StringUtils.isEmpty(writeSheetHolder.getSheetName())) {
writeSheetHolder.setSheetName(writeSheetHolder.getSheetNo().toString());
}
Sheet currentSheet =
WorkBookUtil.createSheet(writeWorkbookHolder.getWorkbook(), writeSheetHolder.getSheetName());
writeSheetHolder.setCachedSheet(currentSheet);
return currentSheet;
}
public void initHead(ExcelWriteHeadProperty excelWriteHeadProperty) {
if (!currentWriteHolder.needHead() || !currentWriteHolder.excelWriteHeadProperty().hasHead()) {
return;
}
int newRowIndex = writeSheetHolder.getNewRowIndexAndStartDoWrite();
newRowIndex += currentWriteHolder.relativeHeadRowIndex();
// Combined head
if (currentWriteHolder.automaticMergeHead()) {
addMergedRegionToCurrentSheet(excelWriteHeadProperty, newRowIndex);
}
for (int relativeRowIndex = 0, i = newRowIndex; i < excelWriteHeadProperty.getHeadRowNumber()
+ newRowIndex; i++, relativeRowIndex++) {
RowWriteHandlerContext rowWriteHandlerContext = WriteHandlerUtils.createRowWriteHandlerContext(this,
newRowIndex, relativeRowIndex, Boolean.TRUE);
WriteHandlerUtils.beforeRowCreate(rowWriteHandlerContext);
Row row = WorkBookUtil.createRow(writeSheetHolder.getSheet(), i);
rowWriteHandlerContext.setRow(row);
WriteHandlerUtils.afterRowCreate(rowWriteHandlerContext);
addOneRowOfHeadDataToExcel(row, i, excelWriteHeadProperty.getHeadMap(), relativeRowIndex);
WriteHandlerUtils.afterRowDispose(rowWriteHandlerContext);
}
}
private void addMergedRegionToCurrentSheet(ExcelWriteHeadProperty excelWriteHeadProperty, int rowIndex) {
for (CellRange cellRangeModel : excelWriteHeadProperty.headCellRangeList()) {
writeSheetHolder.getSheet()
.addMergedRegionUnsafe(new CellRangeAddress(cellRangeModel.getFirstRow() + rowIndex,
cellRangeModel.getLastRow() + rowIndex, cellRangeModel.getFirstCol(), cellRangeModel.getLastCol()));
}
}
private void addOneRowOfHeadDataToExcel(Row row, Integer rowIndex, Map<Integer, Head> headMap,
int relativeRowIndex) {
for (Map.Entry<Integer, Head> entry : headMap.entrySet()) {
Head head = entry.getValue();
int columnIndex = entry.getKey();
ExcelContentProperty excelContentProperty = ClassUtils.declaredExcelContentProperty(null,
currentWriteHolder.excelWriteHeadProperty().getHeadClazz(), head.getFieldName(), currentWriteHolder);
CellWriteHandlerContext cellWriteHandlerContext = WriteHandlerUtils.createCellWriteHandlerContext(this, row,
rowIndex, head, columnIndex, relativeRowIndex, Boolean.TRUE, excelContentProperty);
WriteHandlerUtils.beforeCellCreate(cellWriteHandlerContext);
Cell cell = row.createCell(columnIndex);
cellWriteHandlerContext.setCell(cell);
WriteHandlerUtils.afterCellCreate(cellWriteHandlerContext);
WriteCellData<String> writeCellData = new WriteCellData<>(head.getHeadNameList().get(relativeRowIndex));
cell.setCellValue(writeCellData.getStringValue());
cellWriteHandlerContext.setCellDataList(ListUtils.newArrayList(writeCellData));
cellWriteHandlerContext.setFirstCellData(writeCellData);
WriteHandlerUtils.afterCellDispose(cellWriteHandlerContext);
}
}
@Override
public void currentTable(WriteTable writeTable) {
if (writeTable == null) {
return;
}
if (writeTable.getTableNo() == null || writeTable.getTableNo() <= 0) {
writeTable.setTableNo(0);
}
if (writeSheetHolder.getHasBeenInitializedTable().containsKey(writeTable.getTableNo())) {
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Table:{} is already existed", writeTable.getTableNo());
}
writeTableHolder = writeSheetHolder.getHasBeenInitializedTable().get(writeTable.getTableNo());
writeTableHolder.setNewInitialization(Boolean.FALSE);
currentWriteHolder = writeTableHolder;
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("CurrentConfiguration is writeTableHolder");
}
return;
}
initCurrentTableHolder(writeTable);
// Workbook and sheet handler need to supplementary execution
WorkbookWriteHandlerContext workbookWriteHandlerContext = WriteHandlerUtils.createWorkbookWriteHandlerContext(
this);
WriteHandlerUtils.beforeWorkbookCreate(workbookWriteHandlerContext, true);
WriteHandlerUtils.afterWorkbookCreate(workbookWriteHandlerContext, true);
SheetWriteHandlerContext sheetWriteHandlerContext = WriteHandlerUtils.createSheetWriteHandlerContext(this);
WriteHandlerUtils.beforeSheetCreate(sheetWriteHandlerContext, true);
WriteHandlerUtils.afterSheetCreate(sheetWriteHandlerContext, true);
initHead(writeTableHolder.excelWriteHeadProperty());
}
private void initCurrentTableHolder(WriteTable writeTable) {
writeTableHolder = new WriteTableHolder(writeTable, writeSheetHolder);
writeSheetHolder.getHasBeenInitializedTable().put(writeTable.getTableNo(), writeTableHolder);
currentWriteHolder = writeTableHolder;
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("CurrentConfiguration is writeTableHolder");
}
}
@Override
public WriteWorkbookHolder writeWorkbookHolder() {
return writeWorkbookHolder;
}
@Override
public WriteSheetHolder writeSheetHolder() {
return writeSheetHolder;
}
@Override
public WriteTableHolder writeTableHolder() {
return writeTableHolder;
}
@Override
public WriteHolder currentWriteHolder() {
return currentWriteHolder;
}
@Override
public void finish(boolean onException) {
if (finished) {
return;
}
finished = true;
WriteHandlerUtils.afterWorkbookDispose(writeWorkbookHolder.getWorkbookWriteHandlerContext());
if (writeWorkbookHolder == null) {
return;
}
Throwable throwable = null;
boolean isOutputStreamEncrypt = false;
// Determine if you need to write excel
boolean writeExcel = !onException;
if (writeWorkbookHolder.getWriteExcelOnException()) {
writeExcel = Boolean.TRUE;
}
// No data is written if an exception is thrown
if (writeExcel) {
try {
isOutputStreamEncrypt = doOutputStreamEncrypt07();
} catch (Throwable t) {
throwable = t;
}
}
if (!isOutputStreamEncrypt) {
try {
if (writeExcel) {
writeWorkbookHolder.getWorkbook().write(writeWorkbookHolder.getOutputStream());
}
writeWorkbookHolder.getWorkbook().close();
} catch (Throwable t) {
throwable = t;
}
}
try {
Workbook workbook = writeWorkbookHolder.getWorkbook();
if (workbook instanceof SXSSFWorkbook) {
((SXSSFWorkbook)workbook).dispose();
}
} catch (Throwable t) {
throwable = t;
}
try {
if (writeWorkbookHolder.getAutoCloseStream() && writeWorkbookHolder.getOutputStream() != null) {
writeWorkbookHolder.getOutputStream().close();
}
} catch (Throwable t) {
throwable = t;
}
if (writeExcel && !isOutputStreamEncrypt) {
try {
doFileEncrypt07();
} catch (Throwable t) {
throwable = t;
}
}
try {
if (writeWorkbookHolder.getTempTemplateInputStream() != null) {
writeWorkbookHolder.getTempTemplateInputStream().close();
}
} catch (Throwable t) {
throwable = t;
}
clearEncrypt03();
removeThreadLocalCache();
if (throwable != null) {
throw new ExcelGenerateException("Can not close IO.", throwable);
}
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Finished write.");
}
}
private void removeThreadLocalCache() {
NumberDataFormatterUtils.removeThreadLocalCache();
DateUtils.removeThreadLocalCache();
ClassUtils.removeThreadLocalCache();
}
@Override
public Sheet getCurrentSheet() {
return writeSheetHolder.getSheet();
}
@Override
public boolean needHead() {
return writeSheetHolder.needHead();
}
@Override
public OutputStream getOutputStream() {
return writeWorkbookHolder.getOutputStream();
}
@Override
public Workbook getWorkbook() {
return writeWorkbookHolder.getWorkbook();
}
private void clearEncrypt03() {
if (StringUtils.isEmpty(writeWorkbookHolder.getPassword())
|| !ExcelTypeEnum.XLS.equals(writeWorkbookHolder.getExcelType())) {
return;
}
Biff8EncryptionKey.setCurrentUserPassword(null);
}
/**
* To encrypt
*/
private boolean doOutputStreamEncrypt07() throws Exception {
if (StringUtils.isEmpty(writeWorkbookHolder.getPassword())
|| !ExcelTypeEnum.XLSX.equals(writeWorkbookHolder.getExcelType())) {
return false;
}
if (writeWorkbookHolder.getFile() != null) {
return false;
}
File tempXlsx = FileUtils.createTmpFile(UUID.randomUUID() + ".xlsx");
FileOutputStream tempFileOutputStream = new FileOutputStream(tempXlsx);
try {
writeWorkbookHolder.getWorkbook().write(tempFileOutputStream);
} finally {
try {
writeWorkbookHolder.getWorkbook().close();
tempFileOutputStream.close();
} catch (Exception e) {
if (!tempXlsx.delete()) {
throw new ExcelGenerateException("Can not delete temp File!");
}
throw e;
}
}
try (POIFSFileSystem fileSystem = openFileSystemAndEncrypt(tempXlsx)) {
fileSystem.writeFilesystem(writeWorkbookHolder.getOutputStream());
} finally {
if (!tempXlsx.delete()) {
throw new ExcelGenerateException("Can not delete temp File!");
}
}
return true;
}
/**
* To encrypt
*/
private void doFileEncrypt07() throws Exception {
if (StringUtils.isEmpty(writeWorkbookHolder.getPassword())
|| !ExcelTypeEnum.XLSX.equals(writeWorkbookHolder.getExcelType())) {
return;
}
if (writeWorkbookHolder.getFile() == null) {
return;
}
try (POIFSFileSystem fileSystem = openFileSystemAndEncrypt(writeWorkbookHolder.getFile());
FileOutputStream fileOutputStream = new FileOutputStream(writeWorkbookHolder.getFile())) {
fileSystem.writeFilesystem(fileOutputStream);
}
}
private POIFSFileSystem openFileSystemAndEncrypt(File file) throws Exception {
POIFSFileSystem fileSystem = new POIFSFileSystem();
Encryptor encryptor = new EncryptionInfo(EncryptionMode.standard).getEncryptor();
encryptor.confirmPassword(writeWorkbookHolder.getPassword());
try (OPCPackage opcPackage = OPCPackage.open(file, PackageAccess.READ_WRITE);
OutputStream outputStream = encryptor.getDataStream(fileSystem)) {
opcPackage.save(outputStream);
}
return fileSystem;
}
}

View File

@ -0,0 +1,26 @@
package ai.chat2db.excel.context.csv;
import ai.chat2db.excel.context.AnalysisContext;
import ai.chat2db.excel.read.metadata.holder.csv.CsvReadSheetHolder;
import ai.chat2db.excel.read.metadata.holder.csv.CsvReadWorkbookHolder;
/**
* A context is the main anchorage point of a ls xls reader.
*
* @author Jiaju Zhuang
**/
public interface CsvReadContext extends AnalysisContext {
/**
* All information about the workbook you are currently working on.
*
* @return Current workbook holder
*/
CsvReadWorkbookHolder csvReadWorkbookHolder();
/**
* All information about the sheet you are currently working on.
*
* @return Current sheet holder
*/
CsvReadSheetHolder csvReadSheetHolder();
}

View File

@ -0,0 +1,29 @@
package ai.chat2db.excel.context.csv;
import ai.chat2db.excel.context.AnalysisContextImpl;
import ai.chat2db.excel.read.metadata.ReadWorkbook;
import ai.chat2db.excel.read.metadata.holder.csv.CsvReadSheetHolder;
import ai.chat2db.excel.read.metadata.holder.csv.CsvReadWorkbookHolder;
import ai.chat2db.excel.support.ExcelTypeEnum;
/**
* A context is the main anchorage point of a ls xls reader.
*
* @author Jiaju Zhuang
*/
public class DefaultCsvReadContext extends AnalysisContextImpl implements CsvReadContext {
public DefaultCsvReadContext(ReadWorkbook readWorkbook, ExcelTypeEnum actualExcelType) {
super(readWorkbook, actualExcelType);
}
@Override
public CsvReadWorkbookHolder csvReadWorkbookHolder() {
return (CsvReadWorkbookHolder)readWorkbookHolder();
}
@Override
public CsvReadSheetHolder csvReadSheetHolder() {
return (CsvReadSheetHolder)readSheetHolder();
}
}

View File

@ -0,0 +1,30 @@
package ai.chat2db.excel.context.xls;
import ai.chat2db.excel.context.AnalysisContextImpl;
import ai.chat2db.excel.read.metadata.ReadWorkbook;
import ai.chat2db.excel.read.metadata.holder.xls.XlsReadSheetHolder;
import ai.chat2db.excel.read.metadata.holder.xls.XlsReadWorkbookHolder;
import ai.chat2db.excel.support.ExcelTypeEnum;
/**
*
* A context is the main anchorage point of a ls xls reader.
*
* @author Jiaju Zhuang
*/
public class DefaultXlsReadContext extends AnalysisContextImpl implements XlsReadContext {
public DefaultXlsReadContext(ReadWorkbook readWorkbook, ExcelTypeEnum actualExcelType) {
super(readWorkbook, actualExcelType);
}
@Override
public XlsReadWorkbookHolder xlsReadWorkbookHolder() {
return (XlsReadWorkbookHolder)readWorkbookHolder();
}
@Override
public XlsReadSheetHolder xlsReadSheetHolder() {
return (XlsReadSheetHolder)readSheetHolder();
}
}

View File

@ -0,0 +1,26 @@
package ai.chat2db.excel.context.xls;
import ai.chat2db.excel.context.AnalysisContext;
import ai.chat2db.excel.read.metadata.holder.xls.XlsReadSheetHolder;
import ai.chat2db.excel.read.metadata.holder.xls.XlsReadWorkbookHolder;
/**
* A context is the main anchorage point of a ls xls reader.
*
* @author Jiaju Zhuang
**/
public interface XlsReadContext extends AnalysisContext {
/**
* All information about the workbook you are currently working on.
*
* @return Current workbook holder
*/
XlsReadWorkbookHolder xlsReadWorkbookHolder();
/**
* All information about the sheet you are currently working on.
*
* @return Current sheet holder
*/
XlsReadSheetHolder xlsReadSheetHolder();
}

View File

@ -0,0 +1,30 @@
package ai.chat2db.excel.context.xlsx;
import ai.chat2db.excel.context.AnalysisContextImpl;
import ai.chat2db.excel.read.metadata.ReadWorkbook;
import ai.chat2db.excel.read.metadata.holder.xlsx.XlsxReadSheetHolder;
import ai.chat2db.excel.read.metadata.holder.xlsx.XlsxReadWorkbookHolder;
import ai.chat2db.excel.support.ExcelTypeEnum;
/**
*
* A context is the main anchorage point of a ls xls reader.
*
* @author Jiaju Zhuang
*/
public class DefaultXlsxReadContext extends AnalysisContextImpl implements XlsxReadContext {
public DefaultXlsxReadContext(ReadWorkbook readWorkbook, ExcelTypeEnum actualExcelType) {
super(readWorkbook, actualExcelType);
}
@Override
public XlsxReadWorkbookHolder xlsxReadWorkbookHolder() {
return (XlsxReadWorkbookHolder)readWorkbookHolder();
}
@Override
public XlsxReadSheetHolder xlsxReadSheetHolder() {
return (XlsxReadSheetHolder)readSheetHolder();
}
}

View File

@ -0,0 +1,26 @@
package ai.chat2db.excel.context.xlsx;
import ai.chat2db.excel.context.AnalysisContext;
import ai.chat2db.excel.read.metadata.holder.xlsx.XlsxReadSheetHolder;
import ai.chat2db.excel.read.metadata.holder.xlsx.XlsxReadWorkbookHolder;
/**
* A context is the main anchorage point of a ls xlsx reader.
*
* @author Jiaju Zhuang
**/
public interface XlsxReadContext extends AnalysisContext {
/**
* All information about the workbook you are currently working on.
*
* @return Current workbook holder
*/
XlsxReadWorkbookHolder xlsxReadWorkbookHolder();
/**
* All information about the sheet you are currently working on.
*
* @return Current sheet holder
*/
XlsxReadSheetHolder xlsxReadSheetHolder();
}

Some files were not shown because too many files have changed in this diff Show More