Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 74 additions & 3 deletions .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Test & Coverage (JaCoCo -> Codecov)
name: Test & Coverage (Java/Node/Python -> Codecov)

on:
pull_request:
Expand All @@ -21,12 +21,83 @@ jobs:
distribution: temurin
java-version: '11'

- name: Build & Test (with JaCoCo)
run: mvn -B -U -DskipITs=true test -f java/opendataloader-pdf-core/pom.xml
- name: Build, Verify & Test (with JaCoCo)
run: mvn -B -U -DskipITs=true verify -f java/opendataloader-pdf-core/pom.xml

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
with:
files: java/opendataloader-pdf-core/target/site/jacoco/jacoco.xml
fail_ci_if_error: true
token: ${{ secrets.CODECOV_TOKEN }}

node-coverage:
  # Lints, type-checks and tests the Node package on every OS / Node.js
  # combination in the matrix, then uploads the lcov report to Codecov.
  runs-on: ${{ matrix.os }}
  strategy:
    fail-fast: false
    matrix:
      os: [ubuntu-latest, windows-latest, macos-latest]
      node: [20, 22]
  permissions:
    contents: read
  steps:
    - name: Checkout
      uses: actions/checkout@v4
    # pnpm has to be installed BEFORE setup-node: with `cache: pnpm` the
    # setup-node action invokes the pnpm binary to find the store directory
    # and fails when pnpm is not yet on PATH.
    - name: Install pnpm
      uses: pnpm/action-setup@v4
      with:
        version: 9
    - name: Set up Node.js
      uses: actions/setup-node@v4
      with:
        node-version: ${{ matrix.node }}
        cache: pnpm
        # The package is not at the repository root, so tell the cache where
        # to look for the lockfile instead of relying on the root default.
        cache-dependency-path: '**/pnpm-lock.yaml'
    - name: Install deps
      run: pnpm install --no-frozen-lockfile
      working-directory: ./node/opendataloader-pdf
    - name: Lint (ESLint)
      run: pnpm run lint
      working-directory: ./node/opendataloader-pdf
    - name: Typecheck (tsc)
      run: pnpm exec tsc -p tsconfig.json --noEmit
      working-directory: ./node/opendataloader-pdf
    - name: Run tests with coverage
      run: pnpm exec vitest run --coverage.enabled --coverage.reporter=lcov --coverage.include=src/**
      working-directory: ./node/opendataloader-pdf
    # Coverage upload is best-effort here (fail_ci_if_error: false), so a
    # Codecov outage does not fail the matrix.
    - name: Upload Node coverage to Codecov
      uses: codecov/codecov-action@v5
      with:
        files: node/opendataloader-pdf/coverage/lcov.info
        fail_ci_if_error: false
        token: ${{ secrets.CODECOV_TOKEN }}

python-coverage:
  # Runs the Python unittest suite under coverage.py for each OS / Python
  # version in the matrix and uploads the resulting XML report to Codecov.
  runs-on: ${{ matrix.os }}
  strategy:
    fail-fast: false
    matrix:
      os:
        - ubuntu-latest
        - windows-latest
        - macos-latest
      python:
        - '3.11'
        - '3.12'
  permissions:
    contents: read
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python }}

    - name: Install coverage
      working-directory: ./python/opendataloader-pdf
      run: pip install coverage

    # Collect coverage while discovering tests, then convert it to the XML
    # report that the upload step below reads.
    - name: Run tests with coverage
      working-directory: ./python/opendataloader-pdf
      run: |
        python -m coverage run -m unittest discover -s tests -p "test_*.py"
        python -m coverage xml -o coverage.xml

    # Upload is non-blocking: fail_ci_if_error is false.
    - name: Upload Python coverage to Codecov
      uses: codecov/codecov-action@v5
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        files: python/opendataloader-pdf/coverage.xml
        fail_ci_if_error: false
9 changes: 6 additions & 3 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ jobs:
echo "VERSION=0.0.0" >> $GITHUB_ENV
fi

- name: Write VERSION file and sync versions
  # VERSION was exported through $GITHUB_ENV by the previous step, so it is
  # already a real environment variable here. Reading it as "$VERSION" keeps
  # the value out of the script text; interpolating ${{ env.VERSION }} would
  # paste it into the shell source and allow script injection.
  run: |
    echo "$VERSION" > VERSION
    python build-scripts/set_version.py

- name: Set up Java
uses: actions/setup-java@v4
with:
Expand Down Expand Up @@ -62,19 +67,17 @@ jobs:
# =================================================================
- name: '[Java] Build and test'
run: |
mvn versions:set -DnewVersion=${{ env.VERSION }} -DgenerateBackupPoms=false
mvn -B clean package -P release
working-directory: ./java

- name: '[Python] Build and test'
run: |
sed -i "s/version=\"0.0.0\"/version=\"${{ env.VERSION }}\"/" setup.py
chmod +x build.sh && ./build.sh
working-directory: ./python/opendataloader-pdf

- name: '[Node.js] Build and test'
run: |
pnpm install --frozen-lockfile
pnpm install --no-frozen-lockfile
pnpm version ${{ env.VERSION }} --no-git-tag-version --allow-same-version
pnpm run build
pnpm test
Expand Down
24 changes: 12 additions & 12 deletions build-scripts/set_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import re
import sys

def set_version(version_file, pom_file, pyproject_toml_file):
def set_version(version_file, pom_file, setup_py_file):
with open(version_file, 'r') as f:
version = f.read().strip()

Expand All @@ -16,30 +16,30 @@ def set_version(version_file, pom_file, pyproject_toml_file):
f.write(pom_content)
print(f"Updated Maven POM version to {version}")

# Update Python pyproject.toml
with open(pyproject_toml_file, 'r') as f:
pyproject_content = f.read()
pyproject_content = re.sub(r'version = ".*"', f'version = "{version}"', pyproject_content, count=1)
with open(pyproject_toml_file, 'w') as f:
f.write(pyproject_content)
print(f"Updated Python pyproject.toml version to {version}")
# Update Python setup.py
with open(setup_py_file, 'r') as f:
setup_content = f.read()
setup_content = re.sub(r'version=\".*\"', f'version=\"{version}\"', setup_content, count=1)
with open(setup_py_file, 'w') as f:
f.write(setup_content)
print(f"Updated Python setup.py version to {version}")

if __name__ == "__main__":
# Paths are relative to the monorepo root
root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))

version_path = os.path.join(root_dir, 'VERSION')
java_pom_path = os.path.join(root_dir, 'java', 'pom.xml')
python_pyproject_path = os.path.join(root_dir, 'python', 'packages', 'opendataloader_pdf', 'pyproject.toml')
python_setup_path = os.path.join(root_dir, 'python', 'opendataloader-pdf', 'setup.py')

if not os.path.exists(version_path):
print(f"Error: VERSION file not found at {version_path}")
sys.exit(1)
if not os.path.exists(java_pom_path):
print(f"Error: Java pom.xml not found at {java_pom_path}")
sys.exit(1)
if not os.path.exists(python_pyproject_path):
print(f"Error: Python pyproject.toml not found at {python_pyproject_path}")
if not os.path.exists(python_setup_path):
print(f"Error: Python setup.py not found at {python_setup_path}")
sys.exit(1)

set_version(version_path, java_pom_path, python_pyproject_path)
set_version(version_path, java_pom_path, python_setup_path)
13 changes: 13 additions & 0 deletions java/checkstyle.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?xml version="1.0"?>
<!DOCTYPE module PUBLIC "-//Checkstyle//DTD Checkstyle Configuration 1.3//EN" "https://checkstyle.org/dtds/configuration_1_3.dtd">
<!--
  Shared Checkstyle rule set for the Java modules.
  The cli and core poms point maven-checkstyle-plugin at this file through
  ${project.basedir}/../checkstyle.xml, so it must stay in the java/ directory.
-->
<module name="Checker">
<!-- Every enabled check is registered under TreeWalker. -->
<module name="TreeWalker">
<!-- Import hygiene. -->
<module name="UnusedImports"/>
<!-- Whitespace checks. -->
<module name="WhitespaceAfter"/>
<module name="WhitespaceAround"/>
<module name="NoWhitespaceAfter"/>
<module name="NoWhitespaceBefore"/>
<!-- Block checks. -->
<module name="NeedBraces"/>
<module name="EmptyBlock"/>
</module>
</module>
10 changes: 8 additions & 2 deletions java/opendataloader-pdf-cli/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
FROM eclipse-temurin:11-jre
FROM eclipse-temurin:17-jre-alpine@sha256:f6ba7667f56ecf95646288862deb66c52fced48fa375111b4f6ff21745b380c7
LABEL org.opencontainers.image.title="opendataloader-pdf-cli" \
org.opencontainers.image.description="OpenDataLoader PDF CLI" \
org.opencontainers.image.vendor="Hancom Inc." \
org.opencontainers.image.licenses="MPL-2.0"
WORKDIR /app
COPY target/opendataloader-pdf-cli-*.jar opendataloader-pdf-cli.jar
RUN addgroup -S app && adduser -S -G app appuser
COPY --chown=appuser:app target/opendataloader-pdf-cli-*.jar opendataloader-pdf-cli.jar
USER appuser:app
ENTRYPOINT ["java","-jar","opendataloader-pdf-cli.jar"]
36 changes: 36 additions & 0 deletions java/opendataloader-pdf-cli/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,42 @@

<build>
<plugins>
<!-- Checkstyle: runs the `check` goal during `verify`, using the rule set
     one directory above this module (../checkstyle.xml), and prints its
     output to the console. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-checkstyle-plugin</artifactId>
<version>3.3.1</version>
<configuration>
<configLocation>${project.basedir}/../checkstyle.xml</configLocation>
<encoding>${project.build.sourceEncoding}</encoding>
<consoleOutput>true</consoleOutput>
<failsOnError>true</failsOnError>
</configuration>
<executions>
<execution>
<phase>verify</phase>
<goals>
<goal>check</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- SpotBugs: runs the `check` goal during `verify` with effort=Max and
     threshold=Low. -->
<plugin>
<groupId>com.github.spotbugs</groupId>
<artifactId>spotbugs-maven-plugin</artifactId>
<version>4.8.6.1</version>
<configuration>
<effort>Max</effort>
<threshold>Low</threshold>
</configuration>
<executions>
<execution>
<phase>verify</phase>
<goals>
<goal>check</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
</plugin>
Expand Down
36 changes: 36 additions & 0 deletions java/opendataloader-pdf-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,42 @@
</resource>
</resources>
<plugins>
<!-- Checkstyle: runs the `check` goal during `verify`, using the rule set
     one directory above this module (../checkstyle.xml), and prints its
     output to the console. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-checkstyle-plugin</artifactId>
<version>3.3.1</version>
<configuration>
<configLocation>${project.basedir}/../checkstyle.xml</configLocation>
<encoding>${project.build.sourceEncoding}</encoding>
<consoleOutput>true</consoleOutput>
<failsOnError>true</failsOnError>
</configuration>
<executions>
<execution>
<phase>verify</phase>
<goals>
<goal>check</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- SpotBugs: runs the `check` goal during `verify` with effort=Max and
     threshold=Low. -->
<plugin>
<groupId>com.github.spotbugs</groupId>
<artifactId>spotbugs-maven-plugin</artifactId>
<version>4.8.6.1</version>
<configuration>
<effort>Max</effort>
<threshold>Low</threshold>
</configuration>
<executions>
<execution>
<phase>verify</phase>
<goals>
<goal>check</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>flatten-maven-plugin</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ protected void writeList(PDFList list) throws IOException {
htmlWriter.write(HtmlSyntax.HTML_LIST_ITEM_TAG);

htmlWriter.write(HtmlSyntax.HTML_PARAGRAPH_TAG);
htmlWriter.write(getCorrectString(item.toString()));
htmlWriter.write(escapeHtmlText(item.toString()));
htmlWriter.write(HtmlSyntax.HTML_PARAGRAPH_CLOSE_TAG);

for (IObject object : item.getContents()) {
Expand All @@ -168,7 +168,7 @@ protected void writeList(PDFList list) throws IOException {

protected void writeSemanticTextNode(SemanticTextNode textNode) throws IOException {
htmlWriter.write(HtmlSyntax.HTML_FIGURE_CAPTION_TAG);
htmlWriter.write(getCorrectString(textNode.getValue()));
htmlWriter.write(escapeHtmlText(textNode.getValue()));
htmlWriter.write(HtmlSyntax.HTML_FIGURE_CAPTION_CLOSE_TAG);
htmlWriter.write(HtmlSyntax.HTML_LINE_BREAK);
}
Expand Down Expand Up @@ -219,19 +219,19 @@ protected void writeParagraph(SemanticParagraph paragraph) throws IOException {
htmlWriter.write(HtmlSyntax.HTML_INDENT);
}

String safe = escapeHtmlText(paragraphValue);
if (isInsideTable() && StaticContainers.isKeepLineBreaks()) {
paragraphValue = paragraphValue.replace(HtmlSyntax.HTML_LINE_BREAK, HtmlSyntax.HTML_LINE_BREAK_TAG);
safe = safe.replace(HtmlSyntax.HTML_LINE_BREAK, HtmlSyntax.HTML_LINE_BREAK_TAG);
}

htmlWriter.write(getCorrectString(paragraphValue));
htmlWriter.write(safe);
htmlWriter.write(HtmlSyntax.HTML_PARAGRAPH_CLOSE_TAG);
htmlWriter.write(HtmlSyntax.HTML_LINE_BREAK);
}

protected void writeHeading(SemanticHeading heading) throws IOException {
int headingLevel = Math.min(6, Math.max(1, heading.getHeadingLevel()));
htmlWriter.write("<h" + headingLevel + ">");
htmlWriter.write(getCorrectString(heading.getValue()));
htmlWriter.write(escapeHtmlText(heading.getValue()));
htmlWriter.write("</h" + headingLevel + ">");
htmlWriter.write(HtmlSyntax.HTML_LINE_BREAK);
}
Expand All @@ -249,7 +249,7 @@ private void writeCellTag(TableBorderCell cell, boolean isHeader) throws IOExcep
cellTag.append(" rowspan=\"").append(rowSpan).append("\"");
}
cellTag.append(">");
htmlWriter.write(getCorrectString(cellTag.toString()));
htmlWriter.write(cellTag.toString());
}

protected void enterTable() {
Expand All @@ -273,6 +273,27 @@ protected String getCorrectString(String value) {
return null;
}

/**
 * Escapes a text value so it can be written between HTML tags.
 *
 * <p>NUL characters (U+0000) are dropped, and the characters
 * {@code & < > " ' /} are replaced with {@code &amp; &lt; &gt; &quot;
 * &#x27; &#x2F;} respectively. All other characters are copied unchanged.
 *
 * @param value the raw text, may be {@code null}
 * @return the escaped text, or {@code null} when {@code value} is {@code null}
 */
private static String escapeHtmlText(String value) {
    if (value == null) {
        return null;
    }
    final int length = value.length();
    final StringBuilder escaped = new StringBuilder(length);
    for (int index = 0; index < length; index++) {
        final char current = value.charAt(index);
        if (current == '\u0000') {
            // NUL is removed entirely rather than escaped.
            continue;
        }
        final String entity;
        if (current == '&') {
            entity = "&amp;";
        } else if (current == '<') {
            entity = "&lt;";
        } else if (current == '>') {
            entity = "&gt;";
        } else if (current == '"') {
            entity = "&quot;";
        } else if (current == '\'') {
            entity = "&#x27;";
        } else if (current == '/') {
            entity = "&#x2F;";
        } else {
            entity = null;
        }
        if (entity == null) {
            escaped.append(current);
        } else {
            escaped.append(entity);
        }
    }
    return escaped.toString();
}

@Override
public void close() throws IOException {
if (htmlWriter != null) {
Expand Down
Loading
Loading