Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions .github/workflows/pre-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,15 @@
token: ${{ secrets.QLTY_COVERAGE_TOKEN }}
total-parts-count: 3
files: converter/coverage/cobertura.xml
# Check translation tags
- name: Check translation tags
id: translation_check
run: |
pipenv run python scripts/check_translations.py > translation_check_report.md || echo "Translation issues found, continuing..."
# Read the report content and export it as the TRANSLATION_REPORT environment variable (via GITHUB_ENV)
echo "TRANSLATION_REPORT<<EOF" >> $GITHUB_ENV
cat translation_check_report.md >> $GITHUB_ENV
echo "EOF" >> $GITHUB_ENV
- name: Generate new output files
run: |
#
Expand Down Expand Up @@ -121,6 +130,21 @@

cp output/owasp_cornucopia_webapp_3.0_cards_bridge_en.idml output/owasp_cornucopia_webapp_3.0_cards_bridge_qr_en.idml output/owasp_cornucopia_webapp_3.0_cards_tarot_en.idml output/owasp_cornucopia_webapp_3.0_cards_tarot_qr_en.idml output/owasp_cornucopia_webapp_3.0_leaflet_bridge_en.idml output/owasp_cornucopia_webapp_3.0_leaflet_tarot_en.idml output/cornucopia_webapp/
zip -r output/owasp_cornucopia_webapp_3.0_en.zip output/cornucopia_webapp/Links/* output/cornucopia_webapp/Fonts/* output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_bridge_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_bridge_qr_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_tarot_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_cards_tarot_qr_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_leaflet_bridge_en.idml output/cornucopia_webapp/owasp_cornucopia_webapp_3.0_leaflet_tarot_en.idml ./resources/templates/owasp_cornucopia_webapp_scoresheet.pdf
- name: Prepare release body with translation report
id: prepare_release
run: |
# Read the translation report
TRANSLATION_REPORT=$(cat translation_check_report.md)
# Create a combined release body
cat > release_body.md << 'EOF'
## OWASP Cornucopia Pre-Release

This is an automated pre-release build from the latest master branch.

---

EOF
cat translation_check_report.md >> release_body.md
- name: Delete existing release and tag
run: gh release delete "pre-release" --cleanup-tag -y
env:
Expand All @@ -133,6 +157,7 @@
tag_name: pre-release
prerelease: true
name: Latest pre-release
body_path: release_body.md
files: |
CHANGELOG.md
LICENSE.md
Expand Down
41 changes: 36 additions & 5 deletions .github/workflows/run-tests-generate-output.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,16 @@ jobs:
token: ${{ secrets.QLTY_COVERAGE_TOKEN }}
total-parts-count: 3
files: converter/coverage/cobertura.xml
- name: Check translation tags
run: |
pipenv run python scripts/check_translations.py > translation_check_report.md || echo "Translation issues found, continuing..."
- name: Upload translation check report
if: always()
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
with:
retention-days: 5
name: translation-check-report.${{ github.sha }}.md
path: translation_check_report.md
- name: Generate new output files
run: |
#
Expand Down Expand Up @@ -160,23 +170,44 @@ jobs:
issues: write
needs: uploadoutputfiles
steps:
- name: Checkout repository
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
ref: ${{ github.event.pull_request.head.ref }}
- name: Download translation check report
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
name: translation-check-report.${{ github.sha }}.md
path: .
- uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
env:
PR_NUMBER: ${{ github.event.number }}
PR_NOTES: |
[badge]: https://img.shields.io/badge/Build-Success!-3fb950?logo=github&style=for-the-badge
ARTIFACT_URL: ${{needs.uploadoutputfiles.outputs.artifact-url}}
with:
script: |
const fs = require('fs');
let translationReport = '';
try {
translationReport = fs.readFileSync('translation_check_report.md', 'utf8');
} catch (error) {
translationReport = 'Translation check report not found.';
}

const prNotes = `[badge]: https://img.shields.io/badge/Build-Success!-3fb950?logo=github&style=for-the-badge

## Build artifacts:

| Name | Link |
|------|------|
| Output files | [cornucopia-build-files.${{ github.event.pull_request.head.sha }}.zip](${{needs.uploadoutputfiles.outputs.artifact-url}}) |

with:
script: |
---

${translationReport}`;

github.rest.issues.createComment({
issue_number: process.env.PR_NUMBER,
owner: context.repo.owner,
repo: context.repo.repo,
body: process.env.PR_NOTES
body: prNotes
})
238 changes: 238 additions & 0 deletions scripts/check_translations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
"""
Translation Tag Checker for OWASP Cornucopia

This script checks that translation files have the same T0xxx tags as the English version.
It detects:
- Missing tags in translations
- Untranslated tags (text identical to English)
- Empty tag values
"""

import sys
import yaml
from pathlib import Path
from typing import Dict, List, Tuple, Any
from collections import defaultdict


class TranslationChecker:
"""Check translations for missing, untranslated, or empty tags."""

def __init__(self, source_dir: Path):
self.source_dir = source_dir
self.results: Dict[str, Dict[str, Any]] = defaultdict(lambda: defaultdict(dict))

def extract_tags(self, yaml_file: Path) -> Dict[str, str]:
"""Extract T0xxx tags and their text from a YAML file."""
tags = {}
try:
with open(yaml_file, "r", encoding="utf-8") as f:
data = yaml.safe_load(f)

# Extract tags from paragraphs.sentences
if data and "paragraphs" in data:
for paragraph in data["paragraphs"]:
if "sentences" in paragraph:
for sentence in paragraph["sentences"]:
tag_id = sentence.get("id", "")
if tag_id.startswith("T0"):
tags[tag_id] = sentence.get("text", "")

except Exception as e:
print(f"Error reading {yaml_file}: {e}", file=sys.stderr)

return tags

def get_file_groups(self) -> Dict[str, List[Path]]:
"""Group YAML files by their base name (e.g., webapp-cards-2.2)."""
file_groups = defaultdict(list)

for yaml_file in self.source_dir.glob("*-*.yaml"):
# Skip archived files
if "archive" in str(yaml_file):
continue

# Extract base name and language
# Format: {edition}-{component}-{version}-{lang}.yaml
parts = yaml_file.stem.split("-")
if len(parts) >= 3:
# Find language code (usually last part or second to last)
lang = parts[-1]
base_name = "-".join(parts[:-1])

# Only process card files with language codes
if "cards" in base_name and len(lang) == 2:
file_groups[base_name].append(yaml_file)

return file_groups

def _separate_english_and_translations(self, files: List[Path]) -> Tuple[Path | None, List[Path]]:
"""Separate English reference file from translation files."""
english_file = None
translation_files = []

for f in files:
lang = f.stem.split("-")[-1]
if lang == "en":
english_file = f
else:
translation_files.append(f)

return english_file, translation_files

def _check_translation_tags(self, english_tags: Dict[str, str], trans_tags: Dict[str, str]) -> Dict[str, Any]:
"""Check translation tags against English reference."""
missing = []
untranslated = []
empty = []

for tag_id, eng_text in english_tags.items():
if tag_id not in trans_tags:
missing.append(tag_id)
elif not trans_tags[tag_id]:
empty.append(tag_id)
elif trans_tags[tag_id] == eng_text:
untranslated.append(tag_id)

return {
"missing": sorted(missing),
"untranslated": sorted(untranslated),
"empty": sorted(empty),
}

def check_translations(self) -> Dict[str, Dict[str, Any]]:
"""
Check all translation files against English versions.

Returns:
Dict with structure:
{
'base_name': {
'language': {
'missing': ['T00145', ...],
'untranslated': ['T00100', ...],
'empty': ['T00200', ...]
}
}
}
"""
file_groups = self.get_file_groups()

for base_name, files in file_groups.items():
english_file, translation_files = self._separate_english_and_translations(files)

if not english_file:
print(f"Warning: No English file found for {base_name}", file=sys.stderr)
continue

english_tags = self.extract_tags(english_file)
if not english_tags:
continue

for trans_file in translation_files:
lang = trans_file.stem.split("-")[-1]
trans_tags = self.extract_tags(trans_file)
issues = self._check_translation_tags(english_tags, trans_tags)

if any(issues.values()):
issues["file"] = str(trans_file.name)
self.results[base_name][lang] = issues

return dict(self.results)

def generate_markdown_report(self) -> str:
"""Generate a Markdown report of translation issues."""
report_lines = []

if not self.results:
report_lines.append("# Translation Check Report\n")
report_lines.append("✅ All existing translations have been completed.\n")
return "\n".join(report_lines)

report_lines.append("# Translation Check Report\n")
report_lines.append("The following sentences/tags have issues in the translations:\n")

# Language name mapping
lang_names = {
"es": "Spanish",
"fr": "French",
"hu": "Hungarian",
"it": "Italian",
"nl": "Dutch",
"no-nb": "Norwegian",
"pt-br": "Portuguese (Brazil)",
"pt-pt": "Portuguese (Portugal)",
"ru": "Russian",
}

for base_name in sorted(self.results.keys()):
languages = self.results[base_name]

for lang in sorted(languages.keys()):
lang_name = lang_names.get(lang, lang)
issues = languages[lang]
filename = issues.get("file", "")

report_lines.append(f"\n## {lang_name}\n")
report_lines.append(f"**File:** `{filename}`\n")

if issues["missing"]:
report_lines.append("### Missing Tags\n")
report_lines.append(
"The following tags are present in the English version but missing in this translation:\n"
)
tags_str = ", ".join(issues["missing"])
report_lines.append(f"{tags_str}\n")

if issues["untranslated"]:
report_lines.append("### Untranslated Tags\n")
report_lines.append("The following tags have identical text to English (not translated):\n")
tags_str = ", ".join(issues["untranslated"])
report_lines.append(f"{tags_str}\n")

if issues["empty"]:
report_lines.append("### Empty Tags\n")
report_lines.append("The following tags are empty:\n")
tags_str = ", ".join(issues["empty"])
report_lines.append(f"{tags_str}\n")

return "\n".join(report_lines)


def main() -> None:
    """Run the translation check for the repository containing this script.

    Scans ``<repo>/source``, prints the Markdown report to stdout, writes the
    same report to ``<repo>/translation_check_report.md``, and exits with
    status 1 when the source directory is missing or any issue was found,
    otherwise 0.
    """
    # The script lives one level below the repository root.
    repo_root = Path(__file__).parent.parent
    source_dir = repo_root / "source"

    if not source_dir.exists():
        print(f"Error: Source directory not found: {source_dir}", file=sys.stderr)
        sys.exit(1)

    checker = TranslationChecker(source_dir)
    results = checker.check_translations()
    report = checker.generate_markdown_report()

    # Report goes to stdout; the status note below goes to stderr.
    print(report)

    output_file = repo_root / "translation_check_report.md"
    with open(output_file, "w", encoding="utf-8") as handle:
        handle.write(report)

    print(f"\n---\nReport written to: {output_file}", file=sys.stderr)

    # Non-zero exit status signals that translation issues were found.
    sys.exit(1 if results else 0)


# Run the checker only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
Loading
Loading