Files
DS_L10N/lib/validator.py
2025-10-29 13:32:42 +09:00

285 lines
10 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Translation Validator for DS_L10N
번역 검증 시스템
"""
import re
from typing import List, Dict, Tuple, Optional
from dataclasses import dataclass
from enum import Enum
class ValidationLevel(Enum):
    """Severity assigned to a validation issue."""

    ERROR = 'ERROR'      # fatal error
    WARNING = 'WARNING'  # warning
    INFO = 'INFO'        # informational
@dataclass
class ValidationIssue:
    """One problem found while validating a single translated entry."""

    level: ValidationLevel  # severity of the issue
    msgctxt: str            # msgctxt value of the entry the issue belongs to
    lang: str               # language value of the entry
    category: str           # short label for the kind of problem
    message: str            # description of the problem
    original: str           # source text that was checked against
    translation: str        # translated text that was checked

    def __str__(self):
        """Render the issue as a five-line, human-readable report entry."""
        # NOTE(review): ERROR and INFO map to empty strings, so those entries
        # begin with a space -- confirm whether an icon was intended here.
        icons = {
            ValidationLevel.ERROR: '',
            ValidationLevel.WARNING: '⚠️',
            ValidationLevel.INFO: '',
        }
        prefix = icons[self.level]
        lines = [
            f"{prefix} [{self.level.value}] {self.lang}: {self.msgctxt}",
            f" 분류: {self.category}",
            f" 문제: {self.message}",
            f" 원문: {self.original}",
            f" 번역: {self.translation}",
        ]
        return "\n".join(lines)
class TranslationValidator:
    """Validate translated strings against their source strings.

    The ``validation`` section of the config controls the checks:
    ``check_empty_translations``, ``check_variables``,
    ``check_rich_text_tags`` and ``check_newlines`` (each treated as enabled
    when absent) plus ``max_length_warning`` (200 when absent). Rich-text
    tags are read from ``rich_text.tag_patterns``.
    """

    def __init__(self, config: dict):
        """Store the config and prepare the patterns used by the checks.

        Args:
            config: Settings dict with optional ``validation`` and
                ``rich_text`` sections.
        """
        self.config = config
        # "or {}" / "or []" also covers a section that is present but None.
        self.validation_config = config.get('validation') or {}
        # Placeholder variables have the form {Name_1}.
        self.variable_pattern = re.compile(r'\{[A-Za-z0-9_]+\}')
        # Tags are compared as literal substrings, not regular expressions.
        self.tag_patterns = (config.get('rich_text') or {}).get('tag_patterns') or []

    @staticmethod
    def _issue(level, category: str, message: str, msgctxt: str,
               original: str, translation: str, lang: str) -> ValidationIssue:
        """Build a ValidationIssue for the given entry."""
        return ValidationIssue(
            level=level,
            msgctxt=msgctxt,
            lang=lang,
            category=category,
            message=message,
            original=original,
            translation=translation,
        )

    def validate_entry(self, msgctxt: str, original: str, translation: str, lang: str) -> List[ValidationIssue]:
        """Run every enabled check on one entry.

        Args:
            msgctxt: Context key of the entry.
            original: Source text.
            translation: Translated text.
            lang: Language of the translation.

        Returns:
            The issues found, in check order; empty when nothing was flagged.
        """
        cfg = self.validation_config
        issues: List[ValidationIssue] = []

        # Empty translation
        if cfg.get('check_empty_translations', True):
            if not translation or not translation.strip():
                issues.append(self._issue(
                    ValidationLevel.WARNING, '빈 번역', '번역문이 비어있습니다',
                    msgctxt, original, translation, lang))
                # The remaining checks are skipped for an empty translation.
                return issues

        # Placeholder variables
        if cfg.get('check_variables', True):
            issues.extend(self._check_variables(msgctxt, original, translation, lang))

        # Rich-text tags
        if cfg.get('check_rich_text_tags', True):
            issues.extend(self._check_rich_text_tags(msgctxt, original, translation, lang))

        # Newline sequences
        if cfg.get('check_newlines', True):
            issues.extend(self._check_newlines(msgctxt, original, translation, lang))

        # Maximum length (always evaluated; there is no toggle for it)
        max_length = cfg.get('max_length_warning', 200)
        if len(translation) > max_length:
            issues.append(self._issue(
                ValidationLevel.INFO, '길이 초과',
                f'번역문이 {max_length}자를 초과합니다 (현재: {len(translation)}자)',
                msgctxt, original, translation, lang))

        return issues

    def _check_variables(self, msgctxt: str, original: str, translation: str, lang: str) -> List[ValidationIssue]:
        """Compare the sets of placeholder variables in source and translation.

        Variables missing from the translation are reported as an ERROR;
        variables that only appear in the translation as a WARNING. Sets are
        compared, so repeated occurrences of one variable are not counted.
        """
        issues: List[ValidationIssue] = []
        orig_vars = set(self.variable_pattern.findall(original))
        trans_vars = set(self.variable_pattern.findall(translation))

        # Variables dropped by the translation
        missing_vars = orig_vars - trans_vars
        if missing_vars:
            issues.append(self._issue(
                ValidationLevel.ERROR, '변수 누락',
                f'누락된 변수: {", ".join(sorted(missing_vars))}',
                msgctxt, original, translation, lang))

        # Variables introduced by the translation
        extra_vars = trans_vars - orig_vars
        if extra_vars:
            issues.append(self._issue(
                ValidationLevel.WARNING, '추가 변수',
                f'원문에 없는 변수: {", ".join(sorted(extra_vars))}',
                msgctxt, original, translation, lang))

        return issues

    def _check_rich_text_tags(self, msgctxt: str, original: str, translation: str, lang: str) -> List[ValidationIssue]:
        """Check that each configured tag occurs equally often in both texts.

        Every mismatch is reported as an ERROR. The closing tag ``</>`` is
        always checked, whether or not it is listed in the configured tags.
        """
        issues: List[ValidationIssue] = []

        # Opening tags from the config
        for tag in self.tag_patterns:
            if tag == '</>':
                continue  # the closing tag is handled once, below
            orig_count = original.count(tag)
            trans_count = translation.count(tag)
            if orig_count != trans_count:
                issues.append(self._issue(
                    ValidationLevel.ERROR, '태그 불일치',
                    f'태그 {tag} 개수 불일치 (원문: {orig_count}, 번역: {trans_count})',
                    msgctxt, original, translation, lang))

        # Closing tag </>
        orig_close = original.count('</>')
        trans_close = translation.count('</>')
        if orig_close != trans_close:
            issues.append(self._issue(
                ValidationLevel.ERROR, '태그 불일치',
                f'닫는 태그 </> 개수 불일치 (원문: {orig_close}, 번역: {trans_close})',
                msgctxt, original, translation, lang))

        return issues

    def _check_newlines(self, msgctxt: str, original: str, translation: str, lang: str) -> List[ValidationIssue]:
        """Check that both texts contain the same number of newline escapes.

        Returns a single WARNING when any of the three counts differ.
        """
        issues: List[ValidationIssue] = []

        # The literals below are escaped text (a backslash followed by "r" or
        # "n"), not actual CR/LF control characters.
        orig_crlf = original.count('\\r\\n')
        trans_crlf = translation.count('\\r\\n')
        # Each \r\n pair also matches the lone \n and \r searches, so remove
        # the pairs from those counts.
        orig_lf = original.count('\\n') - orig_crlf
        trans_lf = translation.count('\\n') - trans_crlf
        orig_cr = original.count('\\r') - orig_crlf
        trans_cr = translation.count('\\r') - trans_crlf

        if orig_crlf != trans_crlf or orig_lf != trans_lf or orig_cr != trans_cr:
            issues.append(self._issue(
                ValidationLevel.WARNING, '줄바꿈 불일치',
                f'줄바꿈 문자 개수 불일치 (원문: \\r\\n={orig_crlf}, \\n={orig_lf}, \\r={orig_cr} / '
                f'번역: \\r\\n={trans_crlf}, \\n={trans_lf}, \\r={trans_cr})',
                msgctxt, original, translation, lang))

        return issues

    def validate_batch(self, entries: List[Dict]) -> Tuple[List[ValidationIssue], Dict[str, int]]:
        """Validate many entries and summarise the outcome.

        Args:
            entries: [{'msgctxt': ..., 'msgid': ..., 'lang': ..., 'msgstr': ...}, ...]
                Missing keys are treated as empty strings.

        Returns:
            (issues, stats) where ``stats`` has the keys ``total``,
            ``errors``, ``warnings``, ``info`` (issue counts per level) and
            ``passed`` (number of entries without any ERROR issue).
        """
        all_issues: List[ValidationIssue] = []
        failed_entries = 0

        for entry in entries:
            issues = self.validate_entry(
                entry.get('msgctxt', ''),
                entry.get('msgid', ''),
                entry.get('msgstr', ''),
                entry.get('lang', ''),
            )
            all_issues.extend(issues)
            # Count failures per entry. Deduplicating by msgctxt would merge
            # entries that share a context (e.g. the same key in two
            # languages) and overstate the number of passed entries.
            if any(issue.level == ValidationLevel.ERROR for issue in issues):
                failed_entries += 1

        stats = {
            'total': len(entries),
            'errors': sum(1 for issue in all_issues if issue.level == ValidationLevel.ERROR),
            'warnings': sum(1 for issue in all_issues if issue.level == ValidationLevel.WARNING),
            'info': sum(1 for issue in all_issues if issue.level == ValidationLevel.INFO),
            'passed': len(entries) - failed_entries,
        }
        return all_issues, stats

    @staticmethod
    def _log_issue_group(log, header: str, group: List[ValidationIssue], limit: int) -> None:
        """Log a header, up to ``limit`` issues, and a remainder line if needed."""
        if not group:
            return
        log(f'\n{header} ({len(group)}건):')
        for issue in group[:limit]:
            log(f'\n{issue}')
        if len(group) > limit:
            log(f'\n... 외 {len(group) - limit}건 더 있음')

    def print_validation_report(self, issues: List[ValidationIssue], stats: Dict[str, int], logger):
        """Write the validation report through ``logger``.

        Args:
            issues: Issues as returned by ``validate_batch``.
            stats: Statistics dict as returned by ``validate_batch``.
            logger: Object providing ``section``, ``success``, ``stats``,
                ``error``, ``warning``, ``info`` and ``separator``.
        """
        logger.section('🔍 번역 검증 결과')

        if not issues:
            logger.success('✅ 모든 검증 통과!')
            logger.stats(**stats)
            return

        # Group by severity
        errors = [i for i in issues if i.level == ValidationLevel.ERROR]
        warnings = [i for i in issues if i.level == ValidationLevel.WARNING]
        infos = [i for i in issues if i.level == ValidationLevel.INFO]

        # At most 10 errors, 10 warnings and 5 info entries are listed.
        self._log_issue_group(logger.error, '❌ 오류', errors, 10)
        self._log_issue_group(logger.warning, '⚠️ 경고', warnings, 10)
        self._log_issue_group(logger.info, ' 정보', infos, 5)

        # Statistics and final verdict
        logger.separator()
        logger.stats(**stats)
        if errors:
            logger.error(f'\n검증 실패: {stats["errors"]}개의 오류가 발견되었습니다.')
        else:
            logger.success(f'\n검증 완료: {stats["passed"]}건 통과 (경고 {stats["warnings"]}건)')