Refactor merge logic to prioritize 'ko' msgid and streamline data structure for translations

2025-10-22 17:48:14 +09:00
parent 33d03481e4
commit 7874970816
1 changed file with 22 additions and 6 deletions
--- a/po_merge_to_csv.py
+++ b/po_merge_to_csv.py
@@ -60,12 +60,20 @@ def merge_po_to_csv_literal():
                    source_location_str = entry.occurrences[0][0] if entry.occurrences else "NoSourceLocation"
                    # source_location은 경로이므로 줄바꿈이 없을 가능성이 높지만, 안정성을 위해 추가
                    source_location_str = source_location_str.replace('\r', '\\r').replace('\n', '\\n')
-                    
-                    data_key = (msgctxt_str, msgid_str, source_location_str)
+
+                    # msgctxt + SourceLocation을 키로 사용 (msgid는 언어마다 다를 수 있음)
+                    data_key = (msgctxt_str, source_location_str)

                    if data_key not in merged_data:
-                        merged_data[data_key] = {}
-                    
+                        merged_data[data_key] = {
+                            'msgid': msgid_str,  # 첫 번째로 발견된 msgid 저장
+                            'msgid_ko': None,     # ko의 msgid를 별도로 저장
+                        }
+
+                    # ko 언어의 msgid는 별도로 저장 (원본 언어이므로 우선)
+                    if lang_code == 'ko':
+                        merged_data[data_key]['msgid_ko'] = msgid_str
+
                    merged_data[data_key][lang_code] = msgstr_str
                    # --- 수정 완료 ---

@@ -74,13 +82,21 @@ def merge_po_to_csv_literal():
                continue

        records = []
-        for (msgctxt, msgid, source_location), translations in merged_data.items():
+        for (msgctxt, source_location), data in merged_data.items():
+            # ko의 msgid가 있으면 우선 사용, 없으면 첫 번째로 발견된 msgid 사용
+            msgid = data.get('msgid_ko') if data.get('msgid_ko') else data.get('msgid')
+
            record = {
                'msgctxt': msgctxt,
                'SourceLocation': source_location,
                'msgid': msgid,
            }
-            record.update(translations)
+
+            # 언어별 번역 추가 ('msgid', 'msgid_ko' 키는 제외)
+            for key, value in data.items():
+                if key not in ['msgid', 'msgid_ko']:
+                    record[key] = value
+
            records.append(record)
        
        df = pd.DataFrame(records)