Python Library Usage
This guide covers using WiMarka as a Python library in your applications and scripts.
Basic Usage
The primary function for evaluation is wmk_eval():
from wimarka.main import wmk_eval
# Evaluate the EN source file against its CEB translation, line by line.
# The results are printed to the console; the call itself returns None.
wmk_eval(
src_file_path='path/to/source.txt',
src_lang='EN',
tgt_file_path='path/to/translation.txt',
tgt_lang='CEB'
)
Function Parameters
wmk_eval() Parameters
- wmk_eval(src_file_path: str, src_lang: str, tgt_file_path: str, tgt_lang: str)
Evaluate machine translation quality between source and target files.
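- Parameters:
src_file_path (str) – Path to the source text file
src_lang (str) – Source language code (e.g. 'EN')
tgt_file_path (str) – Path to the target (translation) text file
tgt_lang (str) – Target language code (e.g. 'CEB')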
- Raises:
ValueError – If source and target files have different line counts
FileNotFoundError – If input files don’t exist
- Returns:
None (results are printed to console)
Input File Format
Both source and target files must follow these specifications:
Structure
Encoding: UTF-8
Format: Plain text (.txt)
Sentence Alignment: One sentence per line
Line Count: Source and target files must have equal lines
Line Correspondence: Line N in source corresponds to line N in target
Example Files
source.txt:
The weather is beautiful today.
I enjoy learning new languages.
Thank you for your patience.
translation.txt:
Nindot ang panahon karon.
Nalipay ko nga makakat-on ug bag-ong pinulongan.
Salamat sa imong pailub.
Working with Results
Currently, wmk_eval() prints results to the console. You can capture results by accessing the global results dictionary:
from wimarka.main import wmk_eval, results
# Run evaluation
wmk_eval(
    src_file_path='source.txt',
    src_lang='EN',
    tgt_file_path='translation.txt',
    tgt_lang='CEB'
)

# Access results: every list in `results` is indexed by sentence position,
# so the position of each source sentence selects the matching entries.
for i, source_sentence in enumerate(results['source']):
    print(f"Source: {source_sentence}")
    print(f"Target: {results['target'][i]}")
    print(f"Fluency: {results['fluency_score'][i]}")
    print(f"Adequacy: {results['adequacy_score'][i]}")
    print(f"Overall: {results['overall_score'][i]}")
    print(f"Errors: {results['errors'][i]}")
    print(f"Explanation: {results['explanation'][i]}")
    print(f"Correction: {results['corrected_translation'][i]}")
    print("-" * 50)
Results Dictionary Structure
The results dictionary contains:
{
'source': [list of source sentences],
'target': [list of target sentences],
'errors': [list of detected errors],
'fluency_score': [list of fluency scores],
'adequacy_score': [list of adequacy scores],
'overall_score': [list of overall scores],
'explanation': [list of explanations],
'corrected_translation': [list of corrections]
}
Advanced Examples
Example 1: Batch Processing Multiple File Pairs
from wimarka.main import wmk_eval
import os
# List of (source file, target file, target language code) triples to evaluate
file_pairs = [
    ('en_batch1.txt', 'ceb_batch1.txt', 'CEB'),
    ('en_batch2.txt', 'ilo_batch2.txt', 'ILO'),
    ('en_batch3.txt', 'tgt_batch3.txt', 'TGT'),
]

for src_file, tgt_file, tgt_lang in file_pairs:
    # "\n" is a real newline, so each evaluation starts after a blank line
    print(f"\nEvaluating: {src_file} -> {tgt_file}")
    print("=" * 60)
    # The source side is always English; only the target language varies
    wmk_eval(
        src_file_path=src_file,
        src_lang='EN',
        tgt_file_path=tgt_file,
        tgt_lang=tgt_lang
    )
Example 2: Filtering Low-Quality Translations
from wimarka.main import wmk_eval, results
# Run evaluation
wmk_eval(
    src_file_path='source.txt',
    src_lang='EN',
    tgt_file_path='translation.txt',
    tgt_lang='CEB'
)

# Find translations whose overall score falls below the threshold
low_quality_threshold = 70
low_quality_indices = [
    i
    for i, score in enumerate(results['overall_score'])
    if score < low_quality_threshold
]

# Print low-quality translations for review
# ("\n" is a real newline: one blank line before and after the header)
print(f"\nFound {len(low_quality_indices)} low-quality translations:\n")
for idx in low_quality_indices:
    # Indices are 0-based; report 1-based line numbers to match the input files
    print(f"Line {idx + 1}:")
    print(f" Source: {results['source'][idx]}")
    print(f" Target: {results['target'][idx]}")
    print(f" Score: {results['overall_score'][idx]}")
    print(f" Errors: {results['errors'][idx]}")
    print(f" Suggested: {results['corrected_translation'][idx]}")
    print()
Example 3: Exporting Results to CSV
from wimarka.main import wmk_eval, results
import pandas as pd
# Run evaluation
wmk_eval(
    src_file_path='source.txt',
    src_lang='EN',
    tgt_file_path='translation.txt',
    tgt_lang='CEB'
)

# Convert results to a DataFrame: each key of `results` becomes a column
df = pd.DataFrame(results)

# Save to CSV (index=False keeps the row numbers out of the file)
df.to_csv('evaluation_results.csv', index=False, encoding='utf-8')
print("Results saved to evaluation_results.csv")

# Calculate statistics ("\n" is a real newline: blank line before the header)
print("\nEvaluation Statistics:")
print(f"Average Fluency: {df['fluency_score'].mean():.2f}")
print(f"Average Adequacy: {df['adequacy_score'].mean():.2f}")
print(f"Average Overall: {df['overall_score'].mean():.2f}")
Example 4: Integration with Translation Pipeline
from wimarka.main import wmk_eval, results
def translate_and_evaluate(source_text, target_text,
                           src_lang='EN', tgt_lang='CEB'):
    """
    Evaluate an existing translation of a source text.

    No translation is performed here. Both texts are written to temporary
    files because wmk_eval() takes file paths, and the files are removed
    again once the evaluation has finished.

    Args:
        source_text: Source text content
        target_text: Target translation content
        src_lang: Source language code
        tgt_lang: Target language code

    Returns:
        dict: Evaluation results for the first line of the input, with the
            keys 'fluency', 'adequacy', 'overall', 'errors' and 'correction'
    """
    import os
    import tempfile

    # A private temporary directory avoids overwriting files in the current
    # working directory and is deleted automatically, even if wmk_eval() raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        src_path = os.path.join(tmp_dir, 'temp_source.txt')
        tgt_path = os.path.join(tmp_dir, 'temp_target.txt')

        # Write temporary files
        with open(src_path, 'w', encoding='utf-8') as f:
            f.write(source_text)
        with open(tgt_path, 'w', encoding='utf-8') as f:
            f.write(target_text)

        # Evaluate
        wmk_eval(
            src_file_path=src_path,
            src_lang=src_lang,
            tgt_file_path=tgt_path,
            tgt_lang=tgt_lang
        )

    # Return results for the first evaluated line only
    # NOTE(review): assumes index 0 of `results` belongs to this call — confirm
    # that wmk_eval() does not accumulate entries across calls.
    return {
        'fluency': results['fluency_score'][0],
        'adequacy': results['adequacy_score'][0],
        'overall': results['overall_score'][0],
        'errors': results['errors'][0],
        'correction': results['corrected_translation'][0]
    }
# Usage: evaluate a single sentence pair with the default EN -> CEB language
# codes and report its overall score.
source = "Good morning!"
translation = "Maayong buntag!"
eval_results = translate_and_evaluate(source, translation)
print(f"Translation quality: {eval_results['overall']}/100")
Error Handling
Handle common errors gracefully:
from wimarka.main import wmk_eval
def safe_evaluate(src_file, tgt_file, src_lang, tgt_lang):
    """Run wmk_eval() and report failure instead of raising.

    Args:
        src_file: Path to the source file
        tgt_file: Path to the target (translation) file
        src_lang: Source language code
        tgt_lang: Target language code

    Returns:
        bool: True if the evaluation finished without an exception,
            False otherwise (the error is printed to the console)
    """
    try:
        wmk_eval(
            src_file_path=src_file,
            src_lang=src_lang,
            tgt_file_path=tgt_file,
            tgt_lang=tgt_lang
        )
    except FileNotFoundError as missing:
        print(f"Error: File not found - {missing}")
    except ValueError as invalid:
        print(f"Error: Invalid input - {invalid}")
    except Exception as unexpected:
        # Catch-all boundary: any other failure is reported, never propagated
        print(f"Unexpected error: {unexpected}")
    else:
        return True
    # Reached only when one of the handlers above ran
    return False
# Usage: safe_evaluate() returns a bool, so the outcome can be reported directly
success = safe_evaluate('source.txt', 'translation.txt', 'EN', 'CEB')
print("Evaluation completed successfully" if success else "Evaluation failed")
Best Practices
File Management
Use absolute paths when possible
Verify files exist before calling wmk_eval()
Handle encoding issues explicitly
Performance Optimization
Models are cached after first load
Process files sequentially for consistency
Clear results between evaluations if needed
Results Processing
Copy results immediately after evaluation
Use pandas for advanced analysis
Export results for reproducibility
Language Validation
Use valid language codes (EN, CEB, ILO, TGT)
Verify language pair compatibility
Check input text matches specified language
Common Patterns
Pattern 1: Quality Assurance Pipeline
def qa_translations(source_dir, target_dir, threshold=80):
    """QA all translations in a directory.

    Each file in source_dir is paired with the file in target_dir whose name
    has '_en' replaced by '_ceb', evaluated EN -> CEB with wmk_eval(), and
    classified by the mean of its overall scores.

    Assumes `os`, `wmk_eval` and `results` are already imported
    (`import os` and `from wimarka.main import wmk_eval, results`).

    Args:
        source_dir: Directory containing the source files
        target_dir: Directory containing the translated files
        threshold: Minimum average overall score for approval

    Returns:
        tuple: (approved, rejected), each a list of
            (source file name, average score) pairs
    """
    approved = []
    rejected = []
    for src_file in os.listdir(source_dir):
        tgt_file = src_file.replace('_en', '_ceb')
        wmk_eval(
            src_file_path=os.path.join(source_dir, src_file),
            src_lang='EN',
            tgt_file_path=os.path.join(target_dir, tgt_file),
            tgt_lang='CEB'
        )
        # NOTE(review): assumes `results` holds only this file's scores after
        # each call — confirm that wmk_eval() does not accumulate across calls.
        scores = results['overall_score']
        if not scores:
            # Nothing was scored (e.g. empty file): reject with a score of 0
            # rather than dividing by zero or approving unevaluated output.
            rejected.append((src_file, 0.0))
            continue
        avg_score = sum(scores) / len(scores)
        if avg_score >= threshold:
            approved.append((src_file, avg_score))
        else:
            rejected.append((src_file, avg_score))
    return approved, rejected
Pattern 2: Translation Comparison
def compare_translations(source_file, translation1, translation2):
    """Compare two different translations of the same source.

    Both translation files are evaluated EN -> CEB against source_file, and
    the per-sentence overall scores are printed side by side together with
    which translation scored higher (or that they tied).

    Assumes `wmk_eval` and `results` are already imported
    (`from wimarka.main import wmk_eval, results`).

    Args:
        source_file: Path to the source file
        translation1: Path to the first translation file
        translation2: Path to the second translation file

    Returns:
        None (the comparison is printed to the console)
    """
    import copy

    # Evaluate first translation.
    # Deep copy: dict.copy() is shallow, so the snapshot would still share its
    # lists with the global `results` and could change on the next call.
    wmk_eval(source_file, 'EN', translation1, 'CEB')
    results1 = copy.deepcopy(results)

    # Evaluate second translation
    wmk_eval(source_file, 'EN', translation2, 'CEB')
    results2 = copy.deepcopy(results)

    # Compare sentence by sentence
    for i, source_sentence in enumerate(results1['source']):
        score1 = results1['overall_score'][i]
        score2 = results2['overall_score'][i]
        print(f"\nSentence {i+1}:")
        print(f"Source: {source_sentence}")
        print(f"Translation 1 (Score: {score1}): {results1['target'][i]}")
        print(f"Translation 2 (Score: {score2}): {results2['target'][i]}")
        if score1 > score2:
            print("→ Translation 1 is better")
        elif score2 > score1:
            print("→ Translation 2 is better")
        else:
            # Equal scores must not be reported as a win for either side
            print("→ Both translations score the same")
Next Steps
See Understanding Output Format to understand evaluation metrics in depth
See Examples for more real-world usage scenarios
See API Reference for complete API documentation