Examples
This page provides comprehensive, real-world examples of using WiMarka for various translation evaluation scenarios.
Basic Examples
Example 1: Simple English to Cebuano
Scenario: Evaluate a short greeting translation
Input Files:
greeting_en.txt:
Good morning!
How are you today?
I hope you have a great day.
greeting_ceb.txt:
Maayong buntag!
Kumusta ka karon?
Nanghinaut ko nga nindot ang imong adlaw.
Python Code:
from wimarka.main import wmk_eval
# Evaluate the English source file against its Cebuano translation.
# Both paths are relative to the current working directory.
wmk_eval(
    src_file_path='greeting_en.txt',
    src_lang='EN',
    tgt_file_path='greeting_ceb.txt',
    tgt_lang='CEB'
)
CLI Command:
# Same evaluation from the shell. A single trailing backslash continues the
# command on the next line; it must be the last character on the line.
wimarka --src_file_path greeting_en.txt \
        --src_lang EN \
        --tgt_file_path greeting_ceb.txt \
        --tgt_lang CEB
Example 2: English to Ilocano
Input Files:
sentences_en.txt:
Thank you very much.
Where is the library?
I love reading books.
sentences_ilo.txt:
Agyamanak unay.
Sadino ti library?
Pagpagay agbasa kadagiti libro.
Evaluation:
from wimarka.main import wmk_eval
# Evaluate the English source file against its Ilocano translation.
# Both paths are relative to the current working directory.
wmk_eval(
    src_file_path='sentences_en.txt',
    src_lang='EN',
    tgt_file_path='sentences_ilo.txt',
    tgt_lang='ILO'
)
Example 3: English to Tagalog
Input Files:
questions_en.txt:
What is your name?
How old are you?
Where do you live?
questions_tgt.txt:
Ano ang pangalan mo?
Ilang taon ka na?
Saan ka nakatira?
Evaluation:
# Evaluate questions_en.txt against questions_tgt.txt from the shell.
# A single trailing backslash continues the command on the next line.
wimarka --src_file_path questions_en.txt \
        --src_lang EN \
        --tgt_file_path questions_tgt.txt \
        --tgt_lang TGT
Advanced Examples
Example 4: Quality Assurance Pipeline
Scenario: Automatically filter low-quality translations in a batch
from wimarka.main import wmk_eval, results
import os
def qa_pipeline(src_dir, tgt_dir, output_dir, threshold=75,
                src_lang='EN', tgt_lang='CEB'):
    """
    Quality assurance pipeline for translations.

    Evaluates every ``.txt`` file in ``src_dir`` against the file of the
    same name in ``tgt_dir``, then copies the translation into
    ``output_dir/approved`` or ``output_dir/rejected`` depending on whether
    its average overall score reaches ``threshold``. Progress and a final
    summary are printed; nothing is returned.

    Args:
        src_dir: Directory with source files
        tgt_dir: Directory with translation files (matched by file name)
        output_dir: Directory for approved/rejected files
        threshold: Minimum acceptable average overall score
        src_lang: Source language code passed to wmk_eval
        tgt_lang: Target language code passed to wmk_eval
    """
    # Local import so the example's top-of-file imports stay unchanged.
    import shutil

    approved_dir = os.path.join(output_dir, "approved")
    rejected_dir = os.path.join(output_dir, "rejected")
    os.makedirs(approved_dir, exist_ok=True)
    os.makedirs(rejected_dir, exist_ok=True)

    approved_count = 0
    rejected_count = 0

    # os.listdir() returns entries in arbitrary order; sort so that runs
    # produce the same output order every time.
    for src_file in sorted(os.listdir(src_dir)):
        if not src_file.endswith('.txt'):
            continue

        src_path = os.path.join(src_dir, src_file)
        tgt_path = os.path.join(tgt_dir, src_file)
        if not os.path.exists(tgt_path):
            print(f"Warning: No translation for {src_file}")
            continue

        print(f"\nEvaluating: {src_file}")
        print("=" * 50)

        # Evaluate
        wmk_eval(src_path, src_lang, tgt_path, tgt_lang)

        # NOTE(review): assumes `results` holds only the scores of the
        # wmk_eval call just made -- confirm against the library.
        scores = results['overall_score']
        if len(scores) == 0:
            # Nothing was scored, so there is no average to compare.
            print(f"Warning: No scores produced for {src_file}")
            continue
        avg_score = sum(scores) / len(scores)

        # Route the translation by quality. A byte-for-byte copy avoids
        # decoding the file with the platform's default text encoding.
        if avg_score >= threshold:
            shutil.copyfile(tgt_path, os.path.join(approved_dir, src_file))
            approved_count += 1
            print(f"✓ APPROVED (Score: {avg_score:.1f})")
        else:
            shutil.copyfile(tgt_path, os.path.join(rejected_dir, src_file))
            rejected_count += 1
            print(f"✗ REJECTED (Score: {avg_score:.1f})")

    total = approved_count + rejected_count
    print(f"\n{'='*50}")
    print("QA Complete:")
    print(f" Approved: {approved_count}")
    print(f" Rejected: {rejected_count}")
    if total:
        print(f" Pass Rate: {approved_count / total * 100:.1f}%")
    else:
        # Avoid dividing by zero when no file pair was evaluated.
        print(" Pass Rate: n/a (no files evaluated)")
# Run the pipeline: source files in translations/source are matched by file
# name with translations in translations/cebuano, and each translation is
# copied under translations/qa_results according to the score threshold.
qa_pipeline(
    src_dir='translations/source',
    tgt_dir='translations/cebuano',
    output_dir='translations/qa_results',
    threshold=75
)
Example 5: Comparing Multiple Translation Systems
Scenario: Evaluate two different MT systems translating the same content
from wimarka.main import wmk_eval, results
import pandas as pd
def compare_systems(src_file, system_a_file, system_b_file, tgt_lang='CEB',
                    src_lang='EN'):
    """
    Compare two MT systems that translated the same source file.

    Evaluates each system's output with wmk_eval, then prints the average
    overall, fluency and adequacy scores side by side, followed by a count
    of sentence-level wins based on the overall score. Nothing is returned.

    Args:
        src_file: Source text file shared by both systems
        system_a_file: Translation file produced by System A
        system_b_file: Translation file produced by System B
        tgt_lang: Target language code passed to wmk_eval
        src_lang: Source language code passed to wmk_eval
    """
    def snapshot():
        # Copy the score lists before the next evaluation runs.
        # NOTE(review): presumably wmk_eval overwrites `results` on each
        # call, which is why copies are taken -- confirm against the library.
        return {
            'overall': results['overall_score'].copy(),
            'fluency': results['fluency_score'].copy(),
            'adequacy': results['adequacy_score'].copy()
        }

    # Evaluate System A
    print("Evaluating System A...")
    wmk_eval(src_file, src_lang, system_a_file, tgt_lang)
    results_a = snapshot()

    # Evaluate System B
    print("\nEvaluating System B...")
    wmk_eval(src_file, src_lang, system_b_file, tgt_lang)
    results_b = snapshot()

    # Compare
    print("\n" + "="*60)
    print("COMPARISON RESULTS")
    print("="*60)

    # Averages are undefined without scores; stop instead of dividing by zero.
    all_scores = list(results_a.values()) + list(results_b.values())
    if any(len(scores) == 0 for scores in all_scores):
        print("No scores available for comparison.")
        return

    for metric in ('overall', 'fluency', 'adequacy'):
        avg_a = sum(results_a[metric]) / len(results_a[metric])
        avg_b = sum(results_b[metric]) / len(results_b[metric])
        print(f"\n{metric.upper()} Score:")
        print(f" System A: {avg_a:.2f}")
        print(f" System B: {avg_b:.2f}")
        print(f" Difference: {avg_b - avg_a:+.2f}")
        if avg_a > avg_b:
            print(" Winner: System A")
        elif avg_b > avg_a:
            print(" Winner: System B")
        else:
            print(" Result: Tie")

    # Sentence-level comparison
    print(f"\n{'='*60}")
    print("SENTENCE-LEVEL WINS")
    print(f"{'='*60}")

    overall_a = results_a['overall']
    overall_b = results_b['overall']
    if len(overall_a) != len(overall_b):
        # Pairing by position only makes sense for the shared prefix.
        print(f"Warning: sentence counts differ "
              f"({len(overall_a)} vs {len(overall_b)}); "
              f"comparing the first {min(len(overall_a), len(overall_b))}.")

    pairs = list(zip(overall_a, overall_b))
    a_wins = sum(1 for a, b in pairs if a > b)
    b_wins = sum(1 for a, b in pairs if b > a)
    ties = len(pairs) - a_wins - b_wins

    print(f"System A wins: {a_wins}")
    print(f"System B wins: {b_wins}")
    print(f"Ties: {ties}")
# Run comparison: both translation files are evaluated against the same
# source file, test_set.txt, with Cebuano as the target language.
compare_systems(
    src_file='test_set.txt',
    system_a_file='google_translate.txt',
    system_b_file='custom_mt.txt',
    tgt_lang='CEB'
)
Example 6: Translation Memory Integration
Scenario: Build a translation memory with quality scores
from wimarka.main import wmk_eval, results
import json
def build_translation_memory(src_file, tgt_file, tgt_lang,
                             min_score=80, output_file='tm.json',
                             src_lang='EN'):
    """
    Build translation memory from high-quality translations.

    Evaluates the file pair with wmk_eval, keeps every sentence pair whose
    overall score is at least ``min_score``, and writes the kept pairs with
    their scores and errors to ``output_file`` as JSON. A short summary is
    printed; nothing is returned.

    Args:
        src_file: Source text file
        tgt_file: Target translation file
        tgt_lang: Target language code
        min_score: Minimum quality score to include
        output_file: Output JSON file for TM
        src_lang: Source language code (used for evaluation and for the
            ``language_pair`` field of each entry)
    """
    # Evaluate translations
    wmk_eval(src_file, src_lang, tgt_file, tgt_lang)

    # Read both files inside context managers so the handles are closed.
    with open(src_file, encoding='utf-8') as f:
        src_lines = f.readlines()
    with open(tgt_file, encoding='utf-8') as f:
        tgt_lines = f.readlines()

    # Build TM from high-quality pairs.
    # NOTE(review): assumes the per-sentence lists in `results` are parallel
    # and follow the line order of the input files -- confirm.
    rows = zip(
        src_lines,
        tgt_lines,
        results['overall_score'],
        results['fluency_score'],
        results['adequacy_score'],
        results['errors'],
    )
    tm = []
    for src_line, tgt_line, overall, fluency, adequacy, errors in rows:
        if overall < min_score:
            continue
        tm.append({
            'source': src_line.strip(),
            'target': tgt_line.strip(),
            'language_pair': f'{src_lang}-{tgt_lang}',
            'quality_scores': {
                'overall': overall,
                'fluency': fluency,
                'adequacy': adequacy
            },
            'errors': errors,
            # Any empty value (not only []) counts as "no errors".
            'verified': not errors
        })

    # Save TM
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(tm, f, ensure_ascii=False, indent=2)

    print("Translation Memory created:")
    print(f" Total entries: {len(tm)}")
    print(f" Quality threshold: {min_score}")
    print(f" Verified (error-free): {sum(1 for e in tm if e['verified'])}")
    print(f" Saved to: {output_file}")
# Build TM: keep only Cebuano pairs scoring at least 85 overall and write
# them to cebuano_tm.json.
build_translation_memory(
    src_file='corpus_en.txt',
    tgt_file='corpus_ceb.txt',
    tgt_lang='CEB',
    min_score=85,
    output_file='cebuano_tm.json'
)
Example 7: Batch Processing with Reports
Scenario: Process multiple files and generate comprehensive reports
from wimarka.main import wmk_eval, results
import pandas as pd
from datetime import datetime
def batch_process_with_reports(file_pairs, report_dir='reports',
                               src_lang='EN'):
    """
    Process multiple file pairs and generate reports.

    For each pair, runs wmk_eval, writes a per-sentence CSV named
    ``<source stem>_detailed.csv`` into ``report_dir``, and collects summary
    statistics. After all pairs are processed, writes a timestamped summary
    CSV and prints overall statistics. Nothing is returned.

    Args:
        file_pairs: List of (src_file, tgt_file, tgt_lang) tuples
        report_dir: Directory for output reports
        src_lang: Source language code passed to wmk_eval
    """
    import os

    os.makedirs(report_dir, exist_ok=True)
    all_results = []

    for src_file, tgt_file, tgt_lang in file_pairs:
        print(f"\nProcessing: {os.path.basename(src_file)}")
        print("-" * 50)

        # Evaluate
        wmk_eval(src_file, src_lang, tgt_file, tgt_lang)

        # NOTE(review): assumes `results` holds only the output of the
        # wmk_eval call just made -- confirm against the library.
        overall = results['overall_score']
        fluency = results['fluency_score']
        adequacy = results['adequacy_score']
        if len(overall) == 0:
            # Averages, min and max are undefined without scores.
            print(f"Warning: No scores produced for {src_file}; skipping.")
            continue

        # Collect statistics
        all_results.append({
            'source_file': os.path.basename(src_file),
            'target_file': os.path.basename(tgt_file),
            'target_language': tgt_lang,
            'sentence_count': len(results['source']),
            'avg_overall': sum(overall) / len(overall),
            'avg_fluency': sum(fluency) / len(fluency),
            'avg_adequacy': sum(adequacy) / len(adequacy),
            'min_score': min(overall),
            'max_score': max(overall),
            'error_count': sum(1 for e in results['errors'] if e),
            'perfect_count': sum(1 for e in results['errors'] if not e),
            'timestamp': datetime.now().isoformat()
        })

        # Generate per-file detailed report
        df_detailed = pd.DataFrame({
            'Line': range(1, len(results['source']) + 1),
            'Source': results['source'],
            'Target': results['target'],
            'Overall_Score': overall,
            'Fluency_Score': fluency,
            'Adequacy_Score': adequacy,
            'Errors': results['errors'],
            'Explanation': results['explanation'],
            'Correction': results['corrected_translation']
        })
        # splitext keeps the report name a .csv even when the source file
        # does not end in ".txt".
        stem = os.path.splitext(os.path.basename(src_file))[0]
        detail_file = os.path.join(report_dir, f"{stem}_detailed.csv")
        df_detailed.to_csv(detail_file, index=False, encoding='utf-8')
        print(f"Detailed report saved: {detail_file}")

    if not all_results:
        # An empty summary frame has no columns, so the statistics below
        # would fail; report the situation and stop.
        print("\nNo file pairs were evaluated; no summary report written.")
        return

    # Generate summary report
    df_summary = pd.DataFrame(all_results)
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    summary_file = os.path.join(report_dir, f"summary_{stamp}.csv")
    df_summary.to_csv(summary_file, index=False, encoding='utf-8')

    print(f"\n{'='*60}")
    print("BATCH PROCESSING COMPLETE")
    print(f"{'='*60}")
    print(f"Files processed: {len(all_results)}")
    print(f"Summary report: {summary_file}")
    print("\nOverall Statistics:")
    print(f" Average Overall Score: {df_summary['avg_overall'].mean():.2f}")
    print(f" Average Fluency: {df_summary['avg_fluency'].mean():.2f}")
    print(f" Average Adequacy: {df_summary['avg_adequacy'].mean():.2f}")
    print(f" Total Sentences: {df_summary['sentence_count'].sum()}")
    print(f" Total Perfect Translations: {df_summary['perfect_count'].sum()}")
# Run batch processing. Each tuple is
# (source file, translation file, target language code).
file_pairs = [
    ('dataset/en_news.txt', 'dataset/ceb_news.txt', 'CEB'),
    ('dataset/en_stories.txt', 'dataset/ceb_stories.txt', 'CEB'),
    ('dataset/en_docs.txt', 'dataset/ilo_docs.txt', 'ILO'),
]
batch_process_with_reports(file_pairs, report_dir='evaluation_reports')
Domain-Specific Examples
Example 8: News Article Translation
Input: News article about Philippine culture
news_en.txt:
The Sinulog Festival is celebrated every third Sunday of January in Cebu City.
It honors the Santo Niño and features colorful street parades and dances.
Thousands of tourists visit Cebu during this grand festival.
news_ceb.txt:
Ang Sinulog Festival gisaulog matag ikatulong Dominggo sa Enero sa Dakbayan sa Sugbo.
Kini nagpasidungog sa Santo Niño ug adunay mabulukon nga mga parada ug sayaw sa dalan.
Libolibo nga mga turista ang mobisita sa Sugbo niining dakong pista.
# Evaluate the news article translation from the shell.
# A single trailing backslash continues the command on the next line.
wimarka --src_file_path news_en.txt \
        --src_lang EN \
        --tgt_file_path news_ceb.txt \
        --tgt_lang CEB
Example 9: Educational Content
Input: Science education text
science_en.txt:
The water cycle includes evaporation, condensation, and precipitation.
Water from oceans and lakes evaporates into the atmosphere.
It then condenses to form clouds and falls back as rain.
science_tgt.txt:
Ang siklo ng tubig ay kinabibilangan ng evaporation, condensation, at precipitation.
Ang tubig mula sa dagat at lawa ay umaangat sa atmospera.
Pagkatapos ito ay bumubuo ng mga ulap at bumabalik bilang ulan.
Example 10: Conversational Dialogues
Input: Casual conversation
dialogue_en.txt:
Hi! Long time no see!
How have you been?
Let's grab coffee sometime.
dialogue_ceb.txt:
Hoy! Dugay ta nawala nakita!
Kumusta naman ka?
Magkape ta unya uy.
Troubleshooting Examples
Example 11: Handling Mixed Languages
Problem: Source file contains mixed languages
Bad Practice:
# mixed_source.txt
Hello! Kumusta ka? # ← English and Tagalog mixed
Good morning!
Good Practice:
# en_source.txt
Hello! How are you?
Good morning!
# tgt_target.txt
Kamusta! Kumusta ka?
Magandang umaga!
Example 12: Resolving Line Misalignment
Problem: Different line counts
Error:
ValueError: Source and target files must have the same number of lines.
Solution:
def check_alignment(src_file, tgt_file):
    """
    Check if files are properly aligned.

    Prints the line count of each file. When the counts differ, also prints
    the 1-based position of every blank (whitespace-only) line in each file,
    since stray blank lines are a common cause of a mismatch. Nothing is
    returned.

    Args:
        src_file: Path to the source text file (read as UTF-8)
        tgt_file: Path to the target translation file (read as UTF-8)
    """
    def read_lines(path):
        with open(path, encoding='utf-8') as f:
            return f.readlines()

    src_lines = read_lines(src_file)
    tgt_lines = read_lines(tgt_file)

    print(f"Source lines: {len(src_lines)}")
    print(f"Target lines: {len(tgt_lines)}")

    if len(src_lines) == len(tgt_lines):
        print("\n✓ Files are aligned")
        return

    print("\n⚠ FILES NOT ALIGNED!")
    print("\nChecking for empty lines:")
    for label, lines in (('source', src_lines), ('target', tgt_lines)):
        for line_no, line in enumerate(lines, 1):
            if not line.strip():
                print(f" Empty line in {label} at line {line_no}")
# Compare the line counts of the two files before running an evaluation.
check_alignment('source.txt', 'target.txt')
Next Steps
Review "Understanding Output Format" to better understand evaluation results.
See "Python Library Usage" for more programming patterns.
See "API Reference" for complete API documentation.