Source code for wimarka.main

import wimarka.tasks.error_detection as error_detection
import wimarka.tasks.scoring as scoring
import wimarka.tasks.explanation as explanation
import wimarka.tasks.correction  as correction

import wimarka.utils.helper as helper
from wimarka.utils.logger import setup_logger

# Module-level logger used for progress messages during evaluation;
# created by the project's setup_logger() helper (wimarka.utils.logger).
logger = setup_logger()

# Column-oriented accumulator filled by wmk_eval: every key maps to its own
# list, and entry i of each list belongs to the i-th evaluated line pair.
results = {
    column: []
    for column in (
        "source",
        "target",
        "errors",
        "fluency_score",
        "adequacy_score",
        "overall_score",
        "explanation",
        "corrected_translation",
    )
}


[docs]
def wmk_eval(src_file_path: str, src_lang: str, tgt_file_path: str, tgt_lang: str) -> None:
    """Run the evaluation pipeline over a pair of line-aligned text files.

    Line i of the source file is paired with line i of the target file.
    Each pair is stripped, tagged via ``helper.add_tag`` and passed through
    error detection, scoring, explanation and correction, in that order.
    The per-line outputs are appended to the module-level ``results`` dict,
    which is finally handed to ``helper.printEvaluationResults``.

    Args:
        src_file_path: Path to the UTF-8 source-language file.
        src_lang: Source language tag, passed to ``helper.check_tag`` and
            ``helper.add_tag``.
        tgt_file_path: Path to the UTF-8 target-language (translation) file.
        tgt_lang: Target language tag, passed to ``helper.check_tag`` and
            ``helper.add_tag``.

    Raises:
        ValueError: If the two files do not contain the same number of lines.
        OSError: If either file cannot be opened.
    """
    helper.check_tag(src_lang, tgt_lang)
    logger.info("Starting evaluation...")

    # Read each file exactly once, inside a context manager, so the handles
    # are closed deterministically instead of being left to the garbage
    # collector.
    with open(src_file_path, 'r', encoding='utf-8') as src_file:
        src_lines = src_file.readlines()
    with open(tgt_file_path, 'r', encoding='utf-8') as tgt_file:
        tgt_lines = tgt_file.readlines()

    total_lines = len(src_lines)
    if total_lines != len(tgt_lines):
        raise ValueError("Source and target files must have the same number of lines.")

    # NOTE(review): ``results`` is module-level state, so calling this
    # function more than once in a process accumulates entries across
    # calls -- confirm this is intended.
    for line_no, (src_line, tgt_line) in enumerate(zip(src_lines, tgt_lines), start=1):

        logger.info(f"Evaluating line {line_no}/{total_lines}")

        src_line = helper.add_tag(src_line.strip(), src_lang)
        tgt_line = helper.add_tag(tgt_line.strip(), tgt_lang)

        logger.info("Detecting errors...")
        errors = error_detection.error_detection(src_line, tgt_line)

        logger.info("Scoring translation...")
        fluency, adequacy, overall = scoring.scoring(src_line, tgt_line, errors)

        logger.info("Generating explanation...")
        comments = explanation.generate_explanation(src_line, tgt_line, errors, fluency, adequacy, overall)

        logger.info("Correcting translation...")
        corrected_translation = correction.generate_correction(src_line, tgt_line, errors, comments)

        results["source"].append(src_line)
        results["target"].append(tgt_line)
        results["errors"].append(errors)
        results["fluency_score"].append(fluency)
        results["adequacy_score"].append(adequacy)
        results["overall_score"].append(overall)
        results["explanation"].append(comments)
        results["corrected_translation"].append(corrected_translation)

    helper.printEvaluationResults(results)
    print("Evaluation completed.")


if __name__ == "__main__":
    # wmk_eval() has four required positional parameters, so calling it with
    # no arguments always raised TypeError. Take the values from the command
    # line instead. argparse is imported here because it is only needed when
    # the module is run as a script.
    import argparse

    parser = argparse.ArgumentParser(
        description="Evaluate a translation file against its source file."
    )
    parser.add_argument("src_file_path", help="path to the source-language file")
    parser.add_argument("src_lang", help="source language tag")
    parser.add_argument("tgt_file_path", help="path to the target-language file")
    parser.add_argument("tgt_lang", help="target language tag")
    args = parser.parse_args()

    wmk_eval(args.src_file_path, args.src_lang, args.tgt_file_path, args.tgt_lang)