import unittest
from service.TesseractOCRService import TesseractOCRService


class TestPostprocessOCRText(unittest.TestCase):
    """Tests for ``TesseractOCRService.postprocess_ocr_output``.

    Each test feeds a raw string to the service and compares the returned
    value against the expected cleaned-up string.
    """

    def setUp(self) -> None:
        """Create a fresh OCR service instance before every test."""
        self.ocr_service: TesseractOCRService = TesseractOCRService()

    def _assert_postprocessed(self, input_text: str, expected_output: str) -> None:
        """Run ``postprocess_ocr_output`` on *input_text* and compare the result.

        Args:
            input_text: Raw text passed to the service.
            expected_output: Exact string the service is expected to return.
        """
        result = self.ocr_service.postprocess_ocr_output(input_text)

        self.assertEqual(expected_output, result, f"Expected '{expected_output}' but got '{result}'")

    def test_postprocess_ocr_output_white_spaces(self):
        """Leading spaces, repeated spaces and a tab yield single-space, lowercase text."""
        self._assert_postprocessed(
            "  This  is  a  test \t 1234",
            "this is a test 1234",
        )

    def test_postprocess_ocr_output_only_alphanumeric(self):
        """Brackets and asterisks are expected to be absent from the output."""
        self._assert_postprocessed(
            "  This [is  ]  a   test 1234 ***",
            "this is a test 1234",
        )

