Tkinter库的学习历程:设计一个OCR文字识别的GUI界面

3 min read Page Views

1.问题引入

现需要设计一个OCR文字识别的GUI界面,要求至少能够识别数字以及中英文字符,并且能够应对一些复杂场景的图片文字识别。

2.Python程序

import os
from cnocr import CnOcr
import tkinter as tk
from tkinter.filedialog import *
from PIL import ImageTk, Image

"""
os: standard library (Python 3.8.10)
cnocr: 2.3.0.3
tkinter: Tk 8.6
PIL: 10.4.0
"""

class OCR:
    """Tk callbacks and state for the OCR desktop tool.

    The methods operate on widgets that the script creates at module level
    (``path``, ``entry1``, ``button2``, ``text``, ``image_label`` and
    ``menu``), so those names must exist before any callback fires.
    """

    # Pixel size of the preview area that a selected image is fitted into.
    PREVIEW_WIDTH = 560
    PREVIEW_HEIGHT = 260

    def __init__(self):
        # Full path of the currently selected image ('' when none is selected).
        self.path = ''
        # File name (directory stripped) of the image last handed to ocr().
        self.img_path = ''
        # CnOcr engine, created on first use so the model is loaded only once.
        self._engine = None

    @staticmethod
    def _fit_size(width, height, box_width, box_height):
        """Return ``(w, h)`` scaled to fit the box while keeping aspect ratio."""
        if width / height < box_width / box_height:
            # Relatively taller than the box: height is the limiting side.
            return max(1, int(width * box_height / height)), box_height
        # Relatively wider than the box: width is the limiting side.
        # max(1, ...) guards against a 0-pixel side for extreme aspect ratios.
        return box_width, max(1, int(height * box_width / width))

    @staticmethod
    def _clear_text():
        """Empty the read-only result widget."""
        text.config(state=tk.NORMAL)
        text.delete(1.0, tk.END)
        text.config(state=tk.DISABLED)

    def openfile(self):
        """Ask the user for an image, show a preview and arm the OCR button.

        When the dialog is cancelled the preview is blanked and the OCR
        button is disabled.

        Returns:
            The module-level ``path`` StringVar holding the selected path.
        """
        global file_path, path, photo
        file_path = askopenfilename(title='读取图片:', filetypes=[('Image Files', '*.jpg'), ('Image Files', '*.jpeg'),
                                                                  ('Image Files', '*.png')])
        # A cancelled dialog yields an empty value; normalise it to ''.
        path.set(file_path or '')
        self.path = path.get()
        self._clear_text()

        box_width, box_height = self.PREVIEW_WIDTH, self.PREVIEW_HEIGHT
        if entry1.get():
            button2.config(state=tk.NORMAL)
            # Open via the full path: the bare file name only resolves when
            # the image happens to live in the current working directory.
            image = Image.open(self.path)
            image = image.resize(self._fit_size(*image.size, box_width, box_height))
            photo = ImageTk.PhotoImage(image)
        else:
            button2.config(state=tk.DISABLED)
            # Blank placeholder so the label keeps its pixel dimensions.
            photo = tk.PhotoImage(width=box_width, height=box_height)
        # ``photo`` is a module-level global on purpose: Tk shows nothing once
        # the Python image object is garbage-collected. Width/height are set in
        # both branches because a label showing an image measures them in
        # pixels rather than in text units.
        image_label.config(image=photo, width=box_width, height=box_height)
        return path

    def disabled_editing(self, event):
        """Event handler that switches the result widget back to read-only."""
        text.config(state=tk.DISABLED)

    def popup_menu(self, event):
        """Select all recognised text and open the context menu at the pointer."""
        text.tag_add('sel', '1.0', 'end')
        menu.post(event.x_root, event.y_root)

    def copy_text(self):
        """Copy the current selection of the result widget to the clipboard."""
        text.event_generate('<<Copy>>')

    def ocr(self):
        """Run text recognition on the selected image and display the result.

        Does nothing when no image is selected. After a successful run the
        OCR button is disabled until another image is opened.
        """
        self.img_path = os.path.basename(self.path)
        if not self.path:
            return
        if self._engine is None:
            # Loading the model is expensive; build it once and reuse it.
            self._engine = CnOcr(model_name='chinese', rec_model_name='scene-densenet_lite_136-gru')
        # Recognise via the full path so images outside the CWD work too.
        out = self._engine.ocr(self.path)
        # One recognised line per result entry that carries a 'text' field.
        recognised = ''.join(item['text'] + '\n' for item in out if 'text' in item)
        text.config(state=tk.NORMAL)
        text.insert(tk.END, recognised)
        text.config(state=tk.DISABLED)
        button2.config(state=tk.DISABLED)


if __name__ == '__main__':
    # ---- main window ------------------------------------------------------
    windows = tk.Tk()
    windows.title('OCR文字识别工具')
    windows.geometry('650x650+300+100')
    windows.resizable(width=False, height=False)

    # Callback holder and the variable backing the read-only path entry.
    model = OCR()
    path = tk.StringVar()

    # Look shared by the two action buttons.
    button_style = {'fg': 'black', 'background': '#87ceeb', 'width': 8}

    # ---- row 0: heading ---------------------------------------------------
    heading = tk.Label(windows, text='OCR文字识别工具', font=('Times', 16, 'bold'))
    heading.grid(row=0, column=0, columnspan=4, padx=10, pady=10)

    # ---- row 1: path display and action buttons ---------------------------
    path_caption = tk.Label(windows, text='图片路径:')
    path_caption.grid(row=1, column=0, padx=10, pady=10)

    entry1 = tk.Entry(windows, textvariable=path, width=55, state=tk.DISABLED)
    entry1.grid(row=1, column=1)

    button1 = tk.Button(windows, text='打开图片', command=model.openfile, **button_style)
    button1.grid(row=1, column=2, padx=10, pady=10)

    # Recognition stays disabled until an image has been opened.
    button2 = tk.Button(windows, text='识别文字', command=model.ocr, state=tk.DISABLED, **button_style)
    button2.grid(row=1, column=3, padx=10, pady=10)

    # ---- row 2: image preview ---------------------------------------------
    label_width, label_height = 80, 15
    image_label = tk.Label(
        windows,
        width=label_width,
        height=label_height,
        background='white',
    )
    image_label.grid(row=2, column=0, columnspan=4, padx=10, pady=10)

    # ---- row 3: read-only result text with a vertical scrollbar -----------
    scrollbar_vertical = tk.Scrollbar(windows, width=30, orient=tk.VERTICAL)
    text = tk.Text(
        windows,
        width=80,
        height=13,
        yscrollcommand=scrollbar_vertical.set,
        state=tk.DISABLED,
        background='white',
        font=('Times', 11, 'normal'),
    )
    # Right click selects everything and opens the copy menu.
    text.bind('<Button-3>', model.popup_menu)
    text.grid(row=3, column=0, columnspan=4, padx=10, pady=10)
    scrollbar_vertical.grid(row=3, column=3, sticky=tk.NS)
    scrollbar_vertical.config(command=text.yview)

    # ---- context menu -----------------------------------------------------
    menu = tk.Menu(windows, tearoff=0)
    menu.add_command(label='复制', command=model.copy_text)

    windows.mainloop()

3.效果展示

其中,cnstd-cnocr-models文件夹存放了文字识别相关的模型,下载地址为:

https://github.com/breezedeus/cnstd-cnocr-models

Last updated on 2025-05-10