Hi, what would be the best way to detect text from a “live feed or capture” of a PC screen (or just a portion of the screen), so that the program can act on the text it finds while it is running live? What would be a simple and straightforward method using common libraries such as OpenCV or pytesseract? I am looking for basic examples. Thank you.
import pytesseract
import cv2
import numpy
##some code over here
The best way to detect text is with the code below. It reads the screen live, or takes a screenshot of it (a temporary screenshot, to save memory), and then processes the image with pytesseract to extract the text. The program's behavior then depends on the functions you define, based on what the optical character recognition (OCR) returns. This is an offline example that uses the libraries above or PIL; combine this functionality with a GUI that interacts with the user while it is in use:
The key for the example below is:
Clock.schedule_interval
from pytesseract import image_to_string #keep constantly obtaining the text from live screen
This is a function that exists in most SDKs and GUI toolkits; it repeats a given function at an interval specified in seconds or milliseconds. The full code is:
Result: enter image description here
Full code:
try:
from PIL import ImageGrab #ImageGrab is macOS and Windows only
except:
pass
from PIL import Image
from PIL import ImageChops
try:
from pytesseract import image_to_string
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
except:
pass
from kivy.lang import Builder
from kivymd.app import MDApp
from kivy.clock import Clock
from kivymd.uix.behaviors import (
RectangularElevationBehavior,
BackgroundColorBehavior,
RectangularRippleBehavior
)
from kivymd.uix.behaviors import (
SpecificBackgroundColorBehavior,
RectangularElevationBehavior,
CircularElevationBehavior
)
from kivymd.theming import ThemableBehavior
from kivy.uix.behaviors import ButtonBehavior
from kivy.uix.boxlayout import BoxLayout
from kivy.properties import BooleanProperty, ColorProperty, StringProperty
# Kivy KV layout source; it is passed to Builder.load_string() in
# Einsteniojbsidis.build(). It declares the root screen (two tab buttons that
# reset the data of the ``ritems2`` RecycleView) and the rules for the B_,
# Bane, Bane2, Bane3 and Bane4 viewclasses listed in that RecycleView data.
# NOTE(review): the KV text below carries no indentation in this copy. KV
# nesting is indentation-based, so the leading whitespace was presumably lost
# in transit -- restore it before running.
KV = '''
#pro.py
#pro.py
Screen:
ScreenManager:
Screen:
FloatLayout:
id: changer_jbsidis
BoxLayout:
padding: dp(5),dp(1)
spacing: dp(10)
MDFillRoundFlatIconButton:
icon: "home"
text: "Tab 1"
width: dp(250)
on_release:
app.screen=1
app.globaltext=""
app.root.ids.ritems2.data=[]
app.root.ids.ritems2.data=[{"viewclass": "MDTextButton","font_style": "H3","text":"*"},{"viewclass": "Bane"},{"viewclass": "Bane2"},{"viewclass": "B_"}]
MDFillRoundFlatIconButton:
icon: "pencil"
text: "Tab 2"
width: dp(250)
on_release:
app.screen=2
app.globaltext=""
app.root.ids.ritems2.data=[]
app.root.ids.ritems2.data=[{"viewclass": "MDTextButton","font_style": "H3","text":"*"},{"viewclass": "Bane3"},{"viewclass": "Bane4"},{"viewclass": "B_"}]
BoxLayout:
id: m5
pos_hint: {"center_x": .5, "center_y": .59}
orientation: "horizontal"
padding: dp(1),dp(1)
BoxLayout:
padding: dp(1),dp(1)
pos_hint: {"center_x": .6, "center_y": .5}
#size_hint_x: .34
RecycleView:
id: ritems2
pos_hint: {'x': 0, 'y': 0}
#effect_cls: app.SF()
key_viewclass: "viewclass"
bar_color: [.92,.05,.5,.6]
bar_inactive_color: [1,1,1,0]
bar_width: 5
#data: [{"viewclass": "MDTextButton","font_style": "H3","text":"Einstenio"},{"viewclass": "Bane"},{"viewclass": "Bane2"},{"viewclass": "B_"}]
#
data: [{"viewclass": "MDTextButton","font_style": "H3","text":"*"},{"viewclass": "Bane3"},{"viewclass": "Bane4"},{"viewclass": "B_"}]
#scroll_wheel_distance: sp(80)
RecycleGridLayout:
id: cat2
cols: 1
padding: dp(5),dp(75)
spacing: dp(10),dp(30)
default_size: None, dp(265)
default_size_hint: 1,None
size_hint_y: None
height: self.minimum_height
<B_@BoxLayout>:
orientation: "vertical"
canvas.after:
Color:
rgba: [0,0,0,0]
Line:
width: dp(1.2)
rounded_rectangle:
(self.x, self.y, self.width, self.height+dp(50),
dp(9),dp(9),dp(9),dp(9),
dp(50))
<Bane>:
BoxLayout:
canvas:
Color:
rgba: [1,1,1,1]
RoundedRectangle:
pos: self.pos[0],self.pos[1]
size: self.width, dp(270)
radius: [dp(14),dp(14),dp(14),dp(14)]
id: gg
elevation: 1
orientation: "vertical"
padding: dp(5),dp(1)
BoxLayout:
padding: dp(5)
canvas:
Color:
rgba: [1,1,1,1]
RoundedRectangle:
pos: self.width/2.3,self.pos[1]-dp(10)
size: dp(70), dp(70)
radius: [dp(14),dp(14),dp(14),dp(14)]
source: app.resource_path("logo.ico") if not root.pic else app.resource_path("logo.ico")
canvas.after:
Color:
rgba: [.7,0,1,.5]
Line:
width: dp(1)
rounded_rectangle:
(self.x+dp(196), self.y-dp(10), dp(70),dp(70),
dp(14),dp(14),dp(14),dp(14),
dp(50))
MDLabel:
markup: True
text: "[b] Pregunta:\n[/b]"
TextInput:
id: thetext5
height: dp(40)
pos_hint: {"center_x": .5, "top": .975}
font_size: "14sp"
hint_text_color: [0,0,0, .7]
foreground_color: [0,.1,1,1]
background_color: [1,1,1,0]
multiline: True
canvas.after:
Color:
rgba: [.1,.05,1,.6]
Line:
width: dp(2)
rounded_rectangle:
(self.x, self.y, self.width+dp(1), dp(115),
dp(8),dp(8),dp(8),dp(8),
dp(50))
size_hint: 1,3
text: root.question
BoxLayout:
id: co
padding: dp(10),dp(-5)
pos_hint: {"center_x": .5, "center_y": -.6}
FloatLayout:
MDFillRoundFlatIconButton: #MDIconButton:
#size: (dp(30), dp(30))
user_font_size: "17sp"
text: "Obtener respuesta"
pos_hint: {"center_x": .22, "center_y": 1.4}
theme_text_color: "Custom"
text_color: [1,1,1,1]
md_bg_color: [1,.1,.1,.5]
icon: "send"
on_release: app.obtener_respuesta(root.ids.thetext5.text)
<Bane2>:
BoxLayout:
canvas:
Color:
rgba: [1,1,0,1]
RoundedRectangle:
pos: self.pos[0],self.pos[1]
size: self.width, dp(270)
radius: [dp(14),dp(14),dp(14),dp(14)]
id: gg
elevation: 1
orientation: "vertical"
padding: dp(5),dp(1)
BoxLayout:
padding: dp(5)
canvas:
Color:
rgba: [1,1,1,1]
RoundedRectangle:
pos: self.width/2.3,self.pos[1]-dp(10)
size: dp(70), dp(70)
radius: [dp(14),dp(14),dp(14),dp(14)]
source: app.resource_path("logo.ico")
canvas.after:
Color:
rgba: [.7,0,1,.5]
Line:
width: dp(1)
rounded_rectangle:
(self.x+dp(196), self.y-dp(10), dp(70),dp(70),
dp(14),dp(14),dp(14),dp(14),
dp(50))
MDLabel:
markup: True
font_style: "H5"
text: "[b] Respuesta:\n[/b]"
TextInput:
height: dp(40)
pos_hint: {"center_x": .5, "top": .975}
font_size: "14sp"
hint_text_color: [0,0,0, .7]
foreground_color: [0,.1,1,1]
background_color: [1,1,1,0]
multiline: True
canvas.after:
Color:
rgba: [.42,.05,.6,.6]
Line:
width: dp(2)
rounded_rectangle:
(self.x, self.y, self.width+dp(1), dp(115),
dp(8),dp(8),dp(8),dp(8),
dp(50))
size_hint: 1,3
text: root.answer
BoxLayout:
id: co
padding: dp(10),dp(-5)
pos_hint: {"center_x": .5, "center_y": -.6}
FloatLayout:
<Bane3>:
BoxLayout:
canvas:
Color:
rgba: [1,1,1,1]
RoundedRectangle:
pos: self.pos[0],self.pos[1]
size: self.width, dp(270)
radius: [dp(14),dp(14),dp(14),dp(14)]
id: gg
elevation: 1
orientation: "vertical"
padding: dp(5),dp(1)
BoxLayout:
padding: dp(5)
canvas:
Color:
rgba: [1,1,1,1]
RoundedRectangle:
pos: self.width/2.3,self.pos[1]-dp(10)
size: dp(70), dp(70)
radius: [dp(14),dp(14),dp(14),dp(14)]
source: app.resource_path("logo.ico")
canvas.after:
Color:
rgba: [.7,0,1,.5]
Line:
width: dp(1)
rounded_rectangle:
(self.x+dp(196), self.y-dp(10), dp(70),dp(70),
dp(14),dp(14),dp(14),dp(14),
dp(50))
MDLabel:
markup: True
text: "[b] Agregando texto de cada Snipping capture:\n[/b]"
TextInput:
id: box5
height: dp(40)
pos_hint: {"center_x": .5, "top": .975}
font_size: "14sp"
hint_text_color: [0,0,0, .7]
foreground_color: [0,.1,1,1]
background_color: [1,1,1,0]
multiline: True
canvas.after:
Color:
rgba: [.42,.05,.6,.6]
Line:
width: dp(2)
rounded_rectangle:
(self.x, self.y, self.width+dp(1), dp(115),
dp(8),dp(8),dp(8),dp(8),
dp(50))
size_hint: 1,3
text: root.question
BoxLayout:
id: co
padding: dp(10),dp(-5)
pos_hint: {"center_x": .5, "center_y": -.6}
FloatLayout:
MDIconButton:
#size: (dp(20), dp(20))
user_font_size: "17sp"
pos_hint: {"center_x": .59, "center_y": 6.4}
theme_text_color: "Custom"
text_color: [1,1,1,1]
md_bg_color: [0,.9,.1,1]
icon: "camera"
#on_release:
MDFillRoundFlatIconButton: #MDIconButton:
#size: (dp(30), dp(30))
user_font_size: "17sp"
text: "Obtener respuesta"
pos_hint: {"center_x": .22, "center_y": 1.4}
theme_text_color: "Custom"
text_color: [1,1,1,1]
md_bg_color: [.1,.5,.1,1]
icon: "send"
on_release: app.obtener_respuesta(root.ids.box5.text)
MDFillRoundFlatIconButton: #MDIconButton:
#size: (dp(30), dp(30))
user_font_size: "17sp"
text: "Borrar "+str(len(root.ids.box5.text))+" letras"
pos_hint: {"center_x": .75, "center_y": 1.4}
theme_text_color: "Custom"
text_color: [1,1,1,1]
md_bg_color: [.1,.1,1,.9]
icon: "delete"
on_release:
app.globaltext=""
root.ids.box5.text=""
<Bane4>:
BoxLayout:
canvas:
Color:
rgba: [1,1,1,1]
RoundedRectangle:
pos: self.pos[0],self.pos[1]
size: self.width, dp(270)
radius: [dp(14),dp(14),dp(14),dp(14)]
id: gg
elevation: 1
orientation: "vertical"
padding: dp(5),dp(1)
BoxLayout:
padding: dp(5)
canvas:
Color:
rgba: [1,1,1,1]
RoundedRectangle:
pos: self.width/2.3,self.pos[1]-dp(10)
size: dp(70), dp(70)
radius: [dp(14),dp(14),dp(14),dp(14)]
source: app.resource_path("logo.ico")
canvas.after:
Color:
rgba: [.7,0,1,.5]
Line:
width: dp(1)
rounded_rectangle:
(self.x+dp(196), self.y-dp(10), dp(70),dp(70),
dp(14),dp(14),dp(14),dp(14),
dp(50))
MDLabel:
markup: True
text: "[b] Respuesta obtenida:\n[/b]"
TextInput:
height: dp(40)
pos_hint: {"center_x": .5, "top": .975}
font_size: "14sp"
hint_text_color: [0,0,0, .7]
foreground_color: [0,.1,1,1]
background_color: [1,1,1,0]
multiline: True
canvas.after:
Color:
rgba: [.42,.05,.6,.6]
Line:
width: dp(2)
rounded_rectangle:
(self.x, self.y, self.width+dp(1), dp(115),
dp(8),dp(8),dp(8),dp(8),
dp(50))
size_hint: 1,3
text: root.answer
BoxLayout:
id: co
padding: dp(10),dp(-5)
pos_hint: {"center_x": .5, "center_y": -.6}
FloatLayout:
MDIconButton:
#size: (dp(20), dp(20))
user_font_size: "17sp"
pos_hint: {"center_x": .59, "center_y": 6.4}
theme_text_color: "Custom"
text_color: [1,1,1,1]
md_bg_color: [1,0,0,1]
icon: "bell"
#on_release:
'''
class Bane(
    ThemableBehavior,
    RectangularElevationBehavior,
    SpecificBackgroundColorBehavior,
    BoxLayout,
):
    """Card widget whose appearance is defined by the ``<Bane>`` rule in ``KV``."""

    # Bound to the card's text input in the KV rule (``text: root.question``).
    question = StringProperty()
    # Read by the KV rule when choosing the logo image source (``root.pic``).
    pic = StringProperty()
    # Declared for parity with the other card classes; the <Bane> rule does not read it.
    answer = StringProperty()
class Bane2(
    ThemableBehavior,
    RectangularElevationBehavior,
    SpecificBackgroundColorBehavior,
    BoxLayout,
):
    """Card widget whose appearance is defined by the ``<Bane2>`` rule in ``KV``."""

    # Declared for parity with the other card classes; the <Bane2> rule does not read it.
    question = StringProperty()
    # Declared for parity with the other card classes; the <Bane2> rule does not read it.
    pic = StringProperty()
    # Bound to the card's text input in the KV rule (``text: root.answer``).
    answer = StringProperty()
class Bane3(
    ThemableBehavior,
    RectangularElevationBehavior,
    SpecificBackgroundColorBehavior,
    BoxLayout,
):
    """Card widget whose appearance is defined by the ``<Bane3>`` rule in ``KV``."""

    # Bound to the ``box5`` text input in the KV rule (``text: root.question``).
    question = StringProperty()
    # Declared for parity with the other card classes; the <Bane3> rule does not read it.
    pic = StringProperty()
    # Declared for parity with the other card classes; the <Bane3> rule does not read it.
    answer = StringProperty()
class Bane4(
    ThemableBehavior,
    RectangularElevationBehavior,
    SpecificBackgroundColorBehavior,
    BoxLayout,
):
    """Card widget whose appearance is defined by the ``<Bane4>`` rule in ``KV``."""

    # Declared for parity with the other card classes; the <Bane4> rule does not read it.
    question = StringProperty()
    # Declared for parity with the other card classes; the <Bane4> rule does not read it.
    pic = StringProperty()
    # Bound to the card's text input in the KV rule (``text: root.answer``).
    answer = StringProperty()
import sys,os
class Einsteniojbsidis(MDApp):
    """KivyMD application shell for the screen-text example.

    The app loads the ``KV`` layout and schedules a periodic callback that is
    handed the paths of two capture images (``temp.png`` and ``n.png``). The
    comparison callbacks are still placeholders that only print their
    arguments.
    """

    # Seconds between two runs of the periodic capture check.
    CAPTURE_CHECK_INTERVAL = 4

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.title = "Einstenio V0.1 "

    def build(self):
        """Start the periodic capture check and return the root widget built from ``KV``."""
        # Keep the returned event so the polling can be cancelled later.
        self._capture_check_event = Clock.schedule_interval(
            self._poll_capture, self.CAPTURE_CHECK_INTERVAL
        )
        # Kept for backward compatibility: code outside this class may still
        # reach the running app through this module-level name.
        global app
        app = MDApp.get_running_app()
        return Builder.load_string(KV)

    def _poll_capture(self, dt):
        """Clock callback: pass both capture image paths to the comparison hook.

        ``dt`` is the argument Kivy's Clock passes to scheduled callbacks; it
        is not used here.
        """
        return self.check_if_images_are_the_same2(
            self.resource_path('temp.png'),
            self.resource_path('n.png'),
        )

    def resource_path(self, relative_path):
        """Get the absolute path to a resource; works for dev and for PyInstaller.

        Uses ``sys._MEIPASS`` when that attribute exists and otherwise the
        directory that contains this file.
        """
        base_path = getattr(sys, '_MEIPASS', os.path.dirname(os.path.abspath(__file__)))
        return os.path.join(base_path, relative_path)

    def multiple_images(self):
        """Reset the ``ritems2`` RecycleView to the Bane3/Bane4 card list."""
        recycle_view = self.root.ids.ritems2
        # Same two-step assignment as the tab buttons in KV: clear, then refill.
        recycle_view.data = []
        recycle_view.data = [
            {"viewclass": "MDTextButton", "font_style": "H3", "text": "*"},
            {"viewclass": "Bane3"},
            {"viewclass": "Bane4"},
            {"viewclass": "B_"},
        ]

    def obtener_respuesta(self, text):
        """Handle the "Obtener respuesta" buttons declared in ``KV``.

        The layout calls ``app.obtener_respuesta(<text input content>)`` on
        release, so this method has to exist or the click raises
        ``AttributeError``. Like the other callbacks in this class it is a
        placeholder that only prints what it received.
        """
        print(text)

    def check_if_images_are_the_same(self, new, last):
        """Placeholder comparison hook: prints both image paths.

        The disabled sketch below is kept for reference only; it is not
        executed.
        """
        print(new, last)
        # im = ImageGrab.grabclipboard()
        # print(im)
        # if im is not None:
        #     im.save(self.resource_path('temp.png'), 'PNG')
        # try:
        #     i1 = Image.open(new)
        #     i2 = Image.open(last)
        #     diff = ImageChops.difference(i1, i2)
        #     channels = diff.split()
        #     for channel in channels:
        #         if channel.getbbox() is not None:
        #             self.listen_new_image()
        #             return False
        #     print("images are the same")
        #     return True
        # except Exception as e:
        #     print(str(e))

    def check_if_images_are_the_same2(self, new, last):
        """Placeholder hook run by the periodic check: prints both image paths."""
        print(new, last)
if __name__ == "__main__":
    # Start the app only when this file is executed as a script, so that
    # importing the module does not open a window.
    Einsteniojbsidis().run()
jbsidis is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.