# main.py
import os
import cv2
import openai
import random
from string import ascii_uppercase
from time import sleep
from dotenv import load_dotenv
from ppadb.client import Client
from pytesseract import pytesseract

def wait_for_device():
    """Waits for a device to be connected via USB and returns it."""
    print("📲 Connecting to ADB...")
    adb = Client(host='127.0.0.1', port=5037)
    devices = adb.devices()
    if not devices:
        print('📵 No device found! Retrying in 5s...\n')
        sleep(5)
        return wait_for_device()
    print('📱 Connected!\n')
    return devices[0]


def delete_screenshots():
    """Deletes all images in the 'screenshots' directory."""
    dir = './screenshots'
    for f in os.listdir(dir):
        if f != ".gitkeep":
            os.remove(os.path.join(dir, f))

def parse_slice_dimensions(dim):
    """Parses the given slice dimensions to a 2D array and returns it."""
    return [[int(x[0]), int(x[1])] for x in (wh.split(":") for wh in dim.split("-"))]
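
# Illustrative usage (the pixel values are hypothetical, not taken from the
# project's configuration): a slice string has the form "y1:y2-x1:x2", so
#   parse_slice_dimensions("120:480-0:1080")  ->  [[120, 480], [0, 1080]]
# where the first pair is the row (y) range and the second the column (x)
# range that extract_texts() cuts out of the screenshot.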

def calc_slice_center(s):
    """Calculates the center coordinates (x|y) of the given slice and returns it."""
    return [(s[1][0] + s[1][1]) / 2, (s[0][0] + s[0][1]) / 2]
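
# Continuing the hypothetical example above:
#   calc_slice_center([[120, 480], [0, 1080]])  ->  [540.0, 300.0]
# i.e. the (x|y) coordinates that are later tapped for that slice.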

def extract_texts(img, slices, lang):
    """Extracts the texts in the given image slices and returns them."""
    images = [img[s[0][0]:s[0][1], s[1][0]:s[1][1]] for s in slices]
    texts = []
    for idx, img in enumerate(images):
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        cv2.imwrite('./screenshots/img_{}_gray.jpg'.format(idx), gray)
        ret, thresh1 = cv2.threshold(
            gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
        cv2.imwrite('./screenshots/img_{}_thresh.jpg'.format(idx), thresh1)
        rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (12, 12))
        dilation = cv2.dilate(thresh1, rect_kernel, iterations=3)
        cv2.imwrite('./screenshots/img_{}_dilation.jpg'.format(idx), dilation)
        contours, _ = cv2.findContours(
            dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        im2 = img.copy()
        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            # Draw the bounding box on the text area
            rect = cv2.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 2)
            # Crop the bounding box area
            cropped = im2[y:y + h, x:x + w]
            cv2.imwrite('./screenshots/img_{}.jpg'.format(idx), rect)
            # Run Tesseract on the cropped image area to get the text
            text = pytesseract.image_to_string(cropped, lang=lang)
            # Clean the text: collapse newlines and double spaces
            text = text.replace("\n", " ").replace("  ", " ").strip()
            if text:
                texts.append(text)
    return texts
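
# Note: the function below uses the pre-1.0 `openai` Python package, where
# `openai.ChatCompletion.create` is the chat completion entry point; in
# openai>=1.0 this interface was replaced by a client object.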
def prompt_chatgpt(question, gpt_key):
    """Sends a question to the ChatGPT API and returns the answer."""
    openai.api_key = gpt_key
    completions = openai.ChatCompletion.create(
        model=os.getenv('GPT_MODEL'),
        messages=[{'role': 'user', 'content': question}]
    )
    return completions.choices[0].message.content.strip()
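
# The main block below reads its configuration from a .env file; a minimal
# sketch with hypothetical placeholder values (slices use the same
# "y1:y2-x1:x2" format as above, one SLICE_ANSW_<letter> per answer field):
#
#   GPT_KEY=<your OpenAI API key>
#   GPT_MODEL=gpt-3.5-turbo
#   TESSERACT_LANG=eng
#   TOUCH_RANDOMNESS=10
#   SLICE_Q=120:480-0:1080
#   SLICE_ANSW_A=520:640-0:1080
#   SLICE_ANSW_B=660:780-0:1080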

if __name__ == '__main__':
    load_dotenv()

    # ADB setup
    device = wait_for_device()

    # Build slices & calculate answer center coordinates
    ANSWERS_CENTER = {}
    SLICES = [parse_slice_dimensions(os.getenv('SLICE_Q'))]
    for char in ascii_uppercase:
        if os.getenv('SLICE_ANSW_' + char) is None:
            break
        s = parse_slice_dimensions(os.getenv('SLICE_ANSW_' + char))
        SLICES.append(s)
        ANSWERS_CENTER[char] = calc_slice_center(s)
    TOUCH_RANDOMNESS = int(os.getenv('TOUCH_RANDOMNESS'))

    input('❓ Open a question and press <Enter> to start...')
    while True:
        print('\n----------------------------------')
        delete_screenshots()

        print('\n📸 Taking screenshot...')
        screenshot = device.screencap()
        with open('./screenshots/screen.jpg', 'wb') as f:
            f.write(screenshot)
        src = cv2.imread('./screenshots/screen.jpg')

        print('\n📋 Extracting texts...')
        texts = extract_texts(src, SLICES, os.getenv('TESSERACT_LANG'))
        print(texts)

        if len(texts) != len(SLICES):
            print('😟 Could not recognize all texts!')
            answer = input('Enter alternative answer: ').upper()
        else:
            # Build question string with possible answers
            question = texts[0]
            for i, answ in enumerate(texts[1:]):
                question += " {}: {}?".format(ascii_uppercase[i], answ)
            question += " " + ", ".join(ANSWERS_CENTER.keys()) + "?"
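            # The assembled prompt then looks something like this (hypothetical
            # OCR output): "What is the capital of France? A: Berlin? B: Paris?
            # C: Madrid? D: Rome? A, B, C, D?"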
            # Ask ChatGPT
            print('\n🧠 Asking ChatGPT...')
            message = prompt_chatgpt(question, os.getenv('GPT_KEY'))
            print('🙋 Answer given: ' + message)
            answer = message[0]
            if answer not in ANSWERS_CENTER:
                print('😟 No definite answer found!')
                answer = input('Enter alternative answer: ').upper()

        print('\n👆 Entering answer: {}'.format(answer))
        [x, y] = ANSWERS_CENTER.get(answer)
        # Adding randomness
        rnd = random.randint(-TOUCH_RANDOMNESS, TOUCH_RANDOMNESS)
        device.input_tap(x + rnd, y + rnd)
        input('\nPress <Enter> to continue...')
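
# Typical run (assumed workflow, not documented in this file): enable USB
# debugging on the device, make sure the adb server is running
# (`adb start-server`), then launch the script with `python main.py` and
# follow the prompts.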