Hey, so I found some Python code on the internet and it runs really well, but I wanted it to be faster, so I tried to translate it to C++. It compiles and works: it draws rectangles on a separate window just like the Python version, but it can't find some of the objects the Python version can. I ran them side by side and Python detected things the C++ version missed. I don't know what the problem is, because I wrote the same function calls in C++, and I'm hoping someone more experienced with both can help me.
Here is the code:
import numpy as np
import win32gui, win32ui, win32con
from PIL import Image
from time import sleep
import cv2 as cv
import os
import random
class WindowCapture:
    w = 0
    h = 0
    hwnd = None

    def __init__(self, window_name):
        self.hwnd = win32gui.FindWindow(None, window_name)
        if not self.hwnd:
            raise Exception('Window not found: {}'.format(window_name))

        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]

        # Crop away the window border and title bar.
        border_pixels = 8
        titlebar_pixels = 30
        self.w = self.w - (border_pixels * 2)
        self.h = self.h - titlebar_pixels - border_pixels
        self.cropped_x = border_pixels
        self.cropped_y = titlebar_pixels

    def get_screenshot(self):
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (self.w, self.h), dcObj, (self.cropped_x, self.cropped_y), win32con.SRCCOPY)

        signedIntsArray = dataBitMap.GetBitmapBits(True)
        img = np.frombuffer(signedIntsArray, dtype='uint8')  # np.fromstring is deprecated
        img.shape = (self.h, self.w, 4)

        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(self.hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

        img = img[..., :3]  # drop the alpha channel (BGRA -> BGR)
        img = np.ascontiguousarray(img)
        return img

    def generate_image_dataset(self):
        if not os.path.exists("images"):
            os.mkdir("images")
        while True:
            img = self.get_screenshot()
            im = Image.fromarray(img[..., [2, 1, 0]])  # BGR -> RGB for PIL
            im.save(f"./images/img_{len(os.listdir('images'))}.jpeg")
            sleep(1)

    def get_window_size(self):
        return (self.w, self.h)
class ImageProcessor:
    W = 0
    H = 0
    net = None
    ln = None
    classes = {}
    colors = []

    def __init__(self, img_size, cfg_file, weights_file):
        np.random.seed(42)
        self.net = cv.dnn.readNetFromDarknet(cfg_file, weights_file)
        self.net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
        self.ln = self.net.getLayerNames()
        self.ln = [self.ln[i - 1] for i in self.net.getUnconnectedOutLayers()]
        self.W = img_size[0]
        self.H = img_size[1]

        with open('Models/classes.txt', 'r') as file:
            lines = file.readlines()
            for i, line in enumerate(lines):
                self.classes[i] = line.strip()

        # If you plan to utilize more than six classes, please include additional colors in this list.
        self.colors = [
            (0, 0, 255),
            (0, 255, 0),
            (255, 0, 0),
            (255, 255, 0),
            (255, 0, 255),
            (0, 255, 255)
        ]

    def proccess_image(self, img):
        blob = cv.dnn.blobFromImage(img, 1/255.0, (416, 416), swapRB=True, crop=False)
        self.net.setInput(blob)
        outputs = self.net.forward(self.ln)  # forward through every output layer
        outputs = np.vstack(outputs)
        coordinates = self.get_coordinates(outputs, 0.1)
        self.draw_identified_objects(img, coordinates)
        return coordinates

    def get_coordinates(self, outputs, conf):
        boxes = []
        confidences = []
        classIDs = []
        print(outputs)
        for output in outputs:
            scores = output[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]
            if confidence > conf:
                x, y, w, h = output[:4] * np.array([self.W, self.H, self.W, self.H])
                p0 = int(x - w // 2), int(y - h // 2)
                boxes.append([*p0, int(w), int(h)])
                confidences.append(float(confidence))
                classIDs.append(classID)

        indices = cv.dnn.NMSBoxes(boxes, confidences, conf, conf - 0.1)
        if len(indices) == 0:
            return []

        coordinates = []
        for i in indices.flatten():
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])
            coordinates.append({'x': x, 'y': y, 'w': w, 'h': h, 'class': classIDs[i], 'class_name': self.classes[classIDs[i]]})
        return coordinates

    def draw_identified_objects(self, img, coordinates):
        for coordinate in coordinates:
            x = coordinate['x']
            y = coordinate['y']
            w = coordinate['w']
            h = coordinate['h']
            classID = coordinate['class']
            color = self.colors[classID]
            cv.rectangle(img, (x, y), (x + w, y + h), color, 2)
            cv.putText(img, self.classes[classID], (x, y - 10), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        cv.imshow('window', img)
# Run this cell to initiate detections using the trained model.
window_name = "Trek"
cfg_file_name = "./Models/yolov4_train.cfg"
weights_file_name = "./Models/yolov4_train_final.weights"

wincap = WindowCapture(window_name)
improc = ImageProcessor(wincap.get_window_size(), cfg_file_name, weights_file_name)

while True:
    ss = wincap.get_screenshot()
    if cv.waitKey(1) == ord('q'):
        cv.destroyAllWindows()
        break
    coordinates = improc.proccess_image(ss)
    sleep(2)
    # for coordinate in coordinates:
    #     print(coordinate)
    # print()
    # sleep(0.2)

print('Finished.')
C++ code (my version):
#include <iostream>
#include <windows.h>  // FindWindow, HWND, Sleep
#include <opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/video.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/imgproc.hpp>
#include <Z_Utils.h>

using namespace cv;
using namespace std;
using namespace dnn;

#define CONF 0.1

int main()
{
    LPCWSTR window_title = L"Trek";
    HWND handle = FindWindow(NULL, window_title);

    std::string model = "./Models/yolov4_train_final.weights";
    std::string config = "./Models/yolov4_train.cfg";
    Net network = readNet(model, config, "Darknet");
    network.setPreferableBackend(DNN_BACKEND_OPENCV);
    network.setPreferableTarget(DNN_TARGET_OPENCL);

    //std::vector<cv::String> ln = network.getUnconnectedOutLayersNames();
    //std::vector<cv::String> ln;
    //auto layers = network.getLayerNames();
    //for (auto i : network.getUnconnectedOutLayers()) {
    //    ln.push_back(layers[i]);
    //}

    for (;;)
    {
        Mat img = hwnd2mat(handle);
        cvtColor(img, img, COLOR_RGBA2RGB);

        static Mat blobFromImg;
        bool swapRB = true;
        blobFromImage(img, blobFromImg, 1 / 255.0, Size(416, 416), Scalar(), swapRB, false);

        network.setInput(blobFromImg);
        Mat outMat;
        network.forward(outMat);

        int rowsNoOfDetection = outMat.rows;
        int colsCoordinatesPlusClassScore = outMat.cols;

        std::vector<cv::Rect> boxes;
        std::vector<float> confidences;

        for (int j = 0; j < rowsNoOfDetection; ++j)
        {
            Mat scores = outMat.row(j).colRange(5, colsCoordinatesPlusClassScore);
            Point PositionOfMax;
            double confidence;
            minMaxLoc(scores, 0, &confidence, 0, &PositionOfMax);

            if (confidence > CONF)
            {
                double centerX = outMat.at<float>(j, 0) * img.cols;
                double centerY = outMat.at<float>(j, 1) * img.rows;
                double width   = outMat.at<float>(j, 2) * img.cols;
                double height  = outMat.at<float>(j, 3) * img.rows;
                double left = centerX - width / 2;
                double top  = centerY - height / 2;

                cv::Rect2d box_(left, top, width, height);
                boxes.push_back(box_);
                confidences.push_back(confidence);
            }
        }

        std::vector<int> good;
        cv::dnn::NMSBoxes(boxes, confidences, CONF, 0, good);

        for (auto ind : good) {
            Rect r = boxes[ind];
            putText(img, "tank", Point(r.x, r.y), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 255), 2);
            rectangle(img, r, Scalar(0, 0, 255), 2, 8, 0);
        }

        namedWindow("C++", WINDOW_AUTOSIZE);
        cv::imshow("C++", img);
        cv::waitKey(25);
        Sleep(2000);
    }
    return 0;
}
The function hwnd2mat() is in Z_Utils.h (it is the only function called from there; the rest are OpenCV API functions):
Mat hwnd2mat(HWND hwnd)
{
    HDC hwindowDC, hwindowCompatibleDC;
    int height, width, srcheight, srcwidth;
    HBITMAP hbwindow;
    Mat src;
    BITMAPINFOHEADER bi;

    hwindowDC = GetDC(hwnd);
    hwindowCompatibleDC = CreateCompatibleDC(hwindowDC);
    SetStretchBltMode(hwindowCompatibleDC, COLORONCOLOR);

    RECT windowsize;  // get the height and width of the window's client area
    GetClientRect(hwnd, &windowsize);

    srcheight = windowsize.bottom;
    srcwidth = windowsize.right;
    height = windowsize.bottom / 0.5;  // change this to whatever size you want to resize to
    width = windowsize.right / 0.5;

    src.create(height, width, CV_8UC4);

    // create a bitmap
    hbwindow = CreateCompatibleBitmap(hwindowDC, width, height);
    bi.biSize = sizeof(BITMAPINFOHEADER);
    bi.biWidth = width;
    bi.biHeight = -height;  // negative height makes the rows top-down instead of upside down
    bi.biPlanes = 1;
    bi.biBitCount = 32;
    bi.biCompression = BI_RGB;
    bi.biSizeImage = 0;
    bi.biXPelsPerMeter = 0;
    bi.biYPelsPerMeter = 0;
    bi.biClrUsed = 0;
    bi.biClrImportant = 0;

    // use the previously created device context with the bitmap
    SelectObject(hwindowCompatibleDC, hbwindow);

    // copy from the window device context to the bitmap device context,
    // then read the pixels out of the bitmap into the Mat's buffer
    StretchBlt(hwindowCompatibleDC, 0, 0, width, height, hwindowDC, 0, 0, srcwidth, srcheight, SRCCOPY);
    GetDIBits(hwindowCompatibleDC, hbwindow, 0, height, src.data, (BITMAPINFO*)&bi, DIB_RGB_COLORS);

    // avoid memory leak
    DeleteObject(hbwindow);
    DeleteDC(hwindowCompatibleDC);
    ReleaseDC(hwnd, hwindowDC);

    return src;
}
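One difference I noticed between the two capture paths: hwnd2mat stretches the client area to double size (the / 0.5 divisions), while the Python BitBlt capture grabs the window at native size. I don't know whether this matters after the 416x416 resize, but for reference, capturing at 1:1 would just mean changing the two size lines (a hypothetical tweak, not part of the original code):

// Hypothetical 1:1 capture, matching the Python version's native-size grab:
height = windowsize.bottom;  // was windowsize.bottom / 0.5
width = windowsize.right;    // was windowsize.right / 0.5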
The objects it needs to find change size because the game is 3D, but I took care of that with the model when I trained it. The only real problem is that the Python version, while slower, has better accuracy, even though I am using the same model files and the same game. I have no idea why this happens.
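One more observation, in case it's relevant: the Python version forwards through every unconnected output layer (net.forward(self.ln)) and stacks the results, while my C++ loop calls network.forward(outMat), which returns a single output blob. A minimal sketch of the multi-layer forward in C++ (assuming OpenCV 4.x; this is roughly what the ln vector I commented out above was for) would look like this:

// Sketch: collect all YOLO output layers, like Python's net.forward(self.ln).
std::vector<cv::Mat> outs;
network.forward(outs, network.getUnconnectedOutLayersNames());
// Each Mat in outs holds one output layer's detections
// (rows = detections, cols = 5 + number of classes),
// so the detection loop would iterate over every Mat instead of one.
for (const cv::Mat& out : outs)
{
    for (int j = 0; j < out.rows; ++j)
    {
        // ... same score / box extraction as in my loop above ...
    }
}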