Hey, so I found some Python code on the internet and it runs really well, but I wanted it to be faster, so I tried to translate it to C++. It compiles and works: it draws rectangles on a separate window just like the Python version, but it can't find some of the objects the Python version can. I ran them side by side and Python detected things the C++ version missed. I don't know what the problem is, because I wrote the same function calls in C++, and I'm hoping someone more experienced with both can help me.
Here is the code:
import numpy as np
import win32gui, win32ui, win32con
from PIL import Image
from time import sleep
import cv2 as cv
import os
import random
class WindowCapture:
    w = 0
    h = 0
    hwnd = None

    def __init__(self, window_name):
        self.hwnd = win32gui.FindWindow(None, window_name)
        if not self.hwnd:
            raise Exception('Window not found: {}'.format(window_name))

        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]

        # Crop away the window border and title bar.
        border_pixels = 8
        titlebar_pixels = 30
        self.w = self.w - (border_pixels * 2)
        self.h = self.h - titlebar_pixels - border_pixels
        self.cropped_x = border_pixels
        self.cropped_y = titlebar_pixels

    def get_screenshot(self):
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (self.w, self.h), dcObj, (self.cropped_x, self.cropped_y), win32con.SRCCOPY)

        signedIntsArray = dataBitMap.GetBitmapBits(True)
        img = np.frombuffer(signedIntsArray, dtype='uint8')  # np.fromstring is deprecated
        img.shape = (self.h, self.w, 4)

        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(self.hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

        img = img[..., :3]  # drop the alpha channel (BGRA -> BGR)
        img = np.ascontiguousarray(img)
        return img

    def generate_image_dataset(self):
        if not os.path.exists("images"):
            os.mkdir("images")
        while True:
            img = self.get_screenshot()
            im = Image.fromarray(img[..., [2, 1, 0]])  # BGR -> RGB for PIL
            im.save(f"./images/img_{len(os.listdir('images'))}.jpeg")
            sleep(1)

    def get_window_size(self):
        return (self.w, self.h)
class ImageProcessor:
    W = 0
    H = 0
    net = None
    ln = None
    classes = {}
    colors = []

    def __init__(self, img_size, cfg_file, weights_file):
        np.random.seed(42)
        self.net = cv.dnn.readNetFromDarknet(cfg_file, weights_file)
        self.net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
        self.ln = self.net.getLayerNames()
        self.ln = [self.ln[i - 1] for i in self.net.getUnconnectedOutLayers()]
        self.W = img_size[0]
        self.H = img_size[1]

        with open('Models/classes.txt', 'r') as file:
            lines = file.readlines()
            for i, line in enumerate(lines):
                self.classes[i] = line.strip()

        # If you plan to utilize more than six classes, please include additional colors in this list.
        self.colors = [
            (0, 0, 255),
            (0, 255, 0),
            (255, 0, 0),
            (255, 255, 0),
            (255, 0, 255),
            (0, 255, 255)
        ]

    def proccess_image(self, img):
        blob = cv.dnn.blobFromImage(img, 1/255.0, (416, 416), swapRB=True, crop=False)
        self.net.setInput(blob)
        outputs = self.net.forward(self.ln)  # forward through every output layer
        outputs = np.vstack(outputs)
        coordinates = self.get_coordinates(outputs, 0.1)
        self.draw_identified_objects(img, coordinates)
        return coordinates

    def get_coordinates(self, outputs, conf):
        boxes = []
        confidences = []
        classIDs = []
        print(outputs)
        for output in outputs:
            scores = output[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]
            if confidence > conf:
                x, y, w, h = output[:4] * np.array([self.W, self.H, self.W, self.H])
                p0 = int(x - w // 2), int(y - h // 2)
                boxes.append([*p0, int(w), int(h)])
                confidences.append(float(confidence))
                classIDs.append(classID)

        indices = cv.dnn.NMSBoxes(boxes, confidences, conf, conf - 0.1)
        if len(indices) == 0:
            return []

        coordinates = []
        for i in indices.flatten():
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])
            coordinates.append({'x': x, 'y': y, 'w': w, 'h': h, 'class': classIDs[i], 'class_name': self.classes[classIDs[i]]})
        return coordinates

    def draw_identified_objects(self, img, coordinates):
        for coordinate in coordinates:
            x = coordinate['x']
            y = coordinate['y']
            w = coordinate['w']
            h = coordinate['h']
            classID = coordinate['class']
            color = self.colors[classID]
            cv.rectangle(img, (x, y), (x + w, y + h), color, 2)
            cv.putText(img, self.classes[classID], (x, y - 10), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        cv.imshow('window', img)
# Run this cell to initiate detections using the trained model.
window_name = "Trek"
cfg_file_name = "./Models/yolov4_train.cfg"
weights_file_name = "./Models/yolov4_train_final.weights"

wincap = WindowCapture(window_name)
improc = ImageProcessor(wincap.get_window_size(), cfg_file_name, weights_file_name)

while True:
    ss = wincap.get_screenshot()
    if cv.waitKey(1) == ord('q'):
        cv.destroyAllWindows()
        break
    coordinates = improc.proccess_image(ss)
    sleep(2)
    # for coordinate in coordinates:
    #     print(coordinate)
    # print()
    # sleep(0.2)

print('Finished.')
C++ code (my version):
#include <iostream>
#include <windows.h>  // FindWindow, HWND, Sleep
#include <opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/video.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/imgproc.hpp>
#include <Z_Utils.h>

using namespace cv;
using namespace std;
using namespace dnn;

#define CONF 0.1

int main()
{
    LPCWSTR window_title = L"Trek";
    HWND handle = FindWindow(NULL, window_title);

    std::string model = "./Models/yolov4_train_final.weights";
    std::string config = "./Models/yolov4_train.cfg";
    Net network = readNet(model, config, "Darknet");
    network.setPreferableBackend(DNN_BACKEND_OPENCV);
    network.setPreferableTarget(DNN_TARGET_OPENCL);

    //std::vector<cv::String> ln = network.getUnconnectedOutLayersNames();
    //std::vector<cv::String> ln;
    //auto layers = network.getLayerNames();
    //for (auto i : network.getUnconnectedOutLayers()) {
    //    ln.push_back(layers[i]);
    //}

    for (;;)
    {
        Mat img = hwnd2mat(handle);
        cvtColor(img, img, COLOR_RGBA2RGB);

        static Mat blobFromImg;
        bool swapRB = true;
        blobFromImage(img, blobFromImg, 1 / 255.0, Size(416, 416), Scalar(), swapRB, false);

        network.setInput(blobFromImg);
        Mat outMat;
        network.forward(outMat);

        int rowsNoOfDetection = outMat.rows;
        int colsCoordinatesPlusClassScore = outMat.cols;

        std::vector<cv::Rect> boxes;
        std::vector<float> confidences;

        for (int j = 0; j < rowsNoOfDetection; ++j)
        {
            Mat scores = outMat.row(j).colRange(5, colsCoordinatesPlusClassScore);
            Point PositionOfMax;
            double confidence;
            minMaxLoc(scores, 0, &confidence, 0, &PositionOfMax);

            if (confidence > CONF)
            {
                double centerX = outMat.at<float>(j, 0) * img.cols;
                double centerY = outMat.at<float>(j, 1) * img.rows;
                double width   = outMat.at<float>(j, 2) * img.cols;
                double height  = outMat.at<float>(j, 3) * img.rows;
                double left = centerX - width / 2;
                double top  = centerY - height / 2;

                cv::Rect2d box_(left, top, width, height);
                boxes.push_back(box_);
                confidences.push_back(confidence);
            }
        }

        std::vector<int> good;
        cv::dnn::NMSBoxes(boxes, confidences, CONF, 0, good);

        for (auto ind : good) {
            Rect r = boxes[ind];
            putText(img, "tank", Point(r.x, r.y), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 255), 2);
            rectangle(img, r, Scalar(0, 0, 255), 2, 8, 0);
        }

        namedWindow("C++", WINDOW_AUTOSIZE);
        cv::imshow("C++", img);
        cv::waitKey(25);
        Sleep(2000);
    }
    return 0;
}
The function hwnd2mat() is in Z_Utils.h (it is the only function called from there; the rest are OpenCV API functions):
Mat hwnd2mat(HWND hwnd)
{
    HDC hwindowDC, hwindowCompatibleDC;
    int height, width, srcheight, srcwidth;
    HBITMAP hbwindow;
    Mat src;
    BITMAPINFOHEADER bi;

    hwindowDC = GetDC(hwnd);
    hwindowCompatibleDC = CreateCompatibleDC(hwindowDC);
    SetStretchBltMode(hwindowCompatibleDC, COLORONCOLOR);

    RECT windowsize;  // get the height and width of the window's client area
    GetClientRect(hwnd, &windowsize);

    srcheight = windowsize.bottom;
    srcwidth = windowsize.right;
    height = windowsize.bottom / 0.5;  // change this to whatever size you want to resize to
    width = windowsize.right / 0.5;

    src.create(height, width, CV_8UC4);

    // create a bitmap
    hbwindow = CreateCompatibleBitmap(hwindowDC, width, height);
    bi.biSize = sizeof(BITMAPINFOHEADER);
    bi.biWidth = width;
    bi.biHeight = -height;  // negative height makes the rows top-down instead of upside down
    bi.biPlanes = 1;
    bi.biBitCount = 32;
    bi.biCompression = BI_RGB;
    bi.biSizeImage = 0;
    bi.biXPelsPerMeter = 0;
    bi.biYPelsPerMeter = 0;
    bi.biClrUsed = 0;
    bi.biClrImportant = 0;

    // use the previously created device context with the bitmap
    SelectObject(hwindowCompatibleDC, hbwindow);

    // copy from the window device context to the bitmap device context,
    // then read the pixels out of the bitmap into the Mat's buffer
    StretchBlt(hwindowCompatibleDC, 0, 0, width, height, hwindowDC, 0, 0, srcwidth, srcheight, SRCCOPY);
    GetDIBits(hwindowCompatibleDC, hbwindow, 0, height, src.data, (BITMAPINFO*)&bi, DIB_RGB_COLORS);

    // avoid memory leak
    DeleteObject(hbwindow);
    DeleteDC(hwindowCompatibleDC);
    ReleaseDC(hwnd, hwindowDC);

    return src;
}
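One difference I noticed between the two capture paths: hwnd2mat stretches the client area to double size (the / 0.5 divisions), while the Python BitBlt capture grabs the window at native size. I don't know whether this matters after the 416x416 resize, but for reference, capturing at 1:1 would just mean changing the two size lines (a hypothetical tweak, not part of the original code):

// Hypothetical 1:1 capture, matching the Python version's native-size grab:
height = windowsize.bottom;  // was windowsize.bottom / 0.5
width = windowsize.right;    // was windowsize.right / 0.5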
The objects it needs to find change size because the game is 3D, but I took care of that with the model when I trained it. The only real problem is that the Python version, while slower, has better accuracy, even though I am using the same model files and the same game. I have no idea why this happens.
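One more observation, in case it's relevant: the Python version forwards through every unconnected output layer (net.forward(self.ln)) and stacks the results, while my C++ loop calls network.forward(outMat), which returns a single output blob. A minimal sketch of the multi-layer forward in C++ (assuming OpenCV 4.x; this is roughly what the ln vector I commented out above was for) would look like this:

// Sketch: collect all YOLO output layers, like Python's net.forward(self.ln).
std::vector<cv::Mat> outs;
network.forward(outs, network.getUnconnectedOutLayersNames());
// Each Mat in outs holds one output layer's detections
// (rows = detections, cols = 5 + number of classes),
// so the detection loop would iterate over every Mat instead of one.
for (const cv::Mat& out : outs)
{
    for (int j = 0; j < out.rows; ++j)
    {
        // ... same score / box extraction as in my loop above ...
    }
}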