Lesson 18: Machine Vision - Face Tracking

Building on the previous two lessons on face detection and on feature point extraction and tracking, we now go one step further and implement face tracking with the robot: when the face moves, the robot is commanded to turn so that the face stays at the center of the image window. The basic workflow is as follows:

  • Face detection: already covered in Lesson 16, so we simply reuse it here.
  • Feature extraction: implemented in Lesson 17, but to get good results the feature points need some compensation and the noisy points have to be removed.
  • Face tracking: based on the position of the ROI, decide how the robot needs to move so that it keeps following the face (a minimal standalone sketch of the whole loop follows this list).
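
The sketch below is a minimal, standalone illustration of this loop using plain OpenCV rather than the Diego ROS nodes. The webcam index 0, the cascade path taken from cv2.data.haarcascades and all parameter values are assumptions made for the sketch only; adjust them for your own setup. It detects a face, seeds feature points inside it, tracks them with LK optical flow, and prints the normalized horizontal offset that a robot controller would drive toward zero by turning.

#!/usr/bin/env python
# Minimal standalone sketch of the detect -> extract -> track -> offset loop.
# Assumptions: a webcam at index 0 and the Haar cascade shipped with
# opencv-python (cv2.data.haarcascades); adjust both for your own setup.
import cv2
import numpy as np

cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_alt2.xml")
gf_params = dict(maxCorners=200, qualityLevel=0.02, minDistance=7, blockSize=10)
lk_params = dict(winSize=(10, 10), maxLevel=2,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 20, 0.01))

cap = cv2.VideoCapture(0)
prev_grey = None
points = None

while True:
    ok, frame = cap.read()
    if not ok:
        break
    grey = cv2.equalizeHist(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))

    if points is None or len(points) == 0:
        # Step 1: detect a face and seed keypoints inside its bounding box
        faces = cascade.detectMultiScale(grey, scaleFactor=1.3, minNeighbors=3, minSize=(30, 30))
        if len(faces) > 0:
            x, y, w, h = faces[0]
            mask = np.zeros_like(grey)
            mask[y:y + h, x:x + w] = 255
            points = cv2.goodFeaturesToTrack(grey, mask=mask, **gf_params)
    elif prev_grey is not None:
        # Step 2: track the keypoints into the current frame with LK optical flow
        new_points, status, err = cv2.calcOpticalFlowPyrLK(prev_grey, grey, points, None, **lk_params)
        points = new_points[status.flatten() == 1].reshape(-1, 1, 2)
        if len(points) > 0:
            # Step 3: compare the cluster centre with the image centre; a robot
            # controller would turn so that this offset stays near zero
            cx = points.reshape(-1, 2)[:, 0].mean()
            offset = (cx - grey.shape[1] / 2.0) / (grey.shape[1] / 2.0)
            print("normalized x offset: %.2f" % offset)

    prev_grey = grey
    cv2.imshow("face tracking sketch", frame)
    if cv2.waitKey(1) & 0xFF == 27:   # press Esc to quit
        break

cap.release()
cv2.destroyAllWindows()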

1. Face Detection and Feature Extraction

The source file face_tracker2.py is available on GitHub; the code has been tested in an OpenCV 3 environment.

FaceTracker inherits from the FaceDetector and LKTracker classes and makes the following main modifications and extensions:

  • It overrides the process_image function.
  • The add_keypoints function looks for new feature points: it calls goodFeaturesToTrack and checks each candidate's distance to the current keypoint cluster so that only useful new points are added.
  • The drop_keypoints function uses simple cluster statistics to discard invalid feature points (a small numpy sketch of both ideas follows this list).
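
To make the last two points concrete, here is a small numpy-only sketch with hypothetical coordinates and the same threshold names as the node below (add_keypoint_distance and std_err_xy): a candidate point is added only if its L1 distance to the nearest existing keypoint is large enough, and an existing point is dropped when its squared distance from the cluster centre exceeds std_err_xy times the cluster's mean squared error.

import numpy as np

keypoints = [(100.0, 100.0), (105.0, 98.0), (103.0, 104.0), (160.0, 170.0)]
candidates = [(101.0, 101.0), (130.0, 110.0)]
add_keypoint_distance, std_err_xy = 10, 2.5

# add_keypoints idea: accept candidates that are far enough from every current point
for cx, cy in candidates:
    d = min(abs(cx - x) + abs(cy - y) for x, y in keypoints)
    if d > add_keypoint_distance:
        keypoints.append((cx, cy))

# drop_keypoints idea: remove statistical outliers relative to the centre of gravity
pts = np.float32(keypoints)
mean = pts.mean(axis=0)
sq_err = ((pts - mean) ** 2).sum(axis=1)
mse_xy = sq_err.mean()
keypoints = [tuple(p) for p, e in zip(pts, sq_err) if e / mse_xy <= std_err_xy]
print(keypoints)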

1.1 Source Code

#!/usr/bin/env python

import roslib
import rospy
import cv2
import numpy as np
from math import isnan
from face_detector import FaceDetector
from lk_tracker import LKTracker

class FaceTracker(FaceDetector, LKTracker):
    def __init__(self, node_name):
        super(FaceTracker, self).__init__(node_name)
        
        self.n_faces = rospy.get_param("~n_faces", 1)
        self.show_text = rospy.get_param("~show_text", True)
        self.show_add_drop = rospy.get_param("~show_add_drop", False)
        self.feature_size = rospy.get_param("~feature_size", 1)
        self.use_depth_for_tracking = rospy.get_param("~use_depth_for_tracking", False)
        self.min_keypoints = rospy.get_param("~min_keypoints", 20)
        self.abs_min_keypoints = rospy.get_param("~abs_min_keypoints", 6)
        self.std_err_xy = rospy.get_param("~std_err_xy", 2.5) 
        self.pct_err_z = rospy.get_param("~pct_err_z", 0.42) 
        self.max_mse = rospy.get_param("~max_mse", 10000)
        self.add_keypoint_distance = rospy.get_param("~add_keypoint_distance", 10)
        self.add_keypoints_interval = rospy.get_param("~add_keypoints_interval", 1)
        self.drop_keypoints_interval = rospy.get_param("~drop_keypoints_interval", 1)
        self.expand_roi_init = rospy.get_param("~expand_roi", 1.02)
        self.expand_roi = self.expand_roi_init
        self.face_tracking = True

        self.frame_index = 0
        self.add_index = 0
        self.drop_index = 0
        self.keypoints = list()

        self.detect_box = None
        self.track_box = None
        
        self.grey = None
        self.prev_grey = None
        
    def process_image(self, cv_image):
        try:
            # Create a greyscale version of the image
            self.grey = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
            
            # Equalize the grey histogram to minimize lighting effects
            self.grey = cv2.equalizeHist(self.grey)
            
            # Step 1: Detect the face if we haven't already
            if self.detect_box is None:
                self.keypoints = list()
                self.track_box = None
                self.detect_box = self.detect_face(self.grey)
            else:
                # Step 2: If we aren't yet tracking keypoints, get them now
                if not self.track_box or not self.is_rect_nonzero(self.track_box):
                    self.track_box = self.detect_box
                    self.keypoints = self.get_keypoints(self.grey, self.track_box)
    
                # Store a copy of the current grey image used for LK tracking                   
                if self.prev_grey is None:
                    self.prev_grey = self.grey           
                  
                # Step 3: If we have keypoints, track them using optical flow
                self.track_box = self.track_keypoints(self.grey, self.prev_grey)
              
                # Step 4: Drop keypoints that are too far from the main cluster
                if self.frame_index % self.drop_keypoints_interval == 0 and len(self.keypoints) > 0:
                    ((cog_x, cog_y, cog_z), mse_xy, mse_z, score) = self.drop_keypoints(self.abs_min_keypoints, self.std_err_xy, self.max_mse)
                    
                    if score == -1:
                        self.detect_box = None
                        self.track_box = None
                        return cv_image
                  
                # Step 5: Add keypoints if the number is getting too low 
                if self.frame_index % self.add_keypoints_interval == 0 and len(self.keypoints) < self.min_keypoints:
                    self.expand_roi = self.expand_roi_init * self.expand_roi
                    self.add_keypoints(self.track_box)
                else:
                    self.frame_index += 1
                    self.expand_roi = self.expand_roi_init
            
            # Store a copy of the current grey image used for LK tracking            
            self.prev_grey = self.grey
              
            # Process any special keyboard commands for this module
            if 32 <= self.keystroke and self.keystroke < 128:
                cc = chr(self.keystroke).lower()
                if cc == 'c':
                    self.keypoints = []
                    self.track_box = None
                    self.detect_box = None
                elif cc == 'd':
                    self.show_add_drop = not self.show_add_drop
                    
        except AttributeError:
            pass
                    
        return cv_image

    def add_keypoints(self, track_box):
        # Look for any new keypoints around the current keypoints
        
        # Begin with a mask of all black pixels
        mask = np.zeros_like(self.grey)
        
        # Get the coordinates and dimensions of the current track box
        try:
            ((x,y), (w,h), a) = track_box
        except:
            try:
                x,y,w,h = track_box
                # The rect form gives the upper-left corner, so convert to the box center
                x = x + w / 2
                y = y + h / 2
                a = 0
            except:
                rospy.loginfo("Track box has shrunk to zero...")
                return
        
        x = int(x)
        y = int(y)
        
        # Expand the track box to look for new keypoints
        w_new = int(self.expand_roi * w)
        h_new = int(self.expand_roi * h)
        
        pt1 = (x - int(w_new / 2), y - int(h_new / 2))
        pt2 = (x + int(w_new / 2), y + int(h_new / 2))
        
        mask_box = ((x, y), (w_new, h_new), a)

        # Display the expanded ROI with a yellow rectangle
        if self.show_add_drop:
            cv2.rectangle(self.marker_image, pt1, pt2, (255, 255, 0))
                        
        # Create a filled white ellipse within the track_box to define the ROI
        cv2.ellipse(mask, mask_box, (255,255, 255), cv2.FILLED)
        
        if self.keypoints is not None:
            # Mask the current keypoints
            for x, y in [np.int32(p) for p in self.keypoints]:
                cv2.circle(mask, (x, y), 5, 0, -1)
         
        new_keypoints = cv2.goodFeaturesToTrack(self.grey, mask = mask, **self.gf_params)

        # Append new keypoints to the current list if they are not
        # too far from the current cluster      
        if new_keypoints is not None:
            for x, y in np.float32(new_keypoints).reshape(-1, 2):
                distance = self.distance_to_cluster((x,y), self.keypoints)
                if distance > self.add_keypoint_distance:
                    self.keypoints.append((x,y))
                    # Briefly display a blue disc where the new point is added
                    if self.show_add_drop:
                        cv2.circle(self.marker_image, (int(x), int(y)), 3, (255, 255, 0, 0), cv2.FILLED, 2, 0)
                                    
            # Remove duplicate keypoints
            self.keypoints = list(set(self.keypoints))
        
    def distance_to_cluster(self, test_point, cluster):
        min_distance = 10000
        for point in cluster:
            if point == test_point:
                continue
            # Use L1 distance since it is faster than L2
            distance = abs(test_point[0] - point[0])  + abs(test_point[1] - point[1])
            if distance < min_distance:
                min_distance = distance
        return min_distance

    def drop_keypoints(self, min_keypoints, outlier_threshold, mse_threshold):
        sum_x = 0
        sum_y = 0
        sum_z = 0
        sse = 0
        # Work on copies so that points can be removed while iterating over self.keypoints
        keypoints_xy = list(self.keypoints)
        keypoints_z = list(self.keypoints)
        n_xy = len(self.keypoints)
        n_z = n_xy
        
        if self.use_depth_for_tracking:
            if self.depth_image is None:
                return ((0, 0, 0), 0, 0, -1)
        
        # If there are no keypoints left to track, start over
        if n_xy == 0:
            return ((0, 0, 0), 0, 0, -1)
        
        # Compute the COG (center of gravity) of the cluster
        for point in self.keypoints:
            sum_x = sum_x + point[0]
            sum_y = sum_y + point[1]
        
        mean_x = sum_x / n_xy
        mean_y = sum_y / n_xy
        
        if self.use_depth_for_tracking:
            for point in self.keypoints:
                try:
                    # Index the depth image (a float32 numpy array) directly
                    z = self.depth_image[min(self.frame_height - 1, int(point[1])), min(self.frame_width - 1, int(point[0]))]
                except:
                    continue
                # Depth values can be NaN which should be ignored
                if isnan(z):
                    continue
                else:
                    sum_z = sum_z + z
                    
            mean_z = sum_z / n_z
            
        else:
            mean_z = -1
        
        # Compute the x-y MSE (mean squared error) of the cluster in the camera plane
        for point in self.keypoints:
            sse = sse + (point[0] - mean_x) * (point[0] - mean_x) + (point[1] - mean_y) * (point[1] - mean_y)
            #sse = sse + abs((point[0] - mean_x)) + abs((point[1] - mean_y))
        
        # Get the average over the number of feature points
        mse_xy = sse / n_xy

        # The MSE must be > 0 for any sensible feature cluster
        if mse_xy == 0 or mse_xy > mse_threshold:
            return ((0, 0, 0), 0, 0, -1)
        
        # Throw away the outliers based on the x-y variance
        max_err = 0
        for point in self.keypoints:
            std_err = ((point[0] - mean_x) * (point[0] - mean_x) + (point[1] - mean_y) * (point[1] - mean_y)) / mse_xy
            if std_err > max_err:
                max_err = std_err
            if std_err > outlier_threshold:
                keypoints_xy.remove(point)
                if self.show_add_drop:
                    # Briefly mark the removed points in red
                    cv2.circle(self.marker_image, (int(point[0]), int(point[1])), 3, (0, 0, 255), cv2.FILLED, 2, 0)
                try:
                    keypoints_z.remove(point)
                    n_z = n_z - 1
                except:
                    pass
                
                n_xy = n_xy - 1
                                
        # Now do the same for depth
        if self.use_depth_for_tracking:
            sse = 0
            for point in keypoints_z:
                try:
                    z = self.depth_image[min(self.frame_height - 1, int(point[1])), min(self.frame_width - 1, int(point[0]))]
                    sse = sse + (z - mean_z) * (z - mean_z)
                except:
                    n_z = n_z - 1
            
            if n_z != 0:
                mse_z = sse / n_z
            else:
                mse_z = 0
            
            # Throw away the outliers based on depth using percent error 
            # rather than standard error since depth values can jump
            # dramatically at object boundaries
            for point in keypoints_z:
                try:
                    z = self.depth_image[min(self.frame_height - 1, int(point[1])), min(self.frame_width - 1, int(point[0]))]
                except:
                    continue
                try:
                    pct_err = abs(z - mean_z) / mean_z
                    if pct_err > self.pct_err_z:
                        keypoints_xy.remove(point)
                        if self.show_add_drop:
                            # Briefly mark the removed points in red
                            cv2.circle(self.marker_image, (int(point[0]), int(point[1])), 2, (0, 0, 255), cv2.FILLED)
                except:
                    pass
        else:
            mse_z = -1
        
        self.keypoints = keypoints_xy
               
        # Consider a cluster bad if we have fewer than min_keypoints left
        if len(self.keypoints) < min_keypoints:
            score = -1
        else:
            score = 1

        return ((mean_x, mean_y, mean_z), mse_xy, mse_z, score)
    
if __name__ == '__main__':
    try:
        node_name = "face_tracker"
        FaceTracker(node_name)
        rospy.spin()
    except KeyboardInterrupt:
        print "Shutting down face tracker node."
        cv2.destroyAllWindows()

1.2 Launch File

<launch>
   <node pkg="diego_vision" name="face_tracker2" type="face_tracker2.py" output="screen">

   <remap from="input_rgb_image" to="/camera/rgb/image_color" />
   <remap from="input_depth_image" to="/camera/depth/image" />
 
   <rosparam>
       use_depth_for_tracking: True
       min_keypoints: 20
       abs_min_keypoints: 6
       add_keypoint_distance: 10
       std_err_xy: 2.5
       pct_err_z: 0.42
       max_mse: 10000
       add_keypoints_interval: 1
       drop_keypoints_interval: 1
       show_text: True
       show_features: True
       show_add_drop: False
       feature_size: 1
       expand_roi: 1.02
       gf_maxCorners: 200
       gf_qualityLevel: 0.02
       gf_minDistance: 7
       gf_blockSize: 10
       gf_useHarrisDetector: False
       gf_k: 0.04
       haar_scaleFactor: 1.3
       haar_minNeighbors: 3
       haar_minSize: 30
       haar_maxSize: 150
   </rosparam>

   <param name="cascade_1" value="$(find diego_vision)/data/haar_detectors/haarcascade_frontalface_alt2.xml" />
   <param name="cascade_2" value="$(find diego_vision)/data/haar_detectors/haarcascade_frontalface_alt.xml" />
   <param name="cascade_3" value="$(find diego_vision)/data/haar_detectors/haarcascade_profileface.xml" />

</node>
</launch>

Diego1 plus uses a depth camera, so the use_depth_for_tracking parameter is set to True; in practice the depth camera clearly improves the results. Start the openni node and the face_tracker2.py node in two separate terminals; a video window appears, and any face in front of the camera is detected and highlighted, with good results. Even so, on the Diego1 plus configuration the face tracking still shows a noticeable lag.

roslaunch diego_vision openni_node.launch

roslaunch diego_vision face_tracker2.launch

2. Face Tracking

The source file object_tracker2.py is available on GitHub; the code has been tested in an OpenCV 3 environment.

The main functions of the ObjectTracker class are as follows:

  • It subscribes to the ROI message published by the FaceTracker class, which gives the position of the detected face, and uses it to determine where the face sits in the image.
  • It subscribes to the /camera/depth/image message to estimate the distance between the face and the camera, so the robot can follow at a fixed distance.
  • It publishes Twist messages to control the robot's movement (a plain-Python sketch of this control law follows this list).
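
Before looking at the full node, here is a plain-Python sketch (no ROS) of the control law it implements, using the parameter defaults from the code below; the example numbers in the final line are purely hypothetical. The normalized x offset of the ROI from the image centre sets the angular speed, the error between the mean depth and goal_z sets the linear speed, and both are clamped between the minimum and maximum speeds.

from math import copysign

def face_to_twist(roi_center_x, image_width, mean_z,
                  goal_z=0.6, x_threshold=0.1, z_threshold=0.05,
                  x_scale=2.0, z_scale=1.0,
                  min_rot=0.5, max_rot=2.0, min_lin=0.1, max_lin=0.3):
    angular_z = linear_x = 0.0

    # Rotate only when the face is far enough off-centre
    percent_offset_x = (roi_center_x - image_width / 2.0) / (image_width / 2.0)
    if abs(percent_offset_x) > x_threshold:
        speed = percent_offset_x * x_scale
        angular_z = -copysign(max(min_rot, min(max_rot, abs(speed))), speed)

    # Drive forward/backward only when the distance error is large enough
    if abs(mean_z - goal_z) > z_threshold:
        speed = (mean_z - goal_z) * z_scale
        linear_x = copysign(min(max_lin, max(min_lin, abs(speed))), speed)

    return linear_x, angular_z

# A face 80 pixels right of centre in a 640-pixel image, 1.2 m away
print(face_to_twist(roi_center_x=400, image_width=640, mean_z=1.2))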

2.1 Source Code

#!/usr/bin/env python

import roslib
import rospy
from sensor_msgs.msg import Image, RegionOfInterest, CameraInfo
from geometry_msgs.msg import Twist
from math import copysign, isnan
from cv_bridge import CvBridge, CvBridgeError
import numpy as np

class ObjectTracker():
    def __init__(self):
        rospy.init_node("object_tracker")
                        
        # Set the shutdown function (stop the robot)
        rospy.on_shutdown(self.shutdown)
        
        # How often should we update the robot's motion?
        self.rate = rospy.get_param("~rate", 10)
        r = rospy.Rate(self.rate) 
        
        # The maximum rotation speed in radians per second
        self.max_rotation_speed = rospy.get_param("~max_rotation_speed", 2.0)
        
        # The minimum rotation speed in radians per second
        self.min_rotation_speed = rospy.get_param("~min_rotation_speed", 0.5)
        
        # The x threshold (% of image width) indicates how far off-center
        # the ROI needs to be in the x-direction before we react
        self.x_threshold = rospy.get_param("~x_threshold", 0.1)
        
        # The maximum distance a target can be from the robot for us to track
        self.max_z = rospy.get_param("~max_z", 2.0)
        
        # Initialize the global ROI
        self.roi = RegionOfInterest()
        
        # The goal distance (in meters) to keep between the robot and the person
        self.goal_z = rospy.get_param("~goal_z", 0.6)
        
        # How far away from the goal distance (in meters) before the robot reacts
        self.z_threshold = rospy.get_param("~z_threshold", 0.05)
        
        # How much do we weight the goal distance (z) when making a movement
        self.z_scale = rospy.get_param("~z_scale", 1.0)

        # How much do we weight x-displacement of the person when making a movement        
        self.x_scale = rospy.get_param("~x_scale", 2.0)
        
        # The max linear speed in meters per second
        self.max_linear_speed = rospy.get_param("~max_linear_speed", 0.3)
        
        # The minimum linear speed in meters per second
        self.min_linear_speed = rospy.get_param("~min_linear_speed", 0.1)

        # Publisher to control the robot's movement
        self.cmd_vel_pub = rospy.Publisher('cmd_vel', Twist, queue_size=5)
        
        # Intialize the movement command
        self.move_cmd = Twist()
        
        # We will get the image width and height from the camera_info topic
        self.image_width = 0
        self.image_height = 0
        
        # We need cv_bridge to convert the ROS depth image to an OpenCV array
        self.cv_bridge = CvBridge()
        self.depth_array = None
        
        # Set flag to indicate when the ROI stops updating
        self.target_visible = False
        
        # Wait for the camera_info topic to become available
        rospy.loginfo("Waiting for camera_info topic...")
        rospy.wait_for_message('camera_info', CameraInfo)
        
        # Subscribe to the camera_info topic to get the image width and height
        rospy.Subscriber('camera_info', CameraInfo, self.get_camera_info)

        # Wait until we actually have the camera data
        while self.image_width == 0 or self.image_height == 0:
            rospy.sleep(1)
                    
        # Subscribe to the registered depth image
        rospy.Subscriber("depth_image", Image, self.convert_depth_image)
        
        # Wait for the depth image to become available
        rospy.wait_for_message('depth_image', Image)
        
        # Subscribe to the ROI topic and set the callback to update the robot's motion
        rospy.Subscriber('roi', RegionOfInterest, self.set_cmd_vel)
        
        # Wait until we have an ROI to follow
        rospy.loginfo("Waiting for an ROI to track...")
        rospy.wait_for_message('roi', RegionOfInterest)
        
        rospy.loginfo("ROI messages detected. Starting tracker...")
        
        # Begin the tracking loop
        while not rospy.is_shutdown():
            # If the target is not visible, stop the robot
            if not self.target_visible:
                self.move_cmd = Twist()
            else:
                # Reset the flag to False by default
                self.target_visible = False
                
            # Send the Twist command to the robot
            self.cmd_vel_pub.publish(self.move_cmd)
            
            # Sleep for 1/self.rate seconds
            r.sleep()

    def set_cmd_vel(self, msg):
        # If the ROI has a width or height of 0, we have lost the target
        if msg.width == 0 or msg.height == 0:
            return
        
        # If the ROI stops updating this next statement will not happen
        self.target_visible = True
        
        self.roi = msg
        
        # Compute the displacement of the ROI from the center of the image
        target_offset_x = msg.x_offset + msg.width / 2 - self.image_width / 2

        try:
            percent_offset_x = float(target_offset_x) / (float(self.image_width) / 2.0)
        except:
            percent_offset_x = 0
            
        # Intialize the movement command
        self.move_cmd = Twist()
        
        # Rotate the robot only if the displacement of the target exceeds the threshold
        if abs(percent_offset_x) > self.x_threshold:
            # Set the rotation speed proportional to the displacement of the target
            try:
                speed = percent_offset_x * self.x_scale
                self.move_cmd.angular.z = -copysign(max(self.min_rotation_speed,
                                            min(self.max_rotation_speed, abs(speed))), speed)
            except:
                pass
            
        # Now compute the depth component
        n_z = sum_z = mean_z = 0
         
        # Get the min/max x and y values from the ROI
        min_x = self.roi.x_offset
        max_x = min_x + self.roi.width
        min_y = self.roi.y_offset
        max_y = min_y + self.roi.height
        
        # Get the average depth value over the ROI
        for x in range(min_x, max_x):
            for y in range(min_y, max_y):
                try:
                    z = self.depth_array[y, x]
                except:
                    continue
                
                # Depth values can be NaN which should be ignored
                if isnan(z) or z > self.max_z:
                    continue
                else:
                    sum_z = sum_z + z
                    n_z += 1   
        try:
            mean_z = sum_z / n_z
            
            if mean_z < self.max_z and (abs(mean_z - self.goal_z) > self.z_threshold):
                speed = (mean_z - self.goal_z) * self.z_scale
                self.move_cmd.linear.x = copysign(min(self.max_linear_speed, max(self.min_linear_speed, abs(speed))), speed)
        except:
            pass
                    
    def convert_depth_image(self, ros_image):
        # Use cv_bridge() to convert the ROS image to OpenCV format
        try:
            # The depth image is a single-channel float32 image
            depth_image = self.cv_bridge.imgmsg_to_cv2(ros_image, "32FC1")
        except CvBridgeError, e:
            print e

        # Convert the depth image to a Numpy array
        self.depth_array = np.array(depth_image, dtype=np.float32)

    def get_camera_info(self, msg):
        self.image_width = msg.width
        self.image_height = msg.height

    def shutdown(self):
        rospy.loginfo("Stopping the robot...")
        self.cmd_vel_pub.publish(Twist())
        rospy.sleep(1)     

if __name__ == '__main__':
    try:
        ObjectTracker()
        rospy.spin()
    except rospy.ROSInterruptException:
        rospy.loginfo("Object tracking node terminated.")

2.2 Launch File

<launch>
   <param name="/camera/driver/depth_registration" value="True" />

   <node pkg="diego_vision" name="object_tracker" type="object_tracker2.py" output="screen">

   <remap from="camera_info" to="/camera/depth/camera_info" />
   <remap from="depth_image" to="/camera/depth/image" />

   <rosparam>
       rate: 10
       max_z: 2.0
       goal_z: 0.6
       z_threshold: 0.05
       x_threshold: 0.1
       z_scale: 1.0
       x_scale: 0.1
       max_rotation_speed: 0.2
       min_rotation_speed: 0.02
       max_linear_speed: 0.2
       min_linear_speed: 0.05
   </rosparam>

   </node>
</launch>

With the face_tracker2.py node already running, execute the following command to start the object_tracker2.py node and the robot will follow the detected face as it moves.

roslaunch diego_vision object_tracker2.launch

One point deserves special mention: the speed parameters in the launch file must not be set too high. If they are, the robot keeps swinging back and forth while trying to track the face. The reason is the lag in image processing: the commanded speed multiplied by the lag time carries the robot past its intended position, so it ends up correcting itself over and over.
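
A rough, purely illustrative calculation of this effect (the 0.3 s lag value is assumed, not measured on Diego1 plus):

# Hypothetical numbers: a 0.3 s image-processing lag and two different speed limits
processing_lag = 0.3                      # seconds from frame capture to Twist command
for max_rotation_speed in (2.0, 0.2):     # rad/s
    overshoot = max_rotation_speed * processing_lag
    print("limit %.1f rad/s -> overshoot per correction: %.2f rad (~%.0f deg)"
          % (max_rotation_speed, overshoot, overshoot * 180 / 3.14159))
# At 2.0 rad/s the robot turns about 34 degrees past the target before the next
# frame is processed and keeps oscillating; at 0.2 rad/s the error is only about
# 3 degrees, which is why the launch file above uses small speed limits.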

Lesson 17: Machine Vision - Feature Point Detection and Tracking

In the previous lesson we implemented face detection: when a face appears in front of the camera, a rectangle in the image window marks its position. The next step is to extract feature points and track them. This lesson ports the sample code from ROS By Example Volume 1 to OpenCV 3; all of the code has been tested in an OpenCV 3 environment and runs correctly.

1. Feature Point Extraction

1.1 Source Code

The source file good_features.py is available on GitHub.

OpenCV already provides the goodFeaturesToTrack function for extracting feature points; we only need to define an ROI in the program and then call goodFeaturesToTrack to obtain the feature points inside that ROI.
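
As a quick illustration of the call itself, the snippet below runs goodFeaturesToTrack on a still image with a hand-picked ROI. The file name face.jpg and the ROI coordinates are placeholders, and the detector parameters mirror the launch file further down.

import cv2
import numpy as np

image = cv2.imread("face.jpg")
grey = cv2.equalizeHist(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))

# Restrict the detector to the ROI with a mask (white pixels = search here)
x, y, w, h = 100, 80, 120, 150          # hypothetical ROI
mask = np.zeros_like(grey)
mask[y:y + h, x:x + w] = 255

corners = cv2.goodFeaturesToTrack(grey, mask=mask, maxCorners=200,
                                  qualityLevel=0.02, minDistance=7,
                                  blockSize=10, useHarrisDetector=False, k=0.04)

# Draw the detected corners and save the result
if corners is not None:
    for cx, cy in np.float32(corners).reshape(-1, 2):
        cv2.circle(image, (int(cx), int(cy)), 2, (0, 255, 0), -1)
cv2.imwrite("face_features.jpg", image)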

In the source code, GoodFeatures inherits from ROS2OpenCV3 and overrides the process_image function. process_image calls get_keypoints, which in turn calls goodFeaturesToTrack to extract the feature points inside detect_box.

#!/usr/bin/env python

import roslib
import rospy
import cv2
from ros2opencv3 import ROS2OpenCV3
import numpy as np

class GoodFeatures(ROS2OpenCV3):
    def __init__(self, node_name): 
        super(GoodFeatures, self).__init__(node_name)
          
        # Do we show text on the display?
        self.show_text = rospy.get_param("~show_text", True)
        
        # How big should the feature points be (in pixels)?
        self.feature_size = rospy.get_param("~feature_size", 1)
        
        # Good features parameters
        self.gf_maxCorners = rospy.get_param("~gf_maxCorners", 200)
        self.gf_qualityLevel = rospy.get_param("~gf_qualityLevel", 0.02)
        self.gf_minDistance = rospy.get_param("~gf_minDistance", 7)
        self.gf_blockSize = rospy.get_param("~gf_blockSize", 10)
        self.gf_useHarrisDetector = rospy.get_param("~gf_useHarrisDetector", True)
        self.gf_k = rospy.get_param("~gf_k", 0.04)
        
        # Store all parameters together for passing to the detector
        self.gf_params = dict(maxCorners = self.gf_maxCorners, 
                       qualityLevel = self.gf_qualityLevel,
                       minDistance = self.gf_minDistance,
                       blockSize = self.gf_blockSize,
                       useHarrisDetector = self.gf_useHarrisDetector,
                       k = self.gf_k)

        # Initialize key variables
        self.keypoints = list()
        self.detect_box = None
        self.mask = None
        
    def process_image(self, cv_image):
        # If the user has not selected a region, just return the image
        if not self.detect_box:
            return cv_image

        # Create a greyscale version of the image
        grey = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
        
        # Equalize the histogram to reduce lighting effects
        grey = cv2.equalizeHist(grey)

        # Get the good feature keypoints in the selected region
        keypoints = self.get_keypoints(grey, self.detect_box)
        
        # If we have points, display them
        if keypoints is not None and len(keypoints) > 0:
            for x, y in keypoints:
                cv2.circle(self.marker_image, (int(x), int(y)), self.feature_size, (0, 255, 0, 0), cv2.FILLED, 8, 0)
        # Process any special keyboard commands
        if 32 <= self.keystroke and self.keystroke < 128:
            cc = chr(self.keystroke).lower()
            if cc == 'c':
                # Clear the current keypoints
                keypoints = list()
                self.detect_box = None
                                
        return cv_image        

    def get_keypoints(self, input_image, detect_box):
        # Initialize the mask with all black pixels
        self.mask = np.zeros_like(input_image)
 
        # Get the coordinates and dimensions of the detect_box
        try:
            x, y, w, h = detect_box
        except: 
            return None
        
        # Set the selected rectangle within the mask to white
        self.mask[y:y+h, x:x+w] = 255

        # Compute the good feature keypoints within the selected region
        keypoints = list()
        kp = cv2.goodFeaturesToTrack(input_image, mask = self.mask, **self.gf_params)
        if kp is not None and len(kp) > 0:
            for x, y in np.float32(kp).reshape(-1, 2):
                keypoints.append((x, y))
                
        return keypoints

if __name__ == '__main__':
    try:
        node_name = "good_features"
        GoodFeatures(node_name)
        rospy.spin()
    except KeyboardInterrupt:
        print "Shutting down the Good Features node."
        cv2.destroyAllWindows()

1.2 Launch File

<launch>
 <node pkg="diego_vision" name="good_features" type="good_features.py" output="screen">

 <remap from="input_rgb_image" to="/camera/rgb/image_color" />
 
 <rosparam>
 gf_maxCorners: 200
 gf_qualityLevel: 0.02
 gf_minDistance: 7
 gf_blockSize: 10
 gf_useHarrisDetector: False
 gf_k: 0.04
 feature_size: 1
 show_text: True
 </rosparam>
 
 </node>
</launch>

2. Feature Point Tracking

2.1 Source Code

The source file lk_tracker.py is available on GitHub.

OpenCV provides the calcOpticalFlowPyrLK function for matching feature points between consecutive video frames. It implements the Lucas-Kanade optical flow method; interested readers can study the algorithm in more depth.
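
The key trick in track_keypoints below is a forward/backward consistency check: points are tracked from the previous frame to the current one and then back again, and only points that return to within one pixel of where they started are kept. Stripped of the ROS plumbing, the check looks roughly like this (prev_grey, grey and p0 are assumed to be two greyscale frames and an Nx1x2 float32 keypoint array):

import cv2
import numpy as np

lk_params = dict(winSize=(10, 10), maxLevel=2,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 20, 0.01))

def forward_backward_track(prev_grey, grey, p0):
    # Forward flow: previous frame -> current frame
    p1, st, err = cv2.calcOpticalFlowPyrLK(prev_grey, grey, p0, None, **lk_params)
    # Backward flow: current frame -> previous frame
    p0r, st, err = cv2.calcOpticalFlowPyrLK(grey, prev_grey, p1, None, **lk_params)
    # Keep only the points that come back to within one pixel of where they started
    d = abs(p0 - p0r).reshape(-1, 2).max(-1)
    good = d < 1
    return [(x, y) for (x, y), ok in zip(p1.reshape(-1, 2), good) if ok]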

In the source code, LKTracker inherits from GoodFeatures and overrides the process_image function. process_image calls track_keypoints, which in turn calls calcOpticalFlowPyrLK to track the feature points from frame to frame.

#!/usr/bin/env python

import roslib
import rospy
import cv2
import numpy as np
from good_features import GoodFeatures

class LKTracker(GoodFeatures):
    def __init__(self, node_name):
        super(LKTracker, self).__init__(node_name)
        
        self.show_text = rospy.get_param("~show_text", True)
        self.feature_size = rospy.get_param("~feature_size", 1)
                
        # LK parameters
        self.lk_winSize = rospy.get_param("~lk_winSize", (10, 10))
        self.lk_maxLevel = rospy.get_param("~lk_maxLevel", 2)
        self.lk_criteria = rospy.get_param("~lk_criteria", (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 20, 0.01))
        
        self.lk_params = dict( winSize  = self.lk_winSize, 
                  maxLevel = self.lk_maxLevel, 
                  criteria = self.lk_criteria)    
        
        self.detect_interval = 1
        self.keypoints = None

        self.detect_box = None
        self.track_box = None
        self.mask = None
        self.grey = None
        self.prev_grey = None
            
    def process_image(self, cv_image):
        # If we don't yet have a detection box (drawn by the user
        # with the mouse), keep waiting
        if self.detect_box is None:
            return cv_image

        # Create a greyscale version of the image
        self.grey = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
        
        # Equalize the grey histogram to minimize lighting effects
        self.grey = cv2.equalizeHist(self.grey)
        
        # If we haven't yet started tracking, set the track box to the
        # detect box and extract the keypoints within it
        if self.track_box is None or not self.is_rect_nonzero(self.track_box):
            self.track_box = self.detect_box
            self.keypoints = self.get_keypoints(self.grey, self.track_box)
        
        else:
            if self.prev_grey is None:
                self.prev_grey = self.grey
    
            # Now that have keypoints, track them to the next frame
            # using optical flow
            self.track_box = self.track_keypoints(self.grey, self.prev_grey)

        # Process any special keyboard commands for this module
        if 32 <= self.keystroke and self.keystroke < 128:
            cc = chr(self.keystroke).lower()
            if cc == 'c':
                # Clear the current keypoints
                self.keypoints = None
                self.track_box = None
                self.detect_box = None
                
        self.prev_grey = self.grey
                
        return cv_image               
                    
    def track_keypoints(self, grey, prev_grey):
        # We are tracking points between the previous frame and the
        # current frame
        img0, img1 = prev_grey, grey
        
        # Reshape the current keypoints into a numpy array required
        # by calcOpticalFlowPyrLK()
        p0 = np.float32([p for p in self.keypoints]).reshape(-1, 1, 2)
        
        # Calculate the optical flow from the previous frame to the current frame
        p1, st, err = cv2.calcOpticalFlowPyrLK(img0, img1, p0, None, **self.lk_params)
        
        # Do the reverse calculation: from the current frame to the previous frame
        try:
            p0r, st, err = cv2.calcOpticalFlowPyrLK(img1, img0, p1, None, **self.lk_params)
            
            # Compute the distance between corresponding points in the two flows
            d = abs(p0-p0r).reshape(-1, 2).max(-1)
            
            # If the distance between pairs of points is < 1 pixel, set
            # a value in the "good" array to True, otherwise False
            good = d < 1
        
            # Initialize a list to hold new keypoints
            new_keypoints = list()
            
            # Cycle through all current and new keypoints and only keep
            # those that satisfy the "good" condition above
            for (x, y), good_flag in zip(p1.reshape(-1, 2), good):
                if not good_flag:
                    continue
                new_keypoints.append((x, y))
                
                # Draw the keypoint on the image
                cv2.circle(self.marker_image, (int(x), int(y)), self.feature_size, (0, 255, 0, 0), cv2.FILLED, 8, 0)
            # Set the global keypoint list to the new list    
            self.keypoints = new_keypoints
            
            # Convert the remaining keypoints to the array format OpenCV expects
            keypoints_array = np.float32([p for p in self.keypoints]).reshape(-1, 1, 2)

            # If we have enough points, find the best fit ellipse around them
            if len(self.keypoints) > 6:
                track_box = cv2.fitEllipse(keypoints_array)
            else:
                # Otherwise, find the best fitting rectangle
                track_box = cv2.boundingRect(keypoints_array)
        except:
            track_box = None
                        
        return track_box
    
if __name__ == '__main__':
    try:
        node_name = "lk_tracker"
        LKTracker(node_name)
        rospy.spin()
    except KeyboardInterrupt:
        print "Shutting down LK Tracking node."
        cv2.destroyAllWindows()
    

2.2 Launch File

<launch>
   <node pkg="diego_vision" name="lk_tracker" type="lk_tracker.py" output="screen">
   <remap from="input_rgb_image" to="/camera/rgb/image_color" />

   <rosparam>
       show_text: True
       gf_maxCorners: 200
       gf_qualityLevel: 0.02
       gf_minDistance: 7
       gf_blockSize: 10
       gf_useHarrisDetector: False
       gf_k: 0.04
       feature_size: 1
   </rosparam>

   </node>
</launch>

3. Testing

First, launch the Xtion camera node:

roslaunch diego_vision openni_node.launch

Then launch lk_tracker:

roslaunch diego_vision lk_tracker.launch

After launch, a video window appears. Select an ROI with the mouse and the feature points detected inside it will be displayed. If you then move the object inside the ROI, the rectangle stays locked onto the originally selected object and follows its movement.

The figure below shows my own test result: the ROI was selected around my hand, and as I moved my hand the rectangle followed the movement.
