Yazılım

Mediapipe – Hands Kütüphanesi ve Uygulama Örnekleri

Mediapipe Hands kütüphanesi, görüntü işleme teknolojisinde standartların üstünde bir el algılaması yapıyor. Aynı zamanda algıladığı elin eklemlerinde tam 21 adet nokta (landmark; 0’dan 20’ye kadar numaralandırılır) belirliyor ve bizlere makine öğrenmesi projeleri için muhteşem bir olanak sunuyor.

hand_landmarks.png

Her bir nokta “landmark” adıyla isimlendirilir.

Bu konuda detaylı bilgiyi Mediapipe’ın kendi sitesinden öğrenebilirsiniz. Site aynı zamanda uygulamayı gerçekleştirmek için Python ve JavaScript dahil çeşitli dillerde kullanım örnekleri sunuyor.

A demonstration of using MediaPipe hand tracking to move a robotic hand’s fingers with the Mirru app.
Mediapipe ile rahatlıkla yapabileceğiniz bir proje

Örneklere başlamadan önce yüklemeniz gereken kütüphaneleri söyleyelim:

opencv
mediapipe

Mediapipe kütüphanesini nasıl yükleyeceğinizi Mediapipe yazımızda anlatmıştık.
Hands uygulaması için Mediapipe’ın bize bağışladığı güzel programı bir çalıştıralım

Hands kütüphanesinin örneğini çalıştırmak için Python kodları:

import cv2
import mediapipe as mp
# Short aliases for the MediaPipe sub-modules used by this script.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

# For static images:
# List the image paths to annotate here; while the list is empty the loop
# below does nothing.
IMAGE_FILES = []
with mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=2,
    min_detection_confidence=0.5) as hands:
  for idx, file in enumerate(IMAGE_FILES):
    loaded = cv2.imread(file)
    # cv2.imread signals a missing or undecodable file by returning None
    # rather than raising; skip such files instead of crashing in cv2.flip.
    if loaded is None:
      print('Could not read image:', file)
      continue
    # Flip the image around the y-axis for correct handedness output.
    image = cv2.flip(loaded, 1)
    # Convert the BGR image to RGB before processing.
    results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Print handedness and draw hand landmarks on the image.
    print('Handedness:', results.multi_handedness)
    if not results.multi_hand_landmarks:
      continue
    image_height, image_width, _ = image.shape
    annotated_image = image.copy()
    for hand_landmarks in results.multi_hand_landmarks:
      print('hand_landmarks:', hand_landmarks)
      tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
      # Scale the landmark's x/y by the image size before printing. The
      # message is one string so no stray space follows the "(".
      print(
        f'Index finger tip coordinates: ('
        f'{tip.x * image_width}, '
        f'{tip.y * image_height})'
      )
      mp_drawing.draw_landmarks(
        annotated_image,
        hand_landmarks,
        mp_hands.HAND_CONNECTIONS,
        mp_drawing_styles.get_default_hand_landmarks_style(),
        mp_drawing_styles.get_default_hand_connections_style())
    output_path = '/tmp/annotated_image' + str(idx) + '.png'
    # Flip back so the saved image is not mirrored relative to the input.
    # cv2.imwrite reports failure through its return value, so check it.
    if not cv2.imwrite(output_path, cv2.flip(annotated_image, 1)):
      print('Could not write image:', output_path)

# For webcam input:
# Opens capture device 0, runs hand tracking on every frame and shows the
# annotated, mirrored frame until Esc is pressed.
cap = cv2.VideoCapture(0)
try:
  with mp_hands.Hands(
      model_complexity=0,
      min_detection_confidence=0.5,
      min_tracking_confidence=0.5) as hands:
    while cap.isOpened():
      success, image = cap.read()
      if not success:
        print("Ignoring empty camera frame.")
        # If loading a video, use 'break' instead of 'continue'.
        continue

      # To improve performance, optionally mark the image as not writeable to
      # pass by reference.
      image.flags.writeable = False
      image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
      results = hands.process(image)

      # Draw the hand annotations on the image.
      image.flags.writeable = True
      image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
      if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
          mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())
      # Flip the image horizontally for a selfie-view display.
      cv2.imshow('MediaPipe Hands', cv2.flip(image, 1))
      # Key code 27 is Esc: leave the loop when it is pressed.
      if cv2.waitKey(5) & 0xFF == 27:
        break
finally:
  # Always free the camera and close the preview window, even when the loop
  # is left through an exception or Ctrl+C.
  cap.release()
  cv2.destroyAllWindows()

Bu programı çalıştırdığınızda elinizi algılayacak ve elinizin 21 noktasına (0–20) landmark atayacaktır.

hand_crops.png

Örnek 2:

Elin açık ve kapalı olma durumunu algılayan ve ekrana yazan programı oluşturalım

# el açık yada kapalı iken ekrana yazdırır
import cv2
import mediapipe as mp

# Short aliases for the MediaPipe sub-modules used by this script.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

# For static images:
# List the image paths to annotate here; while the list is empty the loop
# below does nothing.
IMAGE_FILES = []
with mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=2,
    min_detection_confidence=0.5) as hands:
  for idx, file in enumerate(IMAGE_FILES):
    loaded = cv2.imread(file)
    # cv2.imread signals a missing or undecodable file by returning None
    # rather than raising; skip such files instead of crashing in cv2.flip.
    if loaded is None:
      print('Could not read image:', file)
      continue
    # Flip the image around the y-axis for correct handedness output.
    image = cv2.flip(loaded, 1)
    # Convert the BGR image to RGB before processing.
    results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Print handedness and draw hand landmarks on the image.
    print('Handedness:', results.multi_handedness)
    if not results.multi_hand_landmarks:
      continue
    image_height, image_width, _ = image.shape
    annotated_image = image.copy()
    for hand_landmarks in results.multi_hand_landmarks:
      print('hand_landmarks:', hand_landmarks)
      tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
      # Scale the landmark's x/y by the image size before printing. The
      # message is one string so no stray space follows the "(".
      print(
        f'Index finger tip coordinates: ('
        f'{tip.x * image_width}, '
        f'{tip.y * image_height})'
      )
      mp_drawing.draw_landmarks(
        annotated_image,
        hand_landmarks,
        mp_hands.HAND_CONNECTIONS,
        mp_drawing_styles.get_default_hand_landmarks_style(),
        mp_drawing_styles.get_default_hand_connections_style())
    output_path = '/tmp/annotated_image' + str(idx) + '.png'
    # Flip back so the saved image is not mirrored relative to the input.
    # cv2.imwrite reports failure through its return value, so check it.
    if not cv2.imwrite(output_path, cv2.flip(annotated_image, 1)):
      print('Could not write image:', output_path)

# For webcam input:
# Opens capture device 0 and, for every detected hand, writes "KAPALI"
# (closed) or "ACIK" (open) on the mirrored frame until Esc is pressed.
cap = cv2.VideoCapture(0)
try:
  with mp_hands.Hands(
      min_detection_confidence=0.5,
      min_tracking_confidence=0.5) as hands:
    while cap.isOpened():
      success, image = cap.read()
      if not success:
        print("Ignoring empty camera frame.")
        # If loading a video, use 'break' instead of 'continue'.
        continue

      # Flip the image horizontally for a later selfie-view display, and
      # convert the BGR image to RGB.
      image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
      # To improve performance, optionally mark the image as not writeable to
      # pass by reference.
      image.flags.writeable = False
      results = hands.process(image)

      # Draw the hand annotations on the image.
      image.flags.writeable = True
      image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
      if results.multi_hand_landmarks:
        font = cv2.FONT_HERSHEY_PLAIN
        for hand_idx, hand_landmarks in enumerate(results.multi_hand_landmarks):

          # Write on screen whether the hand is open or closed.
          # Landmark 9 is MIDDLE_FINGER_MCP and landmark 12 is
          # MIDDLE_FINGER_TIP in mp_hands.HandLandmark.
          x, y = hand_landmarks.landmark[9].x, hand_landmarks.landmark[9].y
          x1, y1 = hand_landmarks.landmark[12].x, hand_landmarks.landmark[12].y

          # One text row per detected hand so the labels of two hands do not
          # overwrite each other; the first hand keeps the (10, 50) origin.
          text_origin = (10, 50 + 50 * hand_idx)

          # Image y grows downwards, so y1 > y means the fingertip is below
          # its knuckle (assumes an upright hand): report the hand as closed.
          if y1 > y:
            cv2.putText(image, "KAPALI", text_origin, font, 4, (0, 0, 0), 3)
          else:
            cv2.putText(image, "ACIK", text_origin, font, 4, (0, 0, 0), 3)

          mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())
      cv2.imshow('MediaPipe Hands', image)
      # Key code 27 is Esc: leave the loop when it is pressed.
      if cv2.waitKey(5) & 0xFF == 27:
        break
finally:
  # Always free the camera and close the preview window, even when the loop
  # is left through an exception or Ctrl+C.
  cap.release()
  cv2.destroyAllWindows()

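Programın en önemli noktası ise şu komutlardadır:

x, y = hand_landmarks.landmark[9].x, hand_landmarks.landmark[9].y
x1, y1 = hand_landmarks.landmark[12].x, hand_landmarks.landmark[12].y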

Yukarıdaki kodlarda bilmemiz gereken, x, y ve x1, y1 adındaki değişkenlere landmark koordinatlarının atanmasıdır. Mesela y1 değişkenine 12. landmark’ın (orta parmağın ucu), y değişkenine ise 9. landmark’ın (orta parmağın kökü) y koordinatı atanıyor. Landmark’ların konumları için yukarıdaki görselimize bakabilirsiniz.

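“if y1 > y:” komutu ise bu iki değişkeni karşılaştırır. Görüntü koordinatlarında y değeri aşağıya doğru arttığı için y1 > y olması, parmak ucunun parmak kökünden daha aşağıda, yani elin kapalı olduğu anlamına gelir. Program bu karşılaştırmanın sonucuna göre elin açık veya kapalı olma durumunu ekrana yazar.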

Diğer komutlar ise önceki programla %95 aynı. Burayı kavrayınca birçok örnek gerçekleştirebilirsiniz.

Mesela

Örnek 3:

Başparmağın yukarıya veya aşağıya bakması durumunda bunu “OLUMLU” veya “OLUMSUZ” olarak değerlendiren programı oluşturalım.

import cv2
import mediapipe as mp

# Short aliases for the MediaPipe sub-modules used by this script.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

# For static images:
# List the image paths to annotate here; while the list is empty the loop
# below does nothing.
IMAGE_FILES = []
with mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=2,
    min_detection_confidence=0.5) as hands:
  for idx, file in enumerate(IMAGE_FILES):
    loaded = cv2.imread(file)
    # cv2.imread signals a missing or undecodable file by returning None
    # rather than raising; skip such files instead of crashing in cv2.flip.
    if loaded is None:
      print('Could not read image:', file)
      continue
    # Flip the image around the y-axis for correct handedness output.
    image = cv2.flip(loaded, 1)
    # Convert the BGR image to RGB before processing.
    results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Print handedness and draw hand landmarks on the image.
    print('Handedness:', results.multi_handedness)
    if not results.multi_hand_landmarks:
      continue
    image_height, image_width, _ = image.shape
    annotated_image = image.copy()
    for hand_landmarks in results.multi_hand_landmarks:
      print('hand_landmarks:', hand_landmarks)
      tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
      # Scale the landmark's x/y by the image size before printing. The
      # message is one string so no stray space follows the "(".
      print(
        f'Index finger tip coordinates: ('
        f'{tip.x * image_width}, '
        f'{tip.y * image_height})'
      )
      mp_drawing.draw_landmarks(
        annotated_image,
        hand_landmarks,
        mp_hands.HAND_CONNECTIONS,
        mp_drawing_styles.get_default_hand_landmarks_style(),
        mp_drawing_styles.get_default_hand_connections_style())
    output_path = '/tmp/annotated_image' + str(idx) + '.png'
    # Flip back so the saved image is not mirrored relative to the input.
    # cv2.imwrite reports failure through its return value, so check it.
    if not cv2.imwrite(output_path, cv2.flip(annotated_image, 1)):
      print('Could not write image:', output_path)

# For webcam input:
# Opens capture device 0 and, for every detected hand, writes "OLUMLU"
# (positive) or "OLUMSUZ" (negative) on the mirrored frame depending on the
# thumb direction, until Esc is pressed.
cap = cv2.VideoCapture(0)
try:
  with mp_hands.Hands(
      min_detection_confidence=0.5,
      min_tracking_confidence=0.5) as hands:
    while cap.isOpened():
      success, image = cap.read()
      if not success:
        print("Ignoring empty camera frame.")
        # If loading a video, use 'break' instead of 'continue'.
        continue

      # Flip the image horizontally for a later selfie-view display, and
      # convert the BGR image to RGB.
      image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
      # To improve performance, optionally mark the image as not writeable to
      # pass by reference.
      image.flags.writeable = False
      results = hands.process(image)

      # Draw the hand annotations on the image.
      image.flags.writeable = True
      image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
      if results.multi_hand_landmarks:
        font = cv2.FONT_HERSHEY_PLAIN
        for hand_idx, hand_landmarks in enumerate(results.multi_hand_landmarks):

          # Write on screen whether the thumb points up or down.
          # Landmark 0 is WRIST and landmark 4 is THUMB_TIP in
          # mp_hands.HandLandmark.
          x, y = hand_landmarks.landmark[0].x, hand_landmarks.landmark[0].y
          x1, y1 = hand_landmarks.landmark[4].x, hand_landmarks.landmark[4].y

          # One text row per detected hand so the labels of two hands do not
          # overwrite each other; the first hand keeps the (10, 50) origin.
          text_origin = (10, 50 + 50 * hand_idx)

          # Image y grows downwards, so y1 > y means the thumb tip is below
          # the wrist: report a thumbs-down.
          if y1 > y:
            cv2.putText(image, "OLUMSUZ", text_origin, font, 4, (0, 0, 0), 3)
          else:
            cv2.putText(image, "OLUMLU", text_origin, font, 4, (0, 0, 0), 3)

          mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())
      cv2.imshow('MediaPipe Hands', image)
      # Key code 27 is Esc: leave the loop when it is pressed.
      if cv2.waitKey(5) & 0xFF == 27:
        break
finally:
  # Always free the camera and close the preview window, even when the loop
  # is left through an exception or Ctrl+C.
  cap.release()
  cv2.destroyAllWindows()

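Not: Python dosyalarınıza kesinlikle kütüphane isimlerini (ör. mediapipe.py, cv2.py) vermeyin; aksi halde import edilen kütüphane yerine kendi dosyanız yüklenir. Dosya adlarında Türkçe karakterler de kullanmayın.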

Kaynaklar:
https://google.github.io/mediapipe/solutions/hands.html
https://www.youtube.com/c/NicholasRenotte/videos
https://www.youtube.com/watch?v=qZSSDzFeu7g

Similar Posts

Bir cevap yazın

E-posta hesabınız yayımlanmayacak. Gerekli alanlar * ile işaretlenmişlerdir