import cv2
import numpy as np
#import cv2  # We're using OpenCV to read video, to install !pip install opencv-python
import time
import os
import openai


import speech_recognition as sr

# Recognize speech in French
#speech_to_text(language='fr-FR')

# Recognize speech in German
#speech_to_text(language='de-DE')

#def speech_to_text(language='en-US'):
def speech_to_text():
    # Initialize recognizer
    recognizer = sr.Recognizer()

    # Use the microphone as source for input
    with sr.Microphone() as source:
        print("Adjusting for ambient noise... Please wait.")
        recognizer.adjust_for_ambient_noise(source, duration=5)
        print("Listening...")
        
        # Listen for the first phrase and extract it into audio data
        audio_data = recognizer.listen(source, timeout=10, phrase_time_limit=20)

        '''
        timeout: This parameter sets the maximum number of seconds that the listen method will wait for a phrase to start. If this timeout is reached and no speech is detected, it will raise a WaitTimeoutError.
        phrase_time_limit: This parameter sets the maximum number of seconds that the listen method will wait for a phrase to finish. Once this limit is reached, it will stop listening even if the phrase is not finished.
        '''
        
        try:
            # Recognize (convert from speech to text)
            text = recognizer.recognize_google(audio_data)
            print(f"Recognized text: {text}")
        except sr.UnknownValueError:
            print("Sorry, I could not understand the audio.")
        except sr.RequestError:
            print("Could not request results; check your network connection.")
        
    return (text)


# Open a connection to the camera (0 is usually the default camera)
cap = cv2.VideoCapture(0)

# Check if the camera opened successfully
if not cap.isOpened():
    print("Error: Could not open video device.")
    exit()

# Set video frame width and height (optional)
#cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
#cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

for i in range(0,3): # it gives time to camera to adjust, necessary to avoid that the image is very dark
    # Capture frame-by-frame
    #cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    #cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 360)
    ret, frame01 = cap.read()

for i in range(0,3): # it gives time to camera to adjust, necessary to avoid that the image is very dark
    # Capture frame-by-frame
    #cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    #cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 360)
    ret, frame02 = cap.read()

for i in range(0,3): # it gives time to camera to adjust, necessary to avoid that the image is very dark
    # Capture frame-by-frame
    #cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    #cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 360)
    ret, frame03 = cap.read()

for i in range(0,3): # it gives time to camera to adjust, necessary to avoid that the image is very dark
    # Capture frame-by-frame
    #cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    #cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 360)
    ret, frame04 = cap.read()

# If frame is read correctly, ret is True
if not ret:
    print("Error: Failed to capture image.")


cap.release()


height, width = frame01.shape[:2]
top_row = np.hstack((frame01, frame02))
bottom_row = np.hstack((frame03, frame04))
combined_image = np.vstack((top_row, bottom_row))
cv2.imwrite('combined_image_new.jpg', combined_image)
#cv2.imshow('Combined Image', combined_image)
#cv2.waitKey(0)
cv2.destroyAllWindows()


from Ultrasonic import *
ultrasonic=Ultrasonic()                

data=ultrasonic.getDistance()   #Get the value
print ("Obstacle distance is "+str(data)+"CM")
            

print ("interrogating GPT4o")


import openai
import base64
import requests

openai.api_key = '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'

def encode_image(image_path):
    try:
        with open(image_path, "rb") as image_file:
            encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
            return encoded_string
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

image_path = "combined_image_new.jpg"
base64_image = encode_image(image_path)

headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {openai.api_key}"
}

#The head features also an untrasonic sensor. It shows that the distance of the nearest object at the center of the image is "+str(data)+"cm. \\\

voice_command = speech_to_text()

response = openai.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
      {
        "role": "user",
        "content": [
          {"type": "text", "text": "\\\
              You are controlling a robot cat. \\\
              Attached, you will find four susequent images taken live just now. They are shot from a webcam mounted on the head of the robot. \\\
              \\\
              The robot cat has four legs, each with three servo motors. \\\
Each step is about 2 centimeters.\\\
I am the person you see in the image.\\\
Write a program to do this:\\\
"+voice_command+"\\\
Consider that your code will be directly run by the Raspberry Pi that is in control of the servo motors of the robot cat.\\\
For this reason, you must be very precise and do not include any comment. Just produce the code.\\\
\\\
Here you can find an example of the Python code that moves the robot:\\\
\\\
\\\
from Control import *\\\
\\\
#Creating object 'control' of 'Control' class.\\\
control=Control()\\\
\\\
#Using the forWard function, let the robot dog move forward five steps.\\\
for i in range(5):\\\
	control.forWard()\\\
control.stop()\\\
\\\
#Using the backWard function, let the robot dog move backWard five steps.\\\
for i in range(5):\\\
	control.backWard()\\\
control.stop()\\\
\\\
#Using the turnLeft function, let the robot dog move left five steps.\\\
for i in range(5):\\\
	control.turnLeft()\\\
control.stop()\\\
\\\
#Using the turnRight function, let the robot dog move right five steps.\\\
for i in range(5):\\\
	control.turnRight()\\\
control.stop()\\\
\\\
#Turn the robot dog's body 10 degrees to the right\\\
for i in range(10):\\\
	control.attitude(0,0,i)\\\
	time.sleep(0.1)\\\
\\\
#Turn the robot dog's body 20 degrees to the left	\\\
for i in range(10,-10,-1):\\\
	control.attitude(0,0,i)\\\
	time.sleep(0.1)\\\
\\\
#Get the distance from the nearest front obstacle\\\
distance=ultrasonic.getDistance()   #Get the value\\\
	\\\
	\\\
              \\\
              \\\
              \\\
              \\\
              "
              },
          {
            "type": "image_url",
            "image_url": {
              "url": f"data:image/jpeg;base64,{base64_image}"            },
          },
        ],
      }
    ],
    max_tokens=300,
  )
print("===")
print(response.choices[0].message.content)
print("===")
code=response.choices[0].message.content
lines=code.splitlines()
lines = lines[1:-1] #remove the first and the last lines
code2run = "\n".join(lines)
exec(code2run)