diff libervia/web/pages/calls/_browser/webrtc.py @ 1602:6feac4a25e60

browser: Remote Control implementation: - Add `cbor-x` JS dependency. - In "Call" page, a Remote Control session can now be started. This is done by clicking on a search item 3 dots menu. Libervia Web will act as a controlling device. The call box is then adapted, and mouse/wheel and keyboard events are sent to the remote peer; touch events are converted to mouse ones. - Some Brython 3.12* related changes. rel 436
author Goffi <goffi@goffi.org>
date Sat, 11 May 2024 14:02:22 +0200
parents 0a4433a343a3
children 4a9679369856
line wrap: on
line diff
--- a/libervia/web/pages/calls/_browser/webrtc.py	Sat May 11 13:57:49 2024 +0200
+++ b/libervia/web/pages/calls/_browser/webrtc.py	Sat May 11 14:02:22 2024 +0200
@@ -2,10 +2,11 @@
 import re
 
 from bridge import AsyncBridge as Bridge
-from browser import aio, console as log, document, window
+from browser import aio, console as log, document, window, DOMNode
 import dialog
-from javascript import JSObject
+from javascript import JSObject, NULL
 import jid
+from js_modules.cbor_x import CBOR
 
 log.warning = log.warn
 profile = window.profile or ""
@@ -89,6 +90,189 @@
         aio.run(bridge.call_end(self.session_id, ""))
 
 
+class RemoteControler:
+    """Send input events to controlled device"""
+
+    def __init__(
+        self,
+        session_id: str,
+        capture_elt: DOMNode,
+        data_channel: JSObject,
+        simulate_mouse: bool = True
+    ) -> None:
+        """Initialize a RemoteControler instance.
+
+        @param session_id: ID of the Jingle Session
+        @param capture_elt: element where the input events are captured.
+        @param data_channel: WebRTCDataChannel instance linking to controlled device.
+        @param simulate_mouse: if True, touch events will be converted to mouse events.
+        """
+        self.session_id = session_id
+        self.capture_elt = capture_elt
+        self.capture_elt.bind("click", self._on_capture_elt_click)
+        self.data_channel = data_channel
+        data_channel.bind("open", self._on_open)
+        self.simulate_mouse = simulate_mouse
+        self.last_mouse_position = (0, 0)
+
+    def _on_capture_elt_click(self, __):
+        self.capture_elt.focus()
+
+    def _on_open(self, __):
+        log.info(f"Data channel open, starting to send inputs.")
+        self.start_capture()
+
+    def start_capture(self) -> None:
+        """Start capturing input events to send them to the controlled device."""
+        for event_name in [
+                "mousedown",
+                "mouseup",
+                "mousemove",
+                "keydown",
+                "keyup",
+                "touchstart",
+                "touchend",
+                "touchmove",
+                "wheel",
+        ]:
+            self.capture_elt.bind(event_name, self._send_event)
+        self.capture_elt.focus()
+
+    def get_stream_coordinates(self, client_x: float, client_y: float) -> tuple[float, float]:
+        """Calculate coordinates relative to the actual video stream.
+
+        This method calculates the coordinates relative to the video content inside the <video>
+        element, considering any scaling or letterboxing due to aspect ratio differences.
+
+        @param client_x: The clientX value from the event, relative to the viewport.
+        @param client_y: The clientY value from the event, relative to the viewport.
+        @return: The x and y coordinates relative to the actual video stream.
+        """
+        video_element = self.capture_elt
+        video_rect = video_element.getBoundingClientRect()
+
+        # Calculate offsets relative to the capture element
+        element_x = client_x - video_rect.left
+        element_y = client_y - video_rect.top
+
+        element_width, element_height = video_rect.width, video_rect.height
+        stream_width, stream_height = video_element.videoWidth, video_element.videoHeight
+
+        if not all((element_width, element_height, stream_width, stream_height)):
+            log.warning("Invalid dimensions for video or element, using offsets.")
+            return element_x, element_y
+
+        element_aspect_ratio = element_width / element_height
+        stream_aspect_ratio = stream_width / stream_height
+
+        # Calculate scale and offset based on aspect ratio comparison
+        if stream_aspect_ratio > element_aspect_ratio:
+            # Video is wider than the element: letterboxes will be at top and bottom
+            scale = element_width / stream_width
+            scaled_height = stream_height * scale
+            offset_x, offset_y = 0, (element_height - scaled_height) / 2
+        else:
+            # Video is taller than the element: letterboxes will be on the sides
+            scale = element_height / stream_height
+            scaled_width = stream_width * scale
+            offset_x, offset_y = (element_width - scaled_width) / 2, 0
+
+        # Mouse coordinates relative to the video stream
+        x = (element_x - offset_x) / scale
+        y = (element_y - offset_y) / scale
+
+        # Ensure the coordinates are within the bounds of the video stream
+        x = max(0.0, min(x, stream_width))
+        y = max(0.0, min(y, stream_height))
+
+        return x, y
+
+    def _send_event(self, event: JSObject) -> None:
+        """Serialize and send the event to the controlled device through the data channel."""
+        event.preventDefault()
+        serialized_event = self._serialize_event(event)
+        # TODO: we should join events instead
+        self.data_channel.send(CBOR.encode(serialized_event))
+
+    def _serialize_event(self, event: JSObject) -> dict[str, object]:
+        """Serialize event data for transmission.
+
+        @param event: an input event.
+        @return: event data to send to peer.
+        """
+        if event.type.startswith("key"):
+            ret = {
+                "type": event.type,
+                "key": event.key,
+            }
+            if event.location:
+                ret["location"] = event.location
+            return ret
+        elif event.type.startswith("mouse"):
+            x, y = self.get_stream_coordinates(event.clientX, event.clientY)
+            return {
+                "type": event.type,
+                "buttons": event.buttons,
+                "x": x,
+                "y": y,
+            }
+        elif event.type.startswith("touch"):
+            touches =  [
+                {
+                    "identifier": touch.identifier,
+                    **dict(zip(["x", "y"], self.get_stream_coordinates(
+                        touch.clientX,
+                        touch.clientY
+                    ))),
+                }
+                for touch in event.touches
+            ]
+            if self.simulate_mouse:
+                # In simulate mouse mode, we send mouse events.
+                if touches:
+                    touch_data = touches[0]
+                    x, y = touch_data["x"], touch_data["y"]
+                    self.last_mouse_position = (x, y)
+                else:
+                    x, y = self.last_mouse_position
+
+                mouse_event: dict[str, object] = {
+                    "x": x,
+                    "y": y,
+                }
+                if event.type == "touchstart":
+                    mouse_event.update({
+                        "type": "mousedown",
+                        "buttons": 1,
+                    })
+                elif event.type == "touchend":
+                    mouse_event.update({
+                        "type": "mouseup",
+                        "buttons": 1,
+                    })
+                elif event.type == "touchmove":
+                    mouse_event.update({
+                        "type": "mousemove",
+                    })
+                return mouse_event
+            else:
+                # Normal mode: we send touch events as-is.
+                return {
+                    "type": event.type,
+                    "touches": touches
+                }
+        elif event.type == "wheel":
+            return {
+                "type": event.type,
+                "deltaX": event.deltaX,
+                "deltaY": event.deltaY,
+                "deltaZ": event.deltaZ,
+                "deltaMode": event.deltaMode,
+            }
+        else:
+            raise Exception(f"Internal Error: unexpected event {event.type=}")
+
+
 class WebRTC:
 
     def __init__(
@@ -100,12 +284,13 @@
         on_video_devices=None,
         on_reset_cb=None,
         file_only: bool = False,
-        extra_data: dict|None = None
+        extra_data: dict | None = None,
     ):
         """Initialise WebRTC instance.
 
         @param screen_sharing_cb: callable function for screen sharing event
-        @param on_connection_established_cb: callable function for connection established event
+        @param on_connection_established_cb: callable function for connection established
+            event
         @param on_reconnect_cb: called when a reconnection is triggered.
         @param on_connection_lost_cb: called when the connection is lost.
         @param on_video_devices: called when new video devices are set.
@@ -376,7 +561,8 @@
                 self.on_connection_established_cb()
         elif state == "failed":
             log.error(
-                "ICE connection failed. Check network connectivity and ICE configurations."
+                "ICE connection failed. Check network connectivity and ICE"
+                " configurations."
             )
         elif state == "disconnected":
             log.warning("ICE connection was disconnected.")
@@ -431,9 +617,9 @@
             if server["type"] == "stun":
                 ice_server["urls"] = f"stun:{server['host']}:{server['port']}"
             elif server["type"] == "turn":
-                ice_server[
-                    "urls"
-                ] = f"turn:{server['host']}:{server['port']}?transport={server['transport']}"
+                ice_server["urls"] = (
+                    f"turn:{server['host']}:{server['port']}?transport={server['transport']}"
+                )
                 ice_server["username"] = server["username"]
                 ice_server["credential"] = server["password"]
             ice_servers.append(ice_server)
@@ -496,9 +682,9 @@
                         track.stop()
 
             media_constraints = {
-                "video": {"deviceId": self.current_camera}
-                if self.current_camera
-                else True
+                "video": (
+                    {"deviceId": self.current_camera} if self.current_camera else True
+                )
             }
 
             new_stream = await window.navigator.mediaDevices.getUserMedia(
@@ -540,8 +726,8 @@
             await video_sender.replaceTrack(new_video_tracks[0])
 
             if screen:
-                # For screen sharing, we track the end event to properly stop the sharing when
-                # the user clicks on the browser's stop sharing dialog.
+                # For screen sharing, we track the end event to properly stop the sharing
+                # when the user clicks on the browser's stop sharing dialog.
                 def on_track_ended(event):
                     aio.run(self.toggle_screen_sharing())
 
@@ -687,18 +873,17 @@
         """
         log.debug(f"new peer candidates received: {candidates}")
 
-        try:
-            # FIXME: javascript.NULL must be used here, once we move to Brython 3.12.3+
-            remoteDescription_is_none = self._peer_connection.remoteDescription is None
-        except Exception as e:
-            # FIXME: should be fine in Brython 3.12.3+
-            log.debug("Workaround for Brython bug activated.")
-            remoteDescription_is_none = True
+        # try:
+        #     # FIXME: javascript.NULL must be used here, once we move to Brython 3.12.3+
+        #     remoteDescription_is_none = self._peer_connection.remoteDescription is None
+        # except Exception as e:
+        #     # FIXME: should be fine in Brython 3.12.3+
+        #     log.debug("Workaround for Brython bug activated.")
+        #     remoteDescription_is_none = True
 
         if (
             self._peer_connection is None
-            # or self._peer_connection.remoteDescription is NULL
-            or remoteDescription_is_none
+            or self._peer_connection.remoteDescription is NULL
         ):
             for media_type in ("audio", "video", "application"):
                 media_candidates = candidates.get(media_type)
@@ -786,13 +971,11 @@
                 ice_data[media_type] = {
                     "ufrag": self.ufrag,
                     "pwd": self.pwd,
-                    "candidates": candidates
+                    "candidates": candidates,
                 }
             await bridge.ice_candidates_add(
                 self.sid,
-                json.dumps(
-                    ice_data
-                ),
+                json.dumps(ice_data),
             )
             self.local_candidates_buffer.clear()
 
@@ -811,6 +994,53 @@
         log.debug(f"Call SID: {self.sid}")
         await self._send_buffered_local_candidates()
 
+    async def start_remote_control(
+        self, callee_jid: jid.JID, audio: bool = True, video: bool = True
+    ) -> None:
+        """Starts a Remote Control session.
+
+        If both audio and video are False, no screenshare will be done, the input will be
+        sent without feedback.
+        @param audio: True if an audio stream is required
+        @param video: True if a video stream is required
+        """
+        if audio or not video:
+            raise NotImplementedError("Only video screenshare is supported for now.")
+        peer_connection = await self._create_peer_connection()
+        if video:
+            peer_connection.addTransceiver("video", {"direction": "recvonly"})
+        data_channel = peer_connection.createDataChannel("remote-control")
+
+        call_data = await self._get_call_data()
+
+        try:
+            remote_control_data = json.loads(
+                await bridge.remote_control_start(
+                    str(callee_jid),
+                    json.dumps(
+                        {
+                            "devices": {
+                                "keyboard": {},
+                                "mouse": {},
+                                "wheel": {}
+                            },
+                            "call_data": call_data,
+                        }
+                    ),
+                )
+            )
+        except Exception as e:
+            dialog.notification.show(f"Can't start remote control: {e}", level="error")
+            return
+
+        self.sid = remote_control_data["session_id"]
+
+        log.debug(f"Remote Control SID: {self.sid}")
+        await self._send_buffered_local_candidates()
+        self.remote_controller = RemoteControler(
+            self.sid, self.remote_video_elt, data_channel
+        )
+
     def _on_opened_data_channel(self, event):
         log.info("Datachannel has been opened.")
 
@@ -820,28 +1050,22 @@
         data_channel = peer_connection.createDataChannel("file")
         call_data = await self._get_call_data()
         log.info(f"sending file to {callee_jid!r}")
-        file_meta = {
-            "size": file.size
-        }
+        file_meta = {"size": file.size}
         if file.type:
             file_meta["media_type"] = file.type
 
         try:
-            file_data = json.loads(await bridge.file_jingle_send(
+            file_data = json.loads(
+                await bridge.file_jingle_send(
                     str(callee_jid),
                     "",
                     file.name,
                     "",
-                    json.dumps({
-                        "webrtc": True,
-                        "call_data": call_data,
-                        **file_meta
-                    })
-            ))
+                    json.dumps({"webrtc": True, "call_data": call_data, **file_meta}),
+                )
+            )
         except Exception as e:
-            dialog.notification.show(
-                f"Can't send file: {e}", level="error"
-            )
+            dialog.notification.show(f"Can't send file: {e}", level="error")
             return
 
         self.sid = file_data["session_id"]