changeset 1602:6feac4a25e60

browser: Remote Control implementation: - Add `cbor-x` JS dependency. - In "Call" page, a Remote Control session can now be started. This is done by clicking on a search item 3 dots menu. Libervia Web will act as a controlling device. The call box is then adapted, and mouse/wheel and keyboard events are sent to remote, touch events are converted to mouse one. - Some Brython 3.12* related changes. rel 436
author Goffi <goffi@goffi.org>
date Sat, 11 May 2024 14:02:22 +0200
parents d07838fc9d99
children e105d7719479
files libervia/web/pages/_browser/browser_meta.json libervia/web/pages/calls/_browser/__init__.py libervia/web/pages/calls/_browser/webrtc.py libervia/web/server/restricted_bridge.py
diffstat 4 files changed, 338 insertions(+), 59 deletions(-) [+]
line wrap: on
line diff
--- a/libervia/web/pages/_browser/browser_meta.json	Sat May 11 13:57:49 2024 +0200
+++ b/libervia/web/pages/_browser/browser_meta.json	Sat May 11 14:02:22 2024 +0200
@@ -7,7 +7,8 @@
                 "moment": "^2.29.4",
                 "ogv": "^1.8.9",
                 "tippy.js": "^6.3",
-                "emoji-picker-element": "^1.18"
+                "emoji-picker-element": "^1.18",
+                "cbor-x": "^1.5.9"
             }
         },
         "brython_map": {
@@ -33,6 +34,10 @@
                 "import_type": "module",
                 "path": "emoji-picker-element/picker.js",
                 "export": []
+            },
+            "cbor-x": {
+                "path": "cbor-x/dist/index.min.js",
+                "export": ["CBOR"]
             }
         }
     }
--- a/libervia/web/pages/calls/_browser/__init__.py	Sat May 11 13:57:49 2024 +0200
+++ b/libervia/web/pages/calls/_browser/__init__.py	Sat May 11 14:02:22 2024 +0200
@@ -4,7 +4,7 @@
 from browser import aio, console as log, document, window
 from cache import cache
 import dialog
-from javascript import JSObject
+from javascript import JSObject, NULL
 from jid import JID
 from jid_search import JidSearch
 import loading
@@ -27,7 +27,8 @@
 )
 AUDIO = "audio"
 VIDEO = "video"
-ALLOWED_CALL_MODES = {AUDIO, VIDEO}
+REMOTE = "remote-control"
+ALLOWED_CALL_MODES = {AUDIO, VIDEO, REMOTE}
 INACTIVE_CLASS = "inactive"
 MUTED_CLASS = "muted"
 SCREEN_OFF_CLASS = "screen-off"
@@ -107,6 +108,9 @@
                     ".click-to-audio": lambda evt, item: aio.run(
                         self.on_entity_action(evt, AUDIO, item)
                     ),
+                    ".click-to-remote-control": lambda evt, item: aio.run(
+                        self.on_entity_action(evt, REMOTE, item)
+                    ),
                 },
             },
         )
@@ -155,13 +159,28 @@
         if mode in ALLOWED_CALL_MODES:
             if self._call_mode == mode:
                 return
+            log.debug(f"Switching to {mode} call mode.")
             self._call_mode = mode
-            with_video = mode == VIDEO
-            for elt in self.call_box_elt.select(".is-video-only"):
-                if with_video:
+            selector = ".is-video-only, .is-not-remote"
+            for elt in self.call_box_elt.select(selector):
+                if mode == VIDEO:
+                    # In video, all elements are visible.
                     elt.classList.remove("is-hidden")
+                elif mode == AUDIO:
+                    # In audio, we hide video-only elements.
+                    if elt.classList.contains("is-video-only"):
+                        elt.classList.add("is-hidden")
+                    else:
+                        elt.classList.remove("is-hidden")
+                elif mode == REMOTE:
+                    # In remote, we show all video element, except if they are
+                    # `is-not-remote`
+                    if elt.classList.contains("is-not-remote"):
+                        elt.classList.add("is-hidden")
+                    else:
+                        elt.classList.remove("is-hidden")
                 else:
-                    elt.classList.add("is-hidden")
+                    raise Exception("This line should never be reached.")
         else:
             raise ValueError("Invalid call mode")
 
@@ -190,7 +209,10 @@
         @param profile: Profile associated with the action
         """
         action_data = json.loads(action_data_s)
-        if action_data.get("type") == "confirm" and action_data.get("subtype") == "file":
+        if (
+            action_data.get("type") in ("confirm", "not_in_roster_leak")
+            and action_data.get("subtype") == "file"
+        ):
             aio.run(self.on_file_preflight(action_data, action_id))
         elif action_data.get("type") == "file":
             aio.run(self.on_file_proposal(action_data, action_id))
@@ -286,7 +308,7 @@
             # TODO: Check if any other frontend is connected for this profile, and refuse
             # the file if none is.
             return
-        if action_data.get("file_accepted", False):
+        if action_data.get("pre_accepted", False):
             # File proposal has already been accepted in preflight.
             accepted = True
         else:
@@ -472,13 +494,24 @@
             btn_elt.classList.remove(INACTIVE_CLASS, MUTED_CLASS, "is-warning")
             btn_elt.classList.add("is-success")
 
-    async def make_call(self, audio: bool = True, video: bool = True) -> None:
+    async def make_call(
+        self,
+        audio: bool = True,
+        video: bool = True,
+        remote: bool = False
+    ) -> None:
         """Start a WebRTC call
 
         @param audio: True if an audio flux is required
         @param video: True if a video flux is required
+        @param remote: True if this is a Remote Control session.
         """
-        self.call_mode = VIDEO if video else AUDIO
+        if remote:
+            self.call_mode = REMOTE
+        elif video:
+            self.call_mode = VIDEO
+        else:
+            self.call_mode = AUDIO
         try:
             callee_jid = JID(self.search_elt.value.strip())
             if not callee_jid.is_valid:
@@ -495,7 +528,12 @@
         self.set_avatar(callee_jid)
 
         self.switch_mode("call")
-        await self.webrtc.make_call(callee_jid, audio, video)
+        if remote:
+            await self.webrtc.start_remote_control(
+                callee_jid, audio, video
+            )
+        else:
+            await self.webrtc.make_call(callee_jid, audio, video)
 
     async def end_call(self, data: dict) -> None:
         """Stop streaming and clean instance"""
@@ -612,18 +650,17 @@
         @param fullscreen: if set, determine the fullscreen state; otherwise,
             the fullscreen mode will be toggled.
         """
-        do_fullscreen = (
-            document.fullscreenElement is None if fullscreen is None else fullscreen
-        )
+        if fullscreen is None:
+            fullscreen = document.fullscreenElement is NULL
 
         try:
-            if do_fullscreen:
-                if document.fullscreenElement is None:
+            if fullscreen:
+                if document.fullscreenElement is NULL:
                     self.call_box_elt.requestFullscreen()
                     document["full_screen_btn"].classList.add("is-hidden")
                     document["exit_full_screen_btn"].classList.remove("is-hidden")
             else:
-                if document.fullscreenElement is not None:
+                if document.fullscreenElement is not NULL:
                     document.exitFullscreen()
                     document["full_screen_btn"].classList.remove("is-hidden")
                     document["exit_full_screen_btn"].classList.add("is-hidden")
@@ -711,11 +748,15 @@
         evt.stopPropagation()
         if action == "menu":
             evt.currentTarget.parent.classList.toggle("is-active")
-        elif action in (VIDEO, AUDIO):
+        elif action in (VIDEO, AUDIO, REMOTE):
             self.search_elt.value = item["entity"]
             # we want the dropdown to be inactive
             evt.currentTarget.closest(".dropdown").classList.remove("is-active")
-            await self.make_call(video=action == VIDEO)
+            if action == REMOTE:
+                await self.make_call(audio=False, video=True, remote=True)
+
+            else:
+                await self.make_call(video=action == VIDEO)
 
 
 CallUI()
--- a/libervia/web/pages/calls/_browser/webrtc.py	Sat May 11 13:57:49 2024 +0200
+++ b/libervia/web/pages/calls/_browser/webrtc.py	Sat May 11 14:02:22 2024 +0200
@@ -2,10 +2,11 @@
 import re
 
 from bridge import AsyncBridge as Bridge
-from browser import aio, console as log, document, window
+from browser import aio, console as log, document, window, DOMNode
 import dialog
-from javascript import JSObject
+from javascript import JSObject, NULL
 import jid
+from js_modules.cbor_x import CBOR
 
 log.warning = log.warn
 profile = window.profile or ""
@@ -89,6 +90,189 @@
         aio.run(bridge.call_end(self.session_id, ""))
 
 
+class RemoteControler:
+    """Send input events to controlled device"""
+
+    def __init__(
+        self,
+        session_id: str,
+        capture_elt: DOMNode,
+        data_channel: JSObject,
+        simulate_mouse: bool = True
+    ) -> None:
+        """Initialize a RemoteControler instance.
+
+        @param session_id: ID of the Jingle Session
+        @param capture_elt: element where the input events are captured.
+        @param data_channel: WebRTCDataChannel instance linking to controlled device.
+        @param simulate_mouse: if True, touch event will be converted to mouse events.
+        """
+        self.session_id = session_id
+        self.capture_elt = capture_elt
+        self.capture_elt.bind("click", self._on_capture_elt_click)
+        self.data_channel = data_channel
+        data_channel.bind("open", self._on_open)
+        self.simulate_mouse = simulate_mouse
+        self.last_mouse_position = (0, 0)
+
+    def _on_capture_elt_click(self, __):
+        self.capture_elt.focus()
+
+    def _on_open(self, __):
+        log.info(f"Data channel open, starting to send inputs.")
+        self.start_capture()
+
+    def start_capture(self) -> None:
+        """Start capturing input events to send them to the controlled device."""
+        for event_name in [
+                "mousedown",
+                "mouseup",
+                "mousemove",
+                "keydown",
+                "keyup",
+                "touchstart",
+                "touchend",
+                "touchmove",
+                "wheel",
+        ]:
+            self.capture_elt.bind(event_name, self._send_event)
+        self.capture_elt.focus()
+
+    def get_stream_coordinates(self, client_x: float, client_y: float) -> tuple[float, float]:
+        """Calculate coordinates relative to the actual video stream.
+
+        This method calculates the coordinates relative to the video content inside the <video>
+        element, considering any scaling or letterboxing due to aspect ratio differences.
+
+        @param client_x: The clientX value from the event, relative to the viewport.
+        @param client_y: The clientY value from the event, relative to the viewport.
+        @return: The x and y coordinates relative to the actual video stream.
+        """
+        video_element = self.capture_elt
+        video_rect = video_element.getBoundingClientRect()
+
+        # Calculate offsets relative to the capture element
+        element_x = client_x - video_rect.left
+        element_y = client_y - video_rect.top
+
+        element_width, element_height = video_rect.width, video_rect.height
+        stream_width, stream_height = video_element.videoWidth, video_element.videoHeight
+
+        if not all((element_width, element_height, stream_width, stream_height)):
+            log.warning("Invalid dimensions for video or element, using offsets.")
+            return element_x, element_y
+
+        element_aspect_ratio = element_width / element_height
+        stream_aspect_ratio = stream_width / stream_height
+
+        # Calculate scale and offset based on aspect ratio comparison
+        if stream_aspect_ratio > element_aspect_ratio:
+            # Video is more "wide" than the element: letterboxes will be top and bottom
+            scale = element_width / stream_width
+            scaled_height = stream_height * scale
+            offset_x, offset_y = 0, (element_height - scaled_height) / 2
+        else:
+            # Video is more "tall" than the element: letterboxes will be on the sides
+            scale = element_height / stream_height
+            scaled_width = stream_width * scale
+            offset_x, offset_y = (element_width - scaled_width) / 2, 0
+
+        # Mouse coordinates relative to the video stream
+        x = (element_x - offset_x) / scale
+        y = (element_y - offset_y) / scale
+
+        # Ensure the coordinates are within the bounds of the video stream
+        x = max(0.0, min(x, stream_width))
+        y = max(0.0, min(y, stream_height))
+
+        return x, y
+
+    def _send_event(self, event: JSObject) -> None:
+        """Serialize and send the event to the controlled device through the data channel."""
+        event.preventDefault()
+        serialized_event = self._serialize_event(event)
+        # TODO: we should join events instead
+        self.data_channel.send(CBOR.encode(serialized_event))
+
+    def _serialize_event(self, event: JSObject) -> dict[str, object]:
+        """Serialize event data for transmission.
+
+        @param event: an input event.
+        @return: event data to send to peer.
+        """
+        if event.type.startswith("key"):
+            ret = {
+                "type": event.type,
+                "key": event.key,
+            }
+            if event.location:
+                ret["location"] = event.location
+            return ret
+        elif event.type.startswith("mouse"):
+            x, y = self.get_stream_coordinates(event.clientX, event.clientY)
+            return {
+                "type": event.type,
+                "buttons": event.buttons,
+                "x": x,
+                "y": y,
+            }
+        elif event.type.startswith("touch"):
+            touches =  [
+                {
+                    "identifier": touch.identifier,
+                    **dict(zip(["x", "y"], self.get_stream_coordinates(
+                        touch.clientX,
+                        touch.clientY
+                    ))),
+                }
+                for touch in event.touches
+            ]
+            if self.simulate_mouse:
+                # In simulate mouse mode, we send mouse events.
+                if touches:
+                    touch_data = touches[0]
+                    x, y = touch_data["x"], touch_data["y"]
+                    self.last_mouse_position = (x, y)
+                else:
+                    x, y = self.last_mouse_position
+
+                mouse_event: dict[str, object] = {
+                    "x": x,
+                    "y": y,
+                }
+                if event.type == "touchstart":
+                    mouse_event.update({
+                        "type": "mousedown",
+                        "buttons": 1,
+                    })
+                elif event.type == "touchend":
+                    mouse_event.update({
+                        "type": "mouseup",
+                        "buttons": 1,
+                    })
+                elif event.type == "touchmove":
+                    mouse_event.update({
+                        "type": "mousemove",
+                    })
+                return mouse_event
+            else:
+                # Normal mode, we send touch events.
+                return {
+                    "type": event.type,
+                    "touches": touches
+                }
+        elif event.type == "wheel":
+            return {
+                "type": event.type,
+                "deltaX": event.deltaX,
+                "deltaY": event.deltaY,
+                "deltaZ": event.deltaZ,
+                "deltaMode": event.deltaMode,
+            }
+        else:
+            raise Exception(f"Internal Error: unexpected event {event.type=}")
+
+
 class WebRTC:
 
     def __init__(
@@ -100,12 +284,13 @@
         on_video_devices=None,
         on_reset_cb=None,
         file_only: bool = False,
-        extra_data: dict|None = None
+        extra_data: dict | None = None,
     ):
         """Initialise WebRTC instance.
 
         @param screen_sharing_cb: callable function for screen sharing event
-        @param on_connection_established_cb: callable function for connection established event
+        @param on_connection_established_cb: callable function for connection established
+            event
         @param on_reconnect_cb: called when a reconnection is triggered.
         @param on_connection_lost_cb: called when the connection is lost.
         @param on_video_devices: called when new video devices are set.
@@ -376,7 +561,8 @@
                 self.on_connection_established_cb()
         elif state == "failed":
             log.error(
-                "ICE connection failed. Check network connectivity and ICE configurations."
+                "ICE connection failed. Check network connectivity and ICE"
+                " configurations."
             )
         elif state == "disconnected":
             log.warning("ICE connection was disconnected.")
@@ -431,9 +617,9 @@
             if server["type"] == "stun":
                 ice_server["urls"] = f"stun:{server['host']}:{server['port']}"
             elif server["type"] == "turn":
-                ice_server[
-                    "urls"
-                ] = f"turn:{server['host']}:{server['port']}?transport={server['transport']}"
+                ice_server["urls"] = (
+                    f"turn:{server['host']}:{server['port']}?transport={server['transport']}"
+                )
                 ice_server["username"] = server["username"]
                 ice_server["credential"] = server["password"]
             ice_servers.append(ice_server)
@@ -496,9 +682,9 @@
                         track.stop()
 
             media_constraints = {
-                "video": {"deviceId": self.current_camera}
-                if self.current_camera
-                else True
+                "video": (
+                    {"deviceId": self.current_camera} if self.current_camera else True
+                )
             }
 
             new_stream = await window.navigator.mediaDevices.getUserMedia(
@@ -540,8 +726,8 @@
             await video_sender.replaceTrack(new_video_tracks[0])
 
             if screen:
-                # For screen sharing, we track the end event to properly stop the sharing when
-                # the user clicks on the browser's stop sharing dialog.
+                # For screen sharing, we track the end event to properly stop the sharing
+                # when the user clicks on the browser's stop sharing dialog.
                 def on_track_ended(event):
                     aio.run(self.toggle_screen_sharing())
 
@@ -687,18 +873,17 @@
         """
         log.debug(f"new peer candidates received: {candidates}")
 
-        try:
-            # FIXME: javascript.NULL must be used here, once we move to Brython 3.12.3+
-            remoteDescription_is_none = self._peer_connection.remoteDescription is None
-        except Exception as e:
-            # FIXME: should be fine in Brython 3.12.3+
-            log.debug("Workaround for Brython bug activated.")
-            remoteDescription_is_none = True
+        # try:
+        #     # FIXME: javascript.NULL must be used here, once we move to Brython 3.12.3+
+        #     remoteDescription_is_none = self._peer_connection.remoteDescription is None
+        # except Exception as e:
+        #     # FIXME: should be fine in Brython 3.12.3+
+        #     log.debug("Workaround for Brython bug activated.")
+        #     remoteDescription_is_none = True
 
         if (
             self._peer_connection is None
-            # or self._peer_connection.remoteDescription is NULL
-            or remoteDescription_is_none
+            or self._peer_connection.remoteDescription is NULL
         ):
             for media_type in ("audio", "video", "application"):
                 media_candidates = candidates.get(media_type)
@@ -786,13 +971,11 @@
                 ice_data[media_type] = {
                     "ufrag": self.ufrag,
                     "pwd": self.pwd,
-                    "candidates": candidates
+                    "candidates": candidates,
                 }
             await bridge.ice_candidates_add(
                 self.sid,
-                json.dumps(
-                    ice_data
-                ),
+                json.dumps(ice_data),
             )
             self.local_candidates_buffer.clear()
 
@@ -811,6 +994,53 @@
         log.debug(f"Call SID: {self.sid}")
         await self._send_buffered_local_candidates()
 
+    async def start_remote_control(
+        self, callee_jid: jid.JID, audio: bool = True, video: bool = True
+    ) -> None:
+        """Starts a Remote Control session.
+
+        If both audio and video are False, no screenshare will be done, the input will be
+        sent without feedback.
+        @param audio: True if an audio flux is required
+        @param video: True if a video flux is required
+        """
+        if audio or not video:
+            raise NotImplementedError("Only video screenshare is supported for now.")
+        peer_connection = await self._create_peer_connection()
+        if video:
+            peer_connection.addTransceiver("video", {"direction": "recvonly"})
+        data_channel = peer_connection.createDataChannel("remote-control")
+
+        call_data = await self._get_call_data()
+
+        try:
+            remote_control_data = json.loads(
+                await bridge.remote_control_start(
+                    str(callee_jid),
+                    json.dumps(
+                        {
+                            "devices": {
+                                "keyboard": {},
+                                "mouse": {},
+                                "wheel": {}
+                            },
+                            "call_data": call_data,
+                        }
+                    ),
+                )
+            )
+        except Exception as e:
+            dialog.notification.show(f"Can't start remote control: {e}", level="error")
+            return
+
+        self.sid = remote_control_data["session_id"]
+
+        log.debug(f"Remote Control SID: {self.sid}")
+        await self._send_buffered_local_candidates()
+        self.remote_controller = RemoteControler(
+            self.sid, self.remote_video_elt, data_channel
+        )
+
     def _on_opened_data_channel(self, event):
         log.info("Datachannel has been opened.")
 
@@ -820,28 +1050,22 @@
         data_channel = peer_connection.createDataChannel("file")
         call_data = await self._get_call_data()
         log.info(f"sending file to {callee_jid!r}")
-        file_meta = {
-            "size": file.size
-        }
+        file_meta = {"size": file.size}
         if file.type:
             file_meta["media_type"] = file.type
 
         try:
-            file_data = json.loads(await bridge.file_jingle_send(
+            file_data = json.loads(
+                await bridge.file_jingle_send(
                     str(callee_jid),
                     "",
                     file.name,
                     "",
-                    json.dumps({
-                        "webrtc": True,
-                        "call_data": call_data,
-                        **file_meta
-                    })
-            ))
+                    json.dumps({"webrtc": True, "call_data": call_data, **file_meta}),
+                )
+            )
         except Exception as e:
-            dialog.notification.show(
-                f"Can't send file: {e}", level="error"
-            )
+            dialog.notification.show(f"Can't send file: {e}", level="error")
             return
 
         self.sid = file_data["session_id"]
--- a/libervia/web/server/restricted_bridge.py	Sat May 11 13:57:49 2024 +0200
+++ b/libervia/web/server/restricted_bridge.py	Sat May 11 14:02:22 2024 +0200
@@ -318,3 +318,12 @@
         return await self.host.bridge_call(
             "jid_search", search_term, options_s, profile
         )
+
+    async def remote_control_start(
+        self, peer_jid_s: str, extra_s: str, profile: str
+    ) -> None:
+        self.no_service_profile(profile)
+        return await self.host.bridge_call(
+            "remote_control_start", peer_jid_s, extra_s, profile
+        )
+