Mercurial > libervia-web
diff libervia/web/pages/calls/_browser/webrtc.py @ 1602:6feac4a25e60
browser: Remote Control implementation:
- Add `cbor-x` JS dependency.
- In the "Call" page, a Remote Control session can now be started. This is done by
  clicking on a search item's three-dots menu. Libervia Web will act as a controlling
  device. The call box is then adapted, and mouse/wheel and keyboard events are sent
  to the remote peer; touch events are converted to mouse ones.
- Some Brython 3.12* related changes.
rel 436
author | Goffi <goffi@goffi.org> |
---|---|
date | Sat, 11 May 2024 14:02:22 +0200 |
parents | 0a4433a343a3 |
children | 4a9679369856 |
line wrap: on
line diff
--- a/libervia/web/pages/calls/_browser/webrtc.py Sat May 11 13:57:49 2024 +0200 +++ b/libervia/web/pages/calls/_browser/webrtc.py Sat May 11 14:02:22 2024 +0200 @@ -2,10 +2,11 @@ import re from bridge import AsyncBridge as Bridge -from browser import aio, console as log, document, window +from browser import aio, console as log, document, window, DOMNode import dialog -from javascript import JSObject +from javascript import JSObject, NULL import jid +from js_modules.cbor_x import CBOR log.warning = log.warn profile = window.profile or "" @@ -89,6 +90,189 @@ aio.run(bridge.call_end(self.session_id, "")) +class RemoteControler: + """Send input events to controlled device""" + + def __init__( + self, + session_id: str, + capture_elt: DOMNode, + data_channel: JSObject, + simulate_mouse: bool = True + ) -> None: + """Initialize a RemoteControler instance. + + @param session_id: ID of the Jingle Session + @param capture_elt: element where the input events are captured. + @param data_channel: WebRTCDataChannel instance linking to controlled device. + @simulate_mouse: if True, touch event will be converted to mouse events. 
+ """ + self.session_id = session_id + self.capture_elt = capture_elt + self.capture_elt.bind("click", self._on_capture_elt_click) + self.data_channel = data_channel + data_channel.bind("open", self._on_open) + self.simulate_mouse = simulate_mouse + self.last_mouse_position = (0, 0) + + def _on_capture_elt_click(self, __): + self.capture_elt.focus() + + def _on_open(self, __): + log.info(f"Data channel open, starting to send inputs.") + self.start_capture() + + def start_capture(self) -> None: + """Start capturing input events to send them to the controlled device.""" + for event_name in [ + "mousedown", + "mouseup", + "mousemove", + "keydown", + "keyup", + "touchstart", + "touchend", + "touchmove", + "wheel", + ]: + self.capture_elt.bind(event_name, self._send_event) + self.capture_elt.focus() + + def get_stream_coordinates(self, client_x: float, client_y: float) -> tuple[float, float]: + """Calculate coordinates relative to the actual video stream. + + This method calculates the coordinates relative to the video content inside the <video> + element, considering any scaling or letterboxing due to aspect ratio differences. + + @param client_x: The clientX value from the event, relative to the viewport. + @param client_y: The clientY value from the event, relative to the viewport. + @return: The x and y coordinates relative to the actual video stream. 
+ """ + video_element = self.capture_elt + video_rect = video_element.getBoundingClientRect() + + # Calculate offsets relative to the capture element + element_x = client_x - video_rect.left + element_y = client_y - video_rect.top + + element_width, element_height = video_rect.width, video_rect.height + stream_width, stream_height = video_element.videoWidth, video_element.videoHeight + + if not all((element_width, element_height, stream_width, stream_height)): + log.warning("Invalid dimensions for video or element, using offsets.") + return element_x, element_y + + element_aspect_ratio = element_width / element_height + stream_aspect_ratio = stream_width / stream_height + + # Calculate scale and offset based on aspect ratio comparison + if stream_aspect_ratio > element_aspect_ratio: + # Video is more "wide" than the element: letterboxes will be top and bottom + scale = element_width / stream_width + scaled_height = stream_height * scale + offset_x, offset_y = 0, (element_height - scaled_height) / 2 + else: + # Video is more "tall" than the element: letterboxes will be on the sides + scale = element_height / stream_height + scaled_width = stream_width * scale + offset_x, offset_y = (element_width - scaled_width) / 2, 0 + + # Mouse coordinates relative to the video stream + x = (element_x - offset_x) / scale + y = (element_y - offset_y) / scale + + # Ensure the coordinates are within the bounds of the video stream + x = max(0.0, min(x, stream_width)) + y = max(0.0, min(y, stream_height)) + + return x, y + + def _send_event(self, event: JSObject) -> None: + """Serialize and send the event to the controlled device through the data channel.""" + event.preventDefault() + serialized_event = self._serialize_event(event) + # TODO: we should join events instead + self.data_channel.send(CBOR.encode(serialized_event)) + + def _serialize_event(self, event: JSObject) -> dict[str, object]: + """Serialize event data for transmission. + + @param event: an input event. 
+ @return: event data to send to peer. + """ + if event.type.startswith("key"): + ret = { + "type": event.type, + "key": event.key, + } + if event.location: + ret["location"] = event.location + return ret + elif event.type.startswith("mouse"): + x, y = self.get_stream_coordinates(event.clientX, event.clientY) + return { + "type": event.type, + "buttons": event.buttons, + "x": x, + "y": y, + } + elif event.type.startswith("touch"): + touches = [ + { + "identifier": touch.identifier, + **dict(zip(["x", "y"], self.get_stream_coordinates( + touch.clientX, + touch.clientY + ))), + } + for touch in event.touches + ] + if self.simulate_mouse: + # In simulate mouse mode, we send mouse events. + if touches: + touch_data = touches[0] + x, y = touch_data["x"], touch_data["y"] + self.last_mouse_position = (x, y) + else: + x, y = self.last_mouse_position + + mouse_event: dict[str, object] = { + "x": x, + "y": y, + } + if event.type == "touchstart": + mouse_event.update({ + "type": "mousedown", + "buttons": 1, + }) + elif event.type == "touchend": + mouse_event.update({ + "type": "mouseup", + "buttons": 1, + }) + elif event.type == "touchmove": + mouse_event.update({ + "type": "mousemove", + }) + return mouse_event + else: + # Normal mode, with send touch events. + return { + "type": event.type, + "touches": touches + } + elif event.type == "wheel": + return { + "type": event.type, + "deltaX": event.deltaX, + "deltaY": event.deltaY, + "deltaZ": event.deltaZ, + "deltaMode": event.deltaMode, + } + else: + raise Exception(f"Internal Error: unexpected event {event.type=}") + + class WebRTC: def __init__( @@ -100,12 +284,13 @@ on_video_devices=None, on_reset_cb=None, file_only: bool = False, - extra_data: dict|None = None + extra_data: dict | None = None, ): """Initialise WebRTC instance. 
@param screen_sharing_cb: callable function for screen sharing event - @param on_connection_established_cb: callable function for connection established event + @param on_connection_established_cb: callable function for connection established + event @param on_reconnect_cb: called when a reconnection is triggered. @param on_connection_lost_cb: called when the connection is lost. @param on_video_devices: called when new video devices are set. @@ -376,7 +561,8 @@ self.on_connection_established_cb() elif state == "failed": log.error( - "ICE connection failed. Check network connectivity and ICE configurations." + "ICE connection failed. Check network connectivity and ICE" + " configurations." ) elif state == "disconnected": log.warning("ICE connection was disconnected.") @@ -431,9 +617,9 @@ if server["type"] == "stun": ice_server["urls"] = f"stun:{server['host']}:{server['port']}" elif server["type"] == "turn": - ice_server[ - "urls" - ] = f"turn:{server['host']}:{server['port']}?transport={server['transport']}" + ice_server["urls"] = ( + f"turn:{server['host']}:{server['port']}?transport={server['transport']}" + ) ice_server["username"] = server["username"] ice_server["credential"] = server["password"] ice_servers.append(ice_server) @@ -496,9 +682,9 @@ track.stop() media_constraints = { - "video": {"deviceId": self.current_camera} - if self.current_camera - else True + "video": ( + {"deviceId": self.current_camera} if self.current_camera else True + ) } new_stream = await window.navigator.mediaDevices.getUserMedia( @@ -540,8 +726,8 @@ await video_sender.replaceTrack(new_video_tracks[0]) if screen: - # For screen sharing, we track the end event to properly stop the sharing when - # the user clicks on the browser's stop sharing dialog. + # For screen sharing, we track the end event to properly stop the sharing + # when the user clicks on the browser's stop sharing dialog. 
def on_track_ended(event): aio.run(self.toggle_screen_sharing()) @@ -687,18 +873,17 @@ """ log.debug(f"new peer candidates received: {candidates}") - try: - # FIXME: javascript.NULL must be used here, once we move to Brython 3.12.3+ - remoteDescription_is_none = self._peer_connection.remoteDescription is None - except Exception as e: - # FIXME: should be fine in Brython 3.12.3+ - log.debug("Workaround for Brython bug activated.") - remoteDescription_is_none = True + # try: + # # FIXME: javascript.NULL must be used here, once we move to Brython 3.12.3+ + # remoteDescription_is_none = self._peer_connection.remoteDescription is None + # except Exception as e: + # # FIXME: should be fine in Brython 3.12.3+ + # log.debug("Workaround for Brython bug activated.") + # remoteDescription_is_none = True if ( self._peer_connection is None - # or self._peer_connection.remoteDescription is NULL - or remoteDescription_is_none + or self._peer_connection.remoteDescription is NULL ): for media_type in ("audio", "video", "application"): media_candidates = candidates.get(media_type) @@ -786,13 +971,11 @@ ice_data[media_type] = { "ufrag": self.ufrag, "pwd": self.pwd, - "candidates": candidates + "candidates": candidates, } await bridge.ice_candidates_add( self.sid, - json.dumps( - ice_data - ), + json.dumps(ice_data), ) self.local_candidates_buffer.clear() @@ -811,6 +994,53 @@ log.debug(f"Call SID: {self.sid}") await self._send_buffered_local_candidates() + async def start_remote_control( + self, callee_jid: jid.JID, audio: bool = True, video: bool = True + ) -> None: + """Starts a Remote Control session. + + If both audio and video are False, no screenshare will be done, the input will be + sent without feedback. 
+ @param audio: True if an audio flux is required + @param video: True if a video flux is required + """ + if audio or not video: + raise NotImplementedError("Only video screenshare is supported for now.") + peer_connection = await self._create_peer_connection() + if video: + peer_connection.addTransceiver("video", {"direction": "recvonly"}) + data_channel = peer_connection.createDataChannel("remote-control") + + call_data = await self._get_call_data() + + try: + remote_control_data = json.loads( + await bridge.remote_control_start( + str(callee_jid), + json.dumps( + { + "devices": { + "keyboard": {}, + "mouse": {}, + "wheel": {} + }, + "call_data": call_data, + } + ), + ) + ) + except Exception as e: + dialog.notification.show(f"Can't start remote control: {e}", level="error") + return + + self.sid = remote_control_data["session_id"] + + log.debug(f"Remote Control SID: {self.sid}") + await self._send_buffered_local_candidates() + self.remote_controller = RemoteControler( + self.sid, self.remote_video_elt, data_channel + ) + def _on_opened_data_channel(self, event): log.info("Datachannel has been opened.") @@ -820,28 +1050,22 @@ data_channel = peer_connection.createDataChannel("file") call_data = await self._get_call_data() log.info(f"sending file to {callee_jid!r}") - file_meta = { - "size": file.size - } + file_meta = {"size": file.size} if file.type: file_meta["media_type"] = file.type try: - file_data = json.loads(await bridge.file_jingle_send( + file_data = json.loads( + await bridge.file_jingle_send( str(callee_jid), "", file.name, "", - json.dumps({ - "webrtc": True, - "call_data": call_data, - **file_meta - }) - )) + json.dumps({"webrtc": True, "call_data": call_data, **file_meta}), + ) + ) except Exception as e: - dialog.notification.show( - f"Can't send file: {e}", level="error" - ) + dialog.notification.show(f"Can't send file: {e}", level="error") return self.sid = file_data["session_id"]