libervia-web: changeset 1602:6feac4a25e60
browser: Remote Control implementation:
- Add `cbor-x` JS dependency.
- In "Call" page, a Remote Control session can now be started. This is done by clicking on
a search item 3 dots menu. Libervia Web will act as a controlling device. The call box
is then adapted, and mouse/wheel and keyboard events are sent to remote, touch events
are converted to mouse one.
- Some Brython 3.12* related changes.
rel 436
author   | Goffi <goffi@goffi.org>
date     | Sat, 11 May 2024 14:02:22 +0200
parents  | d07838fc9d99
children | e105d7719479
files    | libervia/web/pages/_browser/browser_meta.json
         | libervia/web/pages/calls/_browser/__init__.py
         | libervia/web/pages/calls/_browser/webrtc.py
         | libervia/web/server/restricted_bridge.py
diffstat | 4 files changed, 338 insertions(+), 59 deletions(-)
--- a/libervia/web/pages/_browser/browser_meta.json	Sat May 11 13:57:49 2024 +0200
+++ b/libervia/web/pages/_browser/browser_meta.json	Sat May 11 14:02:22 2024 +0200
@@ -7,7 +7,8 @@
         "moment": "^2.29.4",
         "ogv": "^1.8.9",
         "tippy.js": "^6.3",
-        "emoji-picker-element": "^1.18"
+        "emoji-picker-element": "^1.18",
+        "cbor-x": "^1.5.9"
     }
   },
   "brython_map": {
@@ -33,6 +34,10 @@
       "import_type": "module",
       "path": "emoji-picker-element/picker.js",
       "export": []
+    },
+    "cbor-x": {
+      "path": "cbor-x/dist/index.min.js",
+      "export": ["CBOR"]
     }
   }
 }
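For context, entries in `brython_map` expose bundled JS modules to Brython pages under the `js_modules` package; the `export` list above makes `CBOR` importable. A minimal sketch of how the new dependency is then used from browser-side Python (the import path matches the one added to `webrtc.py` below; the decode half of the round trip is illustrative, not part of this changeset):

    from js_modules.cbor_x import CBOR

    # Encode a plain mapping to CBOR bytes (cbor-x returns a JS Uint8Array),
    # suitable for sending over a WebRTC data channel...
    payload = CBOR.encode({"type": "mousemove", "x": 10.0, "y": 20.0})

    # ...and decode it back on the receiving side; properties of the decoded
    # JS object are reachable as attributes from Brython (e.g. `decoded.type`).
    decoded = CBOR.decode(payload)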
--- a/libervia/web/pages/calls/_browser/__init__.py	Sat May 11 13:57:49 2024 +0200
+++ b/libervia/web/pages/calls/_browser/__init__.py	Sat May 11 14:02:22 2024 +0200
@@ -4,7 +4,7 @@
 from browser import aio, console as log, document, window
 from cache import cache
 import dialog
-from javascript import JSObject
+from javascript import JSObject, NULL
 from jid import JID
 from jid_search import JidSearch
 import loading
@@ -27,7 +27,8 @@
 )
 AUDIO = "audio"
 VIDEO = "video"
-ALLOWED_CALL_MODES = {AUDIO, VIDEO}
+REMOTE = "remote-control"
+ALLOWED_CALL_MODES = {AUDIO, VIDEO, REMOTE}
 INACTIVE_CLASS = "inactive"
 MUTED_CLASS = "muted"
 SCREEN_OFF_CLASS = "screen-off"
@@ -107,6 +108,9 @@
                     ".click-to-audio": lambda evt, item: aio.run(
                         self.on_entity_action(evt, AUDIO, item)
                     ),
+                    ".click-to-remote-control": lambda evt, item: aio.run(
+                        self.on_entity_action(evt, REMOTE, item)
+                    ),
                 },
             },
         )
@@ -155,13 +159,28 @@
         if mode in ALLOWED_CALL_MODES:
             if self._call_mode == mode:
                 return
+            log.debug(f"Switching to {mode} call mode.")
             self._call_mode = mode
-            with_video = mode == VIDEO
-            for elt in self.call_box_elt.select(".is-video-only"):
-                if with_video:
+            selector = ".is-video-only, .is-not-remote"
+            for elt in self.call_box_elt.select(selector):
+                if mode == VIDEO:
+                    # In video mode, all elements are visible.
                     elt.classList.remove("is-hidden")
+                elif mode == AUDIO:
+                    # In audio mode, we hide video-only elements.
+                    if elt.classList.contains("is-video-only"):
+                        elt.classList.add("is-hidden")
+                    else:
+                        elt.classList.remove("is-hidden")
+                elif mode == REMOTE:
+                    # In remote mode, we show all video elements, except those
+                    # marked `is-not-remote`.
+                    if elt.classList.contains("is-not-remote"):
+                        elt.classList.add("is-hidden")
+                    else:
+                        elt.classList.remove("is-hidden")
                 else:
-                    elt.classList.add("is-hidden")
+                    raise Exception("This line should never be reached.")
         else:
             raise ValueError("Invalid call mode")
 
@@ -190,7 +209,10 @@
         @param profile: Profile associated with the action
         """
         action_data = json.loads(action_data_s)
-        if action_data.get("type") == "confirm" and action_data.get("subtype") == "file":
+        if (
+            action_data.get("type") in ("confirm", "not_in_roster_leak")
+            and action_data.get("subtype") == "file"
+        ):
             aio.run(self.on_file_preflight(action_data, action_id))
         elif action_data.get("type") == "file":
             aio.run(self.on_file_proposal(action_data, action_id))
@@ -286,7 +308,7 @@
             # TODO: Check if any other frontend is connected for this profile, and refuse
             # the file if none is.
             return
-        if action_data.get("file_accepted", False):
+        if action_data.get("pre_accepted", False):
             # File proposal has already been accepted in preflight.
             accepted = True
         else:
@@ -472,13 +494,24 @@
             btn_elt.classList.remove(INACTIVE_CLASS, MUTED_CLASS, "is-warning")
             btn_elt.classList.add("is-success")
 
-    async def make_call(self, audio: bool = True, video: bool = True) -> None:
+    async def make_call(
+        self,
+        audio: bool = True,
+        video: bool = True,
+        remote: bool = False
+    ) -> None:
         """Start a WebRTC call
 
         @param audio: True if an audio flux is required
         @param video: True if a video flux is required
+        @param remote: True if this is a Remote Control session.
         """
-        self.call_mode = VIDEO if video else AUDIO
+        if remote:
+            self.call_mode = REMOTE
+        elif video:
+            self.call_mode = VIDEO
+        else:
+            self.call_mode = AUDIO
         try:
             callee_jid = JID(self.search_elt.value.strip())
             if not callee_jid.is_valid:
@@ -495,7 +528,12 @@
         self.set_avatar(callee_jid)
 
         self.switch_mode("call")
-        await self.webrtc.make_call(callee_jid, audio, video)
+        if remote:
+            await self.webrtc.start_remote_control(
+                callee_jid, audio, video
+            )
+        else:
+            await self.webrtc.make_call(callee_jid, audio, video)
 
     async def end_call(self, data: dict) -> None:
         """Stop streaming and clean instance"""
@@ -612,18 +650,17 @@
         @param fullscreen: if set, determine the fullscreen state; otherwise, the
             fullscreen mode will be toggled.
         """
-        do_fullscreen = (
-            document.fullscreenElement is None if fullscreen is None else fullscreen
-        )
+        if fullscreen is None:
+            fullscreen = document.fullscreenElement is NULL
 
         try:
-            if do_fullscreen:
-                if document.fullscreenElement is None:
+            if fullscreen:
+                if document.fullscreenElement is NULL:
                     self.call_box_elt.requestFullscreen()
                     document["full_screen_btn"].classList.add("is-hidden")
                     document["exit_full_screen_btn"].classList.remove("is-hidden")
             else:
-                if document.fullscreenElement is not None:
+                if document.fullscreenElement is not NULL:
                     document.exitFullscreen()
                     document["full_screen_btn"].classList.remove("is-hidden")
                     document["exit_full_screen_btn"].classList.add("is-hidden")
@@ -711,11 +748,15 @@
         evt.stopPropagation()
         if action == "menu":
             evt.currentTarget.parent.classList.toggle("is-active")
-        elif action in (VIDEO, AUDIO):
+        elif action in (VIDEO, AUDIO, REMOTE):
             self.search_elt.value = item["entity"]
             # we want the dropdown to be inactive
             evt.currentTarget.closest(".dropdown").classList.remove("is-active")
-            await self.make_call(video=action == VIDEO)
+            if action == REMOTE:
+                await self.make_call(audio=False, video=True, remote=True)
+
+            else:
+                await self.make_call(video=action == VIDEO)
 
 
 CallUI()
--- a/libervia/web/pages/calls/_browser/webrtc.py	Sat May 11 13:57:49 2024 +0200
+++ b/libervia/web/pages/calls/_browser/webrtc.py	Sat May 11 14:02:22 2024 +0200
@@ -2,10 +2,11 @@
 import re
 
 from bridge import AsyncBridge as Bridge
-from browser import aio, console as log, document, window
+from browser import aio, console as log, document, window, DOMNode
 import dialog
-from javascript import JSObject
+from javascript import JSObject, NULL
 import jid
+from js_modules.cbor_x import CBOR
 
 log.warning = log.warn
 profile = window.profile or ""
@@ -89,6 +90,189 @@
         aio.run(bridge.call_end(self.session_id, ""))
 
 
+class RemoteControler:
+    """Send input events to the controlled device."""
+
+    def __init__(
+        self,
+        session_id: str,
+        capture_elt: DOMNode,
+        data_channel: JSObject,
+        simulate_mouse: bool = True
+    ) -> None:
+        """Initialize a RemoteControler instance.
+
+        @param session_id: ID of the Jingle Session
+        @param capture_elt: element where the input events are captured.
+        @param data_channel: WebRTCDataChannel instance linking to the controlled device.
+        @param simulate_mouse: if True, touch events will be converted to mouse events.
+        """
+        self.session_id = session_id
+        self.capture_elt = capture_elt
+        self.capture_elt.bind("click", self._on_capture_elt_click)
+        self.data_channel = data_channel
+        data_channel.bind("open", self._on_open)
+        self.simulate_mouse = simulate_mouse
+        self.last_mouse_position = (0, 0)
+
+    def _on_capture_elt_click(self, __):
+        self.capture_elt.focus()
+
+    def _on_open(self, __):
+        log.info("Data channel open, starting to send inputs.")
+        self.start_capture()
+
+    def start_capture(self) -> None:
+        """Start capturing input events to send them to the controlled device."""
+        for event_name in [
+            "mousedown",
+            "mouseup",
+            "mousemove",
+            "keydown",
+            "keyup",
+            "touchstart",
+            "touchend",
+            "touchmove",
+            "wheel",
+        ]:
+            self.capture_elt.bind(event_name, self._send_event)
+        self.capture_elt.focus()
+
+    def get_stream_coordinates(self, client_x: float, client_y: float) -> tuple[float, float]:
+        """Calculate coordinates relative to the actual video stream.
+
+        This method calculates the coordinates relative to the video content inside the <video>
+        element, considering any scaling or letterboxing due to aspect ratio differences.
+
+        @param client_x: The clientX value from the event, relative to the viewport.
+        @param client_y: The clientY value from the event, relative to the viewport.
+        @return: The x and y coordinates relative to the actual video stream.
+        """
+        video_element = self.capture_elt
+        video_rect = video_element.getBoundingClientRect()
+
+        # Calculate offsets relative to the capture element.
+        element_x = client_x - video_rect.left
+        element_y = client_y - video_rect.top
+
+        element_width, element_height = video_rect.width, video_rect.height
+        stream_width, stream_height = video_element.videoWidth, video_element.videoHeight
+
+        if not all((element_width, element_height, stream_width, stream_height)):
+            log.warning("Invalid dimensions for video or element, using offsets.")
+            return element_x, element_y
+
+        element_aspect_ratio = element_width / element_height
+        stream_aspect_ratio = stream_width / stream_height
+
+        # Calculate scale and offset based on aspect ratio comparison.
+        if stream_aspect_ratio > element_aspect_ratio:
+            # The video is wider than the element: letterboxes at top and bottom.
+            scale = element_width / stream_width
+            scaled_height = stream_height * scale
+            offset_x, offset_y = 0, (element_height - scaled_height) / 2
+        else:
+            # The video is taller than the element: letterboxes on the sides.
+            scale = element_height / stream_height
+            scaled_width = stream_width * scale
+            offset_x, offset_y = (element_width - scaled_width) / 2, 0
+
+        # Mouse coordinates relative to the video stream.
+        x = (element_x - offset_x) / scale
+        y = (element_y - offset_y) / scale
+
+        # Ensure the coordinates are within the bounds of the video stream.
+        x = max(0.0, min(x, stream_width))
+        y = max(0.0, min(y, stream_height))
+
+        return x, y
+
+    def _send_event(self, event: JSObject) -> None:
+        """Serialize and send the event to the controlled device via the data channel."""
+        event.preventDefault()
+        serialized_event = self._serialize_event(event)
+        # TODO: we should join events instead
+        self.data_channel.send(CBOR.encode(serialized_event))
+
+    def _serialize_event(self, event: JSObject) -> dict[str, object]:
+        """Serialize event data for transmission.
+
+        @param event: an input event.
+        @return: event data to send to peer.
+        """
+        if event.type.startswith("key"):
+            ret = {
+                "type": event.type,
+                "key": event.key,
+            }
+            if event.location:
+                ret["location"] = event.location
+            return ret
+        elif event.type.startswith("mouse"):
+            x, y = self.get_stream_coordinates(event.clientX, event.clientY)
+            return {
+                "type": event.type,
+                "buttons": event.buttons,
+                "x": x,
+                "y": y,
+            }
+        elif event.type.startswith("touch"):
+            touches = [
+                {
+                    "identifier": touch.identifier,
+                    **dict(zip(["x", "y"], self.get_stream_coordinates(
+                        touch.clientX,
+                        touch.clientY
+                    ))),
+                }
+                for touch in event.touches
+            ]
+            if self.simulate_mouse:
+                # In simulated mouse mode, we send mouse events.
+                if touches:
+                    touch_data = touches[0]
+                    x, y = touch_data["x"], touch_data["y"]
+                    self.last_mouse_position = (x, y)
+                else:
+                    x, y = self.last_mouse_position
+
+                mouse_event: dict[str, object] = {
+                    "x": x,
+                    "y": y,
+                }
+                if event.type == "touchstart":
+                    mouse_event.update({
+                        "type": "mousedown",
+                        "buttons": 1,
+                    })
+                elif event.type == "touchend":
+                    mouse_event.update({
+                        "type": "mouseup",
+                        "buttons": 1,
+                    })
+                elif event.type == "touchmove":
+                    mouse_event.update({
+                        "type": "mousemove",
+                    })
+                return mouse_event
+            else:
+                # Normal mode: we send touch events.
+                return {
+                    "type": event.type,
+                    "touches": touches
+                }
+        elif event.type == "wheel":
+            return {
+                "type": event.type,
+                "deltaX": event.deltaX,
+                "deltaY": event.deltaY,
+                "deltaZ": event.deltaZ,
+                "deltaMode": event.deltaMode,
+            }
+        else:
+            raise Exception(f"Internal Error: unexpected event {event.type=}")
+
+
 class WebRTC:
     def __init__(
@@ -100,12 +284,13 @@
         on_video_devices=None,
         on_reset_cb=None,
         file_only: bool = False,
-        extra_data: dict|None = None
+        extra_data: dict | None = None,
     ):
         """Initialise WebRTC instance.
 
         @param screen_sharing_cb: callable function for screen sharing event
-        @param on_connection_established_cb: callable function for connection established event
+        @param on_connection_established_cb: callable function for connection established
+            event
         @param on_reconnect_cb: called when a reconnection is triggered.
         @param on_connection_lost_cb: called when the connection is lost.
         @param on_video_devices: called when new video devices are set.
@@ -376,7 +561,8 @@
             self.on_connection_established_cb()
         elif state == "failed":
             log.error(
-                "ICE connection failed. Check network connectivity and ICE configurations."
+                "ICE connection failed. Check network connectivity and ICE"
+                " configurations."
            )
         elif state == "disconnected":
             log.warning("ICE connection was disconnected.")
@@ -431,9 +617,9 @@
             if server["type"] == "stun":
                 ice_server["urls"] = f"stun:{server['host']}:{server['port']}"
             elif server["type"] == "turn":
-                ice_server[
-                    "urls"
-                ] = f"turn:{server['host']}:{server['port']}?transport={server['transport']}"
+                ice_server["urls"] = (
+                    f"turn:{server['host']}:{server['port']}?transport={server['transport']}"
+                )
                 ice_server["username"] = server["username"]
                 ice_server["credential"] = server["password"]
                 ice_servers.append(ice_server)
@@ -496,9 +682,9 @@
                 track.stop()
 
         media_constraints = {
-            "video": {"deviceId": self.current_camera}
-            if self.current_camera
-            else True
+            "video": (
+                {"deviceId": self.current_camera} if self.current_camera else True
+            )
         }
 
         new_stream = await window.navigator.mediaDevices.getUserMedia(
@@ -540,8 +726,8 @@
             await video_sender.replaceTrack(new_video_tracks[0])
 
             if screen:
-                # For screen sharing, we track the end event to properly stop the sharing when
-                # the user clicks on the browser's stop sharing dialog.
+                # For screen sharing, we track the end event to properly stop the sharing
+                # when the user clicks on the browser's stop sharing dialog.
                 def on_track_ended(event):
                     aio.run(self.toggle_screen_sharing())
 
@@ -687,18 +873,17 @@
         """
         log.debug(f"new peer candidates received: {candidates}")
 
-        try:
-            # FIXME: javascript.NULL must be used here, once we move to Brython 3.12.3+
-            remoteDescription_is_none = self._peer_connection.remoteDescription is None
-        except Exception as e:
-            # FIXME: should be fine in Brython 3.12.3+
-            log.debug("Workaround for Brython bug activated.")
-            remoteDescription_is_none = True
+        # try:
+        #     # FIXME: javascript.NULL must be used here, once we move to Brython 3.12.3+
+        #     remoteDescription_is_none = self._peer_connection.remoteDescription is None
+        # except Exception as e:
+        #     # FIXME: should be fine in Brython 3.12.3+
+        #     log.debug("Workaround for Brython bug activated.")
+        #     remoteDescription_is_none = True
 
         if (
             self._peer_connection is None
-            # or self._peer_connection.remoteDescription is NULL
-            or remoteDescription_is_none
+            or self._peer_connection.remoteDescription is NULL
         ):
             for media_type in ("audio", "video", "application"):
                 media_candidates = candidates.get(media_type)
@@ -786,13 +971,11 @@
                 ice_data[media_type] = {
                     "ufrag": self.ufrag,
                     "pwd": self.pwd,
-                    "candidates": candidates
+                    "candidates": candidates,
                 }
             await bridge.ice_candidates_add(
                 self.sid,
-                json.dumps(
-                    ice_data
-                ),
+                json.dumps(ice_data),
             )
             self.local_candidates_buffer.clear()
 
@@ -811,6 +994,53 @@
         log.debug(f"Call SID: {self.sid}")
         await self._send_buffered_local_candidates()
 
+    async def start_remote_control(
+        self, callee_jid: jid.JID, audio: bool = True, video: bool = True
+    ) -> None:
+        """Start a Remote Control session.
+
+        If both audio and video are False, no screenshare will be done: the input will
+        be sent without feedback.
+        @param audio: True if an audio flux is required
+        @param video: True if a video flux is required
+        """
+        if audio or not video:
+            raise NotImplementedError("Only video screenshare is supported for now.")
+        peer_connection = await self._create_peer_connection()
+        if video:
+            peer_connection.addTransceiver("video", {"direction": "recvonly"})
+        data_channel = peer_connection.createDataChannel("remote-control")
+
+        call_data = await self._get_call_data()
+
+        try:
+            remote_control_data = json.loads(
+                await bridge.remote_control_start(
+                    str(callee_jid),
+                    json.dumps(
+                        {
+                            "devices": {
+                                "keyboard": {},
+                                "mouse": {},
+                                "wheel": {}
+                            },
+                            "call_data": call_data,
+                        }
+                    ),
+                )
+            )
+        except Exception as e:
+            dialog.notification.show(f"Can't start remote control: {e}", level="error")
+            return
+
+        self.sid = remote_control_data["session_id"]
+
+        log.debug(f"Remote Control SID: {self.sid}")
+        await self._send_buffered_local_candidates()
+        self.remote_controller = RemoteControler(
+            self.sid, self.remote_video_elt, data_channel
+        )
+
     def _on_opened_data_channel(self, event):
         log.info("Datachannel has been opened.")
 
@@ -820,28 +1050,22 @@
         data_channel = peer_connection.createDataChannel("file")
         call_data = await self._get_call_data()
         log.info(f"sending file to {callee_jid!r}")
-        file_meta = {
-            "size": file.size
-        }
+        file_meta = {"size": file.size}
         if file.type:
             file_meta["media_type"] = file.type
         try:
-            file_data = json.loads(await bridge.file_jingle_send(
+            file_data = json.loads(
+                await bridge.file_jingle_send(
                     str(callee_jid),
                     "",
                     file.name,
                     "",
-                json.dumps({
-                    "webrtc": True,
-                    "call_data": call_data,
-                    **file_meta
-                })
-            ))
+                    json.dumps({"webrtc": True, "call_data": call_data, **file_meta}),
+                )
+            )
         except Exception as e:
-            dialog.notification.show(
-                f"Can't send file: {e}", level="error"
-            )
+            dialog.notification.show(f"Can't send file: {e}", level="error")
             return
 
         self.sid = file_data["session_id"]
--- a/libervia/web/server/restricted_bridge.py	Sat May 11 13:57:49 2024 +0200
+++ b/libervia/web/server/restricted_bridge.py	Sat May 11 14:02:22 2024 +0200
@@ -318,3 +318,12 @@
         return await self.host.bridge_call(
             "jid_search", search_term, options_s, profile
         )
+
+    async def remote_control_start(
+        self, peer_jid_s: str, extra_s: str, profile: str
+    ) -> str:
+        self.no_service_profile(profile)
+        return await self.host.bridge_call(
+            "remote_control_start", peer_jid_s, extra_s, profile
+        )
+
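On the browser side, this new restricted-bridge method is reached through the `AsyncBridge` proxy, as in the `start_remote_control` hunk above; the two-argument call in `webrtc.py` suggests the `profile` argument is filled in server-side from the session. A minimal sketch of the round trip (the `extra` payload shape is the one built in `webrtc.py`; the function name is illustrative and error handling is omitted):

    import json

    async def start_remote_control_session(bridge, callee_jid, call_data) -> str:
        """Sketch: request a Remote Control session, return its session ID.

        `bridge` is the browser-side AsyncBridge proxy; this mirrors the call
        made in WebRTC.start_remote_control() above.
        """
        extra = {
            # Input devices the controlling side intends to forward.
            "devices": {"keyboard": {}, "mouse": {}, "wheel": {}},
            # WebRTC negotiation data (as returned by _get_call_data()).
            "call_data": call_data,
        }
        remote_control_data = json.loads(
            await bridge.remote_control_start(str(callee_jid), json.dumps(extra))
        )
        return remote_control_data["session_id"]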