From 996cc1910bde53a2bc1857c948f387d55a29a00d Mon Sep 17 00:00:00 2001
From: kozabrada123 <59031733+kozabrada123@users.noreply.github.com>
Date: Fri, 12 Jan 2024 16:45:56 +0100
Subject: [PATCH] feat: new encryption modes, minor code quality

---
 Cargo.toml                                    |   1 +
 src/errors.rs                                 |   2 +
 src/lib.rs                                    |   5 +-
 .../events/voice_gateway/client_connect.rs    |   4 +-
 .../events/voice_gateway/ssrc_definition.rs   |  10 ++
 src/voice/crypto.rs                           |  35 +++++-
 src/voice/mod.rs                              |   2 +-
 src/voice/udp/handle.rs                       |  53 +++++++-
 src/voice/udp/handler.rs                      | 115 +++++++++++++-----
 src/voice/voice_data.rs                       |   3 +
 10 files changed, 191 insertions(+), 39 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 0c88387..6f55a7d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -69,6 +69,7 @@ tokio-tungstenite = { version = "0.20.1", features = [
 ] }
 native-tls = "0.2.11"
 hostname = "0.3.1"
+getrandom = { version = "0.2.11" }
 
 [target.'cfg(target_arch = "wasm32")'.dependencies]
 getrandom = { version = "0.2.11", features = ["js"] }
diff --git a/src/errors.rs b/src/errors.rs
index bf3727c..e129cf6 100644
--- a/src/errors.rs
+++ b/src/errors.rs
@@ -142,6 +142,8 @@ custom_error! {
     // Encryption errors
     NoKey = "Tried to encrypt / decrypt rtp data, but no key has been received yet",
     FailedEncryption = "Tried to encrypt rtp data, but failed. Most likely this is an issue chorus' nonce generation. Please open an issue on the chorus github: https://github.com/polyphony-chat/chorus/issues/new",
+    FailedDecryption = "Tried to decrypt rtp data, but failed. Most likely this is an issue chorus' nonce generation. Please open an issue on the chorus github: https://github.com/polyphony-chat/chorus/issues/new",
+    FailedNonceGeneration{error: String} = "Tried to generate nonce, but failed due to error: {error}.",
 
     // Errors when initiating a socket connection
     CannotBind{error: String} = "Cannot bind socket due to a udp error: {error}",
diff --git a/src/lib.rs b/src/lib.rs
index abe8101..f1a3591 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -128,7 +128,10 @@ pub mod instance;
 #[cfg(feature = "client")]
 pub mod ratelimiter;
 pub mod types;
-#[cfg(all(feature = "client", any(feature = "voice_udp", feature = "voice_gateway")))]
+#[cfg(all(
+    feature = "client",
+    any(feature = "voice_udp", feature = "voice_gateway")
+))]
 pub mod voice;
 
 #[derive(Clone, Default, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
diff --git a/src/types/events/voice_gateway/client_connect.rs b/src/types/events/voice_gateway/client_connect.rs
index d367ff3..5b54797 100644
--- a/src/types/events/voice_gateway/client_connect.rs
+++ b/src/types/events/voice_gateway/client_connect.rs
@@ -12,7 +12,9 @@ use serde::{Deserialize, Serialize};
 pub struct VoiceClientConnectFlags {
     pub user_id: Snowflake,
     // Likely some sort of bitflags
-    pub flags: u8,
+    //
+    // Not always sent, sometimes null?
+    pub flags: Option<u8>,
 }
 
 impl WebSocketEvent for VoiceClientConnectFlags {}
diff --git a/src/types/events/voice_gateway/ssrc_definition.rs b/src/types/events/voice_gateway/ssrc_definition.rs
index 738f483..e19e563 100644
--- a/src/types/events/voice_gateway/ssrc_definition.rs
+++ b/src/types/events/voice_gateway/ssrc_definition.rs
@@ -3,6 +3,10 @@ use serde::{Deserialize, Serialize};
 
 /// Defines an event which provides ssrcs for voice and video for a user id.
 ///
+/// This event is sent when we begin to speak.
+///
+/// It must be sent before sending audio, or else clients will not be able to play the stream.
+///
 /// This event is sent via opcode 12.
 ///
 /// Examples of the event:
@@ -28,12 +32,18 @@ pub struct SsrcDefinition {
     /// Is always sent and received, though is 0 if describing only the video ssrc.
     #[serde(default)]
     pub audio_ssrc: usize,
+    // Presumably the ssrc of the retransmission (rtx) stream; this is unconfirmed.
+    // It is usually 0
+    #[serde(default)]
+    pub rtx_ssrc: usize,
     /// The user id these ssrcs apply to.
     ///
     /// Is never sent by the user and is filled in by the server
     #[serde(skip_serializing)]
     pub user_id: Option<Snowflake>,
     // TODO: Add video streams
+    #[serde(default)]
+    pub streams: Vec<String>,
 }
 
 impl WebSocketEvent for SsrcDefinition {}
diff --git a/src/voice/crypto.rs b/src/voice/crypto.rs
index 7bcc056..7657383 100644
--- a/src/voice/crypto.rs
+++ b/src/voice/crypto.rs
@@ -2,15 +2,46 @@
 //!
 //! All functions in this module return a 24 byte long [Vec<u8>].
 
-/// Gets an xsalsa20poly1305 nonce from an rtppacket.
+use crypto_secretbox::cipher::typenum::Len;
+
+/// Gets an xsalsa20_poly1305 nonce from an rtppacket.
+///
+/// See <https://discord-userdoccers.vercel.app/topics/voice-connections#encryption-mode>
 pub(crate) fn get_xsalsa20_poly1305_nonce(packet: &[u8]) -> Vec<u8> {
     let mut rtp_header = Vec::with_capacity(24);
     rtp_header.append(&mut packet[0..12].to_vec());
 
     // The header is only 12 bytes, but the nonce has to be 24
-    for _i in 0..12 {
+    while rtp_header.len() < 24 {
         rtp_header.push(0);
     }
 
     rtp_header
 }
+
+/// Gets an xsalsa20_poly1305_suffix nonce from an rtppacket.
+///
+/// See <https://discord-userdoccers.vercel.app/topics/voice-connections#encryption-mode>
+pub(crate) fn get_xsalsa20_poly1305_suffix_nonce(packet: &[u8]) -> Vec<u8> {
+    let mut nonce = Vec::with_capacity(24);
+
+    nonce.append(&mut packet[(packet.len() - 24)..packet.len()].to_vec());
+
+    nonce
+}
+
+/// Gets an xsalsa20_poly1305_lite nonce from an rtppacket.
+///
+/// See <https://discord-userdoccers.vercel.app/topics/voice-connections#encryption-mode>
+pub(crate) fn get_xsalsa20_poly1305_lite_nonce(packet: &[u8]) -> Vec<u8> {
+    let mut nonce = Vec::with_capacity(24);
+
+    nonce.append(&mut packet[(packet.len() - 4)..packet.len()].to_vec());
+
+    // The suffix is only 4 bytes, but the nonce has to be 24
+    while nonce.len() < 24 {
+        nonce.push(0);
+    }
+
+    nonce
+}
diff --git a/src/voice/mod.rs b/src/voice/mod.rs
index 660f806..8731fd1 100644
--- a/src/voice/mod.rs
+++ b/src/voice/mod.rs
@@ -1,8 +1,8 @@
 //! Module for all voice functionality within chorus.
 
+mod crypto;
 #[cfg(feature = "voice_gateway")]
 pub mod gateway;
-mod crypto;
 #[cfg(all(feature = "voice_udp", feature = "voice_gateway"))]
 pub mod handler;
 #[cfg(feature = "voice_udp")]
diff --git a/src/voice/udp/handle.rs b/src/voice/udp/handle.rs
index 7b2cca9..87550d3 100644
--- a/src/voice/udp/handle.rs
+++ b/src/voice/udp/handle.rs
@@ -5,6 +5,7 @@ use crypto_secretbox::{
 };
 use discortp::Packet;
 
+use getrandom::getrandom;
 use log::*;
 
 use tokio::{sync::Mutex, sync::RwLock};
@@ -13,7 +14,11 @@ use super::UdpSocket;
 
 use crate::{
     errors::VoiceUdpError,
-    voice::{crypto, voice_data::VoiceData},
+    types::VoiceEncryptionMode,
+    voice::{
+        crypto::{self, get_xsalsa20_poly1305_nonce},
+        voice_data::VoiceData,
+    },
 };
 
 use super::{events::VoiceUDPEvents, RTP_HEADER_SIZE};
@@ -104,6 +109,8 @@ impl UdpHandle {
     ///
     /// # Errors
     /// If we have not received an encryption key, this returns a [VoiceUdpError::NoKey] error.
+    ///
+    /// When using voice encryption modes which require special nonce generation, and said generation fails, this returns a [VoiceUdpError::FailedNonceGeneration] error.
     pub async fn encrypt_rtp_packet_payload(
         &self,
         packet: &discortp::rtp::MutableRtpPacket<'_>,
@@ -120,7 +127,42 @@ impl UdpHandle {
 
         let session_description = session_description_result.unwrap();
 
-        let nonce_bytes = crypto::get_xsalsa20_poly1305_nonce(packet.packet());
+        let mut nonce_bytes = match session_description.encryption_mode {
+            VoiceEncryptionMode::Xsalsa20Poly1305 => get_xsalsa20_poly1305_nonce(packet.packet()),
+            VoiceEncryptionMode::Xsalsa20Poly1305Suffix => {
+                // Generate 24 random bytes
+                let mut random_destinaton: Vec<u8> = vec![0; 24];
+                let random_result = getrandom(&mut random_destinaton);
+                if let Err(e) = random_result {
+                    return Err(VoiceUdpError::FailedNonceGeneration {
+                        error: format!("{:?}", e),
+                    });
+                }
+                random_destinaton
+            }
+            VoiceEncryptionMode::Xsalsa20Poly1305Lite => {
+                // "Incremental 4 bytes (32bit) int value"
+                let mut data_lock = self.data.write().await;
+                let nonce = data_lock
+                    .last_udp_encryption_nonce
+                    .unwrap_or_default()
+                    .wrapping_add(1);
+                data_lock.last_udp_encryption_nonce = Some(nonce);
+                drop(data_lock);
+                // TODO: Is little-endian byte order correct? This is not documented anywhere
+                let mut bytes = nonce.to_le_bytes().to_vec();
+                // This is 4 bytes, it has to be 24, so we need to append 20
+                while bytes.len() < 24 {
+                    bytes.push(0);
+                }
+                bytes
+            }
+            _ => {
+                // TODO: Implement aead_aes256_gcm
+                todo!("This voice encryption mode is not yet implemented.");
+            }
+        };
+
         let nonce = GenericArray::from_slice(&nonce_bytes);
 
         let key = GenericArray::from_slice(&session_description.secret_key);
@@ -139,6 +181,13 @@ impl UdpHandle {
 
         let mut encrypted_payload = encryption_result.unwrap();
 
+        // Append the nonce bytes, if needed
+        // All other encryption modes have an explicit nonce, whereas Xsalsa20Poly1305
+        // has the nonce as the rtp header.
+        if session_description.encryption_mode != VoiceEncryptionMode::Xsalsa20Poly1305 {
+            encrypted_payload.append(&mut nonce_bytes);
+        }
+
         // We need to allocate a new buffer, since the old one is too small for our new encrypted
         // data
         let buffer_size = encrypted_payload.len() + RTP_HEADER_SIZE as usize;
diff --git a/src/voice/udp/handler.rs b/src/voice/udp/handler.rs
index a7b05b2..67f4f3e 100644
--- a/src/voice/udp/handler.rs
+++ b/src/voice/udp/handler.rs
@@ -19,7 +19,10 @@ use super::UdpSocket;
 
 use super::RTP_HEADER_SIZE;
 use crate::errors::VoiceUdpError;
+use crate::types::VoiceEncryptionMode;
+use crate::voice::crypto::get_xsalsa20_poly1305_lite_nonce;
 use crate::voice::crypto::get_xsalsa20_poly1305_nonce;
+use crate::voice::crypto::get_xsalsa20_poly1305_suffix_nonce;
 use crate::voice::voice_data::VoiceData;
 
 use super::{events::VoiceUDPEvents, UdpHandle};
@@ -167,41 +170,24 @@ impl UdpHandler {
 
         match parsed {
             Demuxed::Rtp(rtp) => {
-                let ciphertext = buf[(RTP_HEADER_SIZE as usize)..buf.len()].to_vec();
-                trace!("VUDP: Parsed packet as rtp!");
+                trace!("VUDP: Parsed packet as rtp! {:?}", buf);
 
-                let session_description_result = self.data.read().await.session_description.clone();
+                let decryption_result = self.decrypt_rtp_packet_payload(&rtp).await;
 
-                if session_description_result.is_none() {
-                    warn!("VUDP: Received encyrpted voice data, but no encryption key, CANNOT DECRYPT!");
-                    return;
-                }
-
-                let session_description = session_description_result.unwrap();
-
-                let nonce_bytes = match session_description.encryption_mode {
-                    crate::types::VoiceEncryptionMode::Xsalsa20Poly1305 => {
-                        get_xsalsa20_poly1305_nonce(rtp.packet())
+                if let Err(err) = decryption_result {
+                    match err {
+                        VoiceUdpError::NoKey => {
+                            warn!("VUDP: Received encyrpted voice data, but no encryption key, CANNOT DECRYPT!");
+                            return;
+                        }
+                        VoiceUdpError::FailedDecryption => {
+                            warn!("VUDP: Failed to decrypt voice data!");
+                            return;
+                        }
+                        _ => {
+                            unreachable!();
+                        }
                     }
-                    _ => {
-                        unimplemented!();
-                    }
-                };
-
-                let nonce = GenericArray::from_slice(&nonce_bytes);
-
-                let key = GenericArray::from_slice(&session_description.secret_key);
-
-                let decryptor = XSalsa20Poly1305::new(key);
-
-                let decryption_result = decryptor.decrypt(nonce, ciphertext.as_ref());
-
-                if let Err(decryption_error) = decryption_result {
-                    warn!(
-                        "VUDP: Failed to decypt voice data! ({:?})",
-                        decryption_error
-                    );
-                    return;
                 }
 
                 let decrypted = decryption_result.unwrap();
@@ -273,4 +259,69 @@ impl UdpHandler {
             }
         }
     }
+
+    /// Decrypts an encrypted rtp packet, returning a decrypted copy of the packet's payload
+    /// bytes.
+    ///
+    /// # Errors
+    /// If we have not received an encryption key, this returns a [VoiceUdpError::NoKey] error.
+    ///
+    /// If the decryption fails, this returns a [VoiceUdpError::FailedDecryption].
+    pub async fn decrypt_rtp_packet_payload(
+        &self,
+        rtp: &discortp::rtp::RtpPacket<'_>,
+    ) -> Result<Vec<u8>, VoiceUdpError> {
+        let packet_bytes = rtp.packet();
+
+        let mut ciphertext: Vec<u8> =
+            packet_bytes[(RTP_HEADER_SIZE as usize)..packet_bytes.len()].to_vec();
+
+        let session_description_result = self.data.read().await.session_description.clone();
+
+        // We are trying to decrypt, but have not received SessionDescription yet,
+        // which contains the secret key
+        if session_description_result.is_none() {
+            return Err(VoiceUdpError::NoKey);
+        }
+
+        let session_description = session_description_result.unwrap();
+
+        let nonce_bytes = match session_description.encryption_mode {
+            VoiceEncryptionMode::Xsalsa20Poly1305 => get_xsalsa20_poly1305_nonce(packet_bytes),
+            VoiceEncryptionMode::Xsalsa20Poly1305Suffix => {
+                // Remove the suffix from the ciphertext
+                ciphertext = ciphertext[0..ciphertext.len() - 24].to_vec();
+                get_xsalsa20_poly1305_suffix_nonce(packet_bytes)
+            }
+            // Note: Rtpsize is documented by userdoccers to be the same, yet decryption
+            // doesn't work.
+            //
+            // I have no idea how Rtpsize works.
+            VoiceEncryptionMode::Xsalsa20Poly1305Lite => {
+                // Remove the suffix from the ciphertext
+                ciphertext = ciphertext[0..ciphertext.len() - 4].to_vec();
+                get_xsalsa20_poly1305_lite_nonce(packet_bytes)
+            }
+            _ => {
+                // TODO: Implement aead_aes256_gcm
+                todo!("This voice encryption mode is not yet implemented.");
+            }
+        };
+
+        let nonce = GenericArray::from_slice(&nonce_bytes);
+
+        let key = GenericArray::from_slice(&session_description.secret_key);
+
+        let decryptor = XSalsa20Poly1305::new(key);
+
+        let decryption_result = decryptor.decrypt(nonce, ciphertext.as_ref());
+
+        // Note: this may seem like we are throwing away valuable error handling data,
+        // but the decryption error provides no extra info.
+        if decryption_result.is_err() {
+            return Err(VoiceUdpError::FailedDecryption);
+        }
+
+        Ok(decryption_result.unwrap())
+    }
 }
diff --git a/src/voice/voice_data.rs b/src/voice/voice_data.rs
index 064ebda..5252ac5 100644
--- a/src/voice/voice_data.rs
+++ b/src/voice/voice_data.rs
@@ -15,4 +15,7 @@ pub struct VoiceData {
     /// The last sequence number we used, has to be incremeted by one every time we send a message
     pub last_sequence_number: u16,
     pub ip_discovery: Option<IpDiscovery>,
+
+    /// The last udp encryption nonce, if we are using an encryption mode with incremental nonces.
+    pub last_udp_encryption_nonce: Option<u32>,
 }