diff --git a/src/utils/source_bundle.rs b/src/utils/source_bundle.rs index 5378df0ef0..a32477c811 100644 --- a/src/utils/source_bundle.rs +++ b/src/utils/source_bundle.rs @@ -195,7 +195,6 @@ fn url_to_bundle_path(url: &str) -> Result { #[cfg(test)] mod tests { - use sha1_smol::Sha1; use symbolic::debuginfo::sourcebundle::SourceFileType; use crate::utils::file_upload::SourceFile; @@ -229,37 +228,37 @@ mod tests { #[test] fn build_deterministic() { - let projects_slice = &["wat-project".into()]; - let context = BundleContext { - org: "wat-org", - projects: Some(projects_slice.into()), - release: None, - dist: None, - note: None, + let make_bundle = || { + let projects_slice = &["wat-project".into()]; + let context = BundleContext { + org: "wat-org", + projects: Some(projects_slice.into()), + release: None, + dist: None, + note: None, + }; + + let source_files = ["bundle.min.js.map", "vendor.min.js.map"] + .into_iter() + .map(|name| SourceFile { + url: format!("~/{name}"), + path: format!("tests/integration/_fixtures/{name}").into(), + contents: std::fs::read(format!("tests/integration/_fixtures/{name}")) + .unwrap() + .into(), + ty: SourceFileType::SourceMap, + headers: Default::default(), + messages: Default::default(), + already_uploaded: false, + }) + .collect::>(); + + let file = build(context, &source_files, None).unwrap(); + std::fs::read(file.path()).unwrap() }; - let source_files = ["bundle.min.js.map", "vendor.min.js.map"] - .into_iter() - .map(|name| SourceFile { - url: format!("~/{name}"), - path: format!("tests/integration/_fixtures/{name}").into(), - contents: std::fs::read(format!("tests/integration/_fixtures/{name}")) - .unwrap() - .into(), - ty: SourceFileType::SourceMap, - headers: Default::default(), - messages: Default::default(), - already_uploaded: false, - }) - .collect::>(); - - let file = build(context, &source_files, None).unwrap(); - - let buf = std::fs::read(file.path()).unwrap(); - let hash = Sha1::from(buf); - assert_eq!( - 
hash.digest().to_string(), - "f0e25ae149b711c510148e022ebc883ad62c7c4c" - ); + let first = make_bundle(); + let second = make_bundle(); + assert_eq!(first, second, "bundle output should be deterministic"); } } diff --git a/tests/integration/_expected_requests/build/apk_chunk.bin b/tests/integration/_expected_requests/build/apk_chunk.bin deleted file mode 100644 index 2a35b07da0..0000000000 Binary files a/tests/integration/_expected_requests/build/apk_chunk.bin and /dev/null differ diff --git a/tests/integration/_expected_requests/debug_files/upload/chunk_upload.bin b/tests/integration/_expected_requests/debug_files/upload/chunk_upload.bin deleted file mode 100644 index a52717aeaf..0000000000 Binary files a/tests/integration/_expected_requests/debug_files/upload/chunk_upload.bin and /dev/null differ diff --git a/tests/integration/_expected_requests/debug_files/upload/chunk_upload_multiple_files.bin b/tests/integration/_expected_requests/debug_files/upload/chunk_upload_multiple_files.bin deleted file mode 100644 index 8237fa49aa..0000000000 Binary files a/tests/integration/_expected_requests/debug_files/upload/chunk_upload_multiple_files.bin and /dev/null differ diff --git a/tests/integration/_expected_requests/debug_files/upload/chunk_upload_multiple_files_only_some.bin b/tests/integration/_expected_requests/debug_files/upload/chunk_upload_multiple_files_only_some.bin deleted file mode 100644 index fc2f522a53..0000000000 Binary files a/tests/integration/_expected_requests/debug_files/upload/chunk_upload_multiple_files_only_some.bin and /dev/null differ diff --git a/tests/integration/_expected_requests/debug_files/upload/chunk_upload_small_chunks.bin b/tests/integration/_expected_requests/debug_files/upload/chunk_upload_small_chunks.bin deleted file mode 100644 index 326c8b5929..0000000000 Binary files a/tests/integration/_expected_requests/debug_files/upload/chunk_upload_small_chunks.bin and /dev/null differ diff --git 
a/tests/integration/_expected_requests/debug_files/upload/chunk_upload_small_chunks_only_some.bin b/tests/integration/_expected_requests/debug_files/upload/chunk_upload_small_chunks_only_some.bin deleted file mode 100644 index c9a3749115..0000000000 Binary files a/tests/integration/_expected_requests/debug_files/upload/chunk_upload_small_chunks_only_some.bin and /dev/null differ diff --git a/tests/integration/_expected_requests/proguard/upload/chunk_upload_needs_upload.bin b/tests/integration/_expected_requests/proguard/upload/chunk_upload_needs_upload.bin deleted file mode 100644 index e1ca8d8b4e..0000000000 Binary files a/tests/integration/_expected_requests/proguard/upload/chunk_upload_needs_upload.bin and /dev/null differ diff --git a/tests/integration/_expected_requests/proguard/upload/chunk_upload_two_files.bin b/tests/integration/_expected_requests/proguard/upload/chunk_upload_two_files.bin deleted file mode 100644 index 05f3fd75da..0000000000 Binary files a/tests/integration/_expected_requests/proguard/upload/chunk_upload_two_files.bin and /dev/null differ diff --git a/tests/integration/build/upload.rs b/tests/integration/build/upload.rs index 3a07ddb4b6..9a4740bce5 100644 --- a/tests/integration/build/upload.rs +++ b/tests/integration/build/upload.rs @@ -1,8 +1,7 @@ -use crate::integration::{AssertCommand, MockEndpointBuilder, TestManager}; -use regex::bytes::Regex; use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::LazyLock; -use std::{fs, str}; + +use crate::integration::test_utils::chunk_upload; +use crate::integration::{AssertCommand, MockEndpointBuilder, TestManager}; #[cfg(all(target_os = "macos", target_arch = "aarch64"))] #[test] @@ -105,17 +104,6 @@ fn command_build_upload_apk_invlid_sha() { TestManager::new().register_trycmd_test("build/build-invalid-*-sha.trycmd"); } -/// This regex is used to extract the boundary from the content-type header. -/// We need to match the boundary, since it changes with each request. 
-/// The regex matches the format as specified in -/// https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html. -static CONTENT_TYPE_REGEX: LazyLock = LazyLock::new(|| { - Regex::new( - r#"^multipart\/form-data; boundary=(?[\w'\(\)+,\-\.\/:=? ]{0,69}[\w'\(\)+,\-\.\/:=?])$"# - ) - .expect("Regex is valid") -}); - #[test] /// This test simulates a full chunk upload (with only one chunk). /// It verifies that the Sentry CLI makes the expected API calls to the chunk upload endpoint @@ -123,8 +111,6 @@ static CONTENT_TYPE_REGEX: LazyLock = LazyLock::new(|| { /// It also verifies that the correct calls are made to the assemble endpoint. fn command_build_upload_apk_chunked() { let is_first_assemble_call = AtomicBool::new(true); - let expected_chunk_body = fs::read("tests/integration/_expected_requests/build/apk_chunk.bin") - .expect("expected chunk body file should be present"); TestManager::new() .mock_endpoint( @@ -134,34 +120,35 @@ fn command_build_upload_apk_chunked() { .mock_endpoint( MockEndpointBuilder::new("POST", "/api/0/organizations/wat-org/chunk-upload/") .with_response_fn(move |request| { - let content_type_headers = request.header("content-type"); + let boundary = chunk_upload::boundary_from_request(request) + .expect("content-type header should be a valid multipart/form-data header"); + + let body = request.body().expect("body should be readable"); + + let decompressed = chunk_upload::decompress_chunks(body, boundary) + .expect("chunks should be valid gzip data"); + + assert_eq!(decompressed.len(), 1, "expected exactly one chunk"); + + // The CLI wraps the APK in a zip bundle with metadata. + // Verify the bundle is a valid zip containing the APK. 
+ let chunk = decompressed.iter().next().unwrap(); + let cursor = std::io::Cursor::new(chunk); + let mut archive = + zip::ZipArchive::new(cursor).expect("chunk should be a valid zip"); + let apk_entry = archive + .by_name("apk.apk") + .expect("bundle should contain the APK file"); + let expected_size = + std::fs::metadata("tests/integration/_fixtures/build/apk.apk") + .expect("fixture file should exist") + .len(); assert_eq!( - content_type_headers.len(), - 1, - "content-type header should be present exactly once, found {} times", - content_type_headers.len() + apk_entry.size(), + expected_size, + "APK size in bundle should match the fixture" ); - let content_type = content_type_headers[0].as_bytes(); - let boundary = CONTENT_TYPE_REGEX - .captures(content_type) - .expect("content-type should match regex") - .name("boundary") - .expect("boundary should be present") - .as_bytes(); - let boundary_str = str::from_utf8(boundary).expect("boundary should be valid utf-8"); - let boundary_escaped = regex::escape(boundary_str); - let body_regex = Regex::new(&format!( - r#"^--{boundary_escaped}(?(?s-u:.)*?)--{boundary_escaped}--\s*$"# - )) - .expect("regex should be valid"); - let body = request.body().expect("body should be readable"); - let chunk_body = body_regex - .captures(body) - .expect("body should match regex") - .name("chunk_body") - .expect("chunk_body section should be present") - .as_bytes(); - assert_eq!(chunk_body, expected_chunk_body); + vec![] // Client does not expect a response body }), ) diff --git a/tests/integration/debug_files/upload.rs b/tests/integration/debug_files/upload.rs index 7214f9cf3f..d87288174e 100644 --- a/tests/integration/debug_files/upload.rs +++ b/tests/integration/debug_files/upload.rs @@ -1,23 +1,13 @@ -use std::collections::HashSet; +use std::collections::BTreeMap; +use std::fs; use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::{Arc, LazyLock, Mutex}; -use std::{fs, str}; +use std::sync::{Arc, Mutex}; -use 
regex::bytes::Regex; +use serde::Deserialize; +use sha1_smol::Sha1; use crate::integration::{chunk_upload, AssertCommand, MockEndpointBuilder, TestManager}; -/// This regex is used to extract the boundary from the content-type header. -/// We need to match the boundary, since it changes with each request. -/// The regex matches the format as specified in -/// https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html. -static CONTENT_TYPE_REGEX: LazyLock = LazyLock::new(|| { - Regex::new( - r#"^multipart\/form-data; boundary=(?[\w'\(\)+,\-\.\/:=? ]{0,69}[\w'\(\)+,\-\.\/:=?])$"# - ) - .expect("Regex is valid") -}); - #[test] fn command_debug_files_upload() { TestManager::new() @@ -197,9 +187,6 @@ fn ensure_correct_assemble_call() { /// It also verifies that the correct calls are made to the assemble endpoint. fn ensure_correct_chunk_upload() { let is_first_assemble_call = AtomicBool::new(true); - let expected_chunk_body = - fs::read("tests/integration/_expected_requests/debug_files/upload/chunk_upload.bin") - .expect("expected chunk body file should be present"); TestManager::new() .mock_endpoint( @@ -209,42 +196,23 @@ fn ensure_correct_chunk_upload() { .mock_endpoint( MockEndpointBuilder::new("POST", "/api/0/organizations/wat-org/chunk-upload/") .with_response_fn(move |request| { - let content_type_headers = request.header("content-type"); - assert_eq!( - content_type_headers.len(), - 1, - "content-type header should be present exactly once, found {} times", - content_type_headers.len() - ); - - let content_type = content_type_headers[0].as_bytes(); - - let boundary = CONTENT_TYPE_REGEX - .captures(content_type) - .expect("content-type should match regex") - .name("boundary") - .expect("boundary should be present") - .as_bytes(); - - let boundary_str = str::from_utf8(boundary).expect("boundary should be valid utf-8"); - - let boundary_escaped = regex::escape(boundary_str); - - let body_regex = Regex::new(&format!( - 
r#"^--{boundary_escaped}(?(?s-u:.)*?)--{boundary_escaped}--\s*$"# - )) - .expect("regex should be valid"); + let boundary = chunk_upload::boundary_from_request(request) + .expect("content-type header should be a valid multipart/form-data header"); let body = request.body().expect("body should be readable"); - let chunk_body = body_regex - .captures(body) - .expect("body should match regex") - .name("chunk_body") - .expect("chunk_body section should be present") - .as_bytes(); + let decompressed = chunk_upload::decompress_chunks(body, boundary) + .expect("chunks should be valid gzip data"); - assert_eq!(chunk_body, expected_chunk_body); + let expected_content = + fs::read("tests/integration/_fixtures/SrcGenSampleApp.pdb") + .expect("fixture file should be readable"); + + assert_eq!(decompressed.len(), 1, "expected exactly one chunk"); + assert!( + decompressed.contains(&expected_content), + "decompressed chunk should match the source file" + ); vec![] // Client does not expect a response body }), @@ -287,15 +255,6 @@ fn ensure_correct_chunk_upload() { #[test] /// This test verifies a correct chunk upload of multiple debug files. fn chunk_upload_multiple_files() { - /// This is the boundary used in the expected request file. - /// It was randomly generated when the expected request was recorded. 
- const EXPECTED_REQUEST_BOUNDARY: &str = "------------------------b26LKrHFvpOPfwMoDhYNY8"; - - let expected_chunk_body = fs::read( - "tests/integration/_expected_requests/debug_files/upload/chunk_upload_multiple_files.bin", - ) - .expect("expected chunk body file should be present"); - let is_first_assemble_call = AtomicBool::new(true); TestManager::new() .mock_endpoint( @@ -310,20 +269,21 @@ fn chunk_upload_multiple_files() { let body = request.body().expect("body should be readable"); - let chunks = chunk_upload::split_chunk_body(body, boundary) - .expect("body should be a valid multipart/form-data body"); + let decompressed = chunk_upload::decompress_chunks(body, boundary) + .expect("chunks should be valid gzip data"); - let expected_chunks = chunk_upload::split_chunk_body( - &expected_chunk_body, - EXPECTED_REQUEST_BOUNDARY, - ) - .expect("expected chunk body is a valid multipart/form-data body"); + let fixture_dir = "tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files"; + let expected_files: std::collections::HashSet> = ["fibonacci", "fibonacci-fast", "main"] + .iter() + .map(|name| fs::read(format!("{fixture_dir}/{name}")).expect("fixture should be readable")) + .collect(); - // Using assert! because in case of failure, the output with assert_eq! - // is too long to be useful. - assert!( - chunks == expected_chunks, - "Uploaded chunks differ from the expected chunks" + assert_eq!(decompressed.len(), 3, "expected exactly three chunks"); + let decompressed: std::collections::HashSet<_> = + decompressed.into_iter().collect(); + assert_eq!( + decompressed, expected_files, + "decompressed chunks should match the fixture files" ); vec![] @@ -385,15 +345,6 @@ fn chunk_upload_multiple_files() { /// where one of the files is already uploaded. /// Only the missing files should be uploaded. 
fn chunk_upload_multiple_files_only_some() { - let expected_chunk_body = fs::read( - "tests/integration/_expected_requests/debug_files/upload/chunk_upload_multiple_files_only_some.bin", - ) - .expect("expected chunk body file should be present"); - - /// This is the boundary used in the expected request file. - /// It was randomly generated when the expected request was recorded. - const EXPECTED_REQUEST_BOUNDARY: &str = "------------------------mfIgsRj6pG8q8GGnJIShDh"; - let is_first_assemble_call = AtomicBool::new(true); TestManager::new() .mock_endpoint( @@ -407,20 +358,23 @@ fn chunk_upload_multiple_files_only_some() { .expect("content-type header should be a valid multipart/form-data header"); let body = request.body().expect("body should be readable"); - let chunks = chunk_upload::split_chunk_body(body, boundary) - .expect("body should be a valid multipart/form-data body"); - let expected_chunks = chunk_upload::split_chunk_body( - &expected_chunk_body, - EXPECTED_REQUEST_BOUNDARY, - ) - .expect("expected chunk body is a valid multipart/form-data body"); + let decompressed = chunk_upload::decompress_chunks(body, boundary) + .expect("chunks should be valid gzip data"); - // Using assert! because in case of failure, the output with assert_eq! - // is too long to be useful. + let fixture_dir = "tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files"; + let all_fixtures: std::collections::HashSet> = ["fibonacci", "fibonacci-fast", "main"] + .iter() + .map(|name| fs::read(format!("{fixture_dir}/{name}")).expect("fixture should be readable")) + .collect(); + + // Only 2 of 3 files need uploading (one is already on the server). 
+ assert_eq!(decompressed.len(), 2, "expected exactly two chunks"); + let decompressed: std::collections::HashSet<_> = + decompressed.into_iter().collect(); assert!( - chunks == expected_chunks, - "Uploaded chunks differ from the expected chunks" + decompressed.is_subset(&all_fixtures), + "uploaded chunks should be a subset of the fixture files" ); vec![] @@ -477,6 +431,59 @@ fn chunk_upload_multiple_files_only_some() { .run_and_assert(AssertCommand::Success); } +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct SmallChunkUploadOptions { + chunk_size: usize, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct AssembleFileResponse { + missing_chunks: Vec, +} + +fn sha1_hex(data: &[u8]) -> String { + let mut sha = Sha1::new(); + sha.update(data); + sha.digest().to_string() +} + +fn expected_uploaded_chunk_digests(assemble_response_path: &str) -> BTreeMap { + let chunk_upload_options: SmallChunkUploadOptions = serde_json::from_slice( + &fs::read("tests/integration/_responses/debug_files/get-chunk-upload-small-chunks.json") + .expect("chunk upload options response file should be present"), + ) + .expect("chunk upload options response should be valid JSON"); + + let assemble_response: BTreeMap = serde_json::from_slice( + &fs::read(assemble_response_path).expect("assemble response file should be present"), + ) + .expect("assemble response should be valid JSON"); + + let fixture_dir = "tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files"; + let mut chunk_digests = BTreeMap::new(); + + for fixture_name in ["fibonacci", "fibonacci-fast", "main"] { + let fixture_data = fs::read(format!("{fixture_dir}/{fixture_name}")) + .expect("fixture file should be readable"); + let file_checksum = sha1_hex(&fixture_data); + let file_response = assemble_response + .get(&file_checksum) + .expect("assemble response should contain an entry for every fixture file"); + + for chunk_checksum in fixture_data + 
.chunks(chunk_upload_options.chunk_size) + .map(sha1_hex) + .filter(|chunk_checksum| file_response.missing_chunks.contains(chunk_checksum)) + { + *chunk_digests.entry(chunk_checksum).or_default() += 1; + } + } + + chunk_digests +} + #[test] /// This test verifies a correct chunk upload of multiple debug files /// with a small chunk size (2048 bytes). @@ -487,25 +494,14 @@ fn chunk_upload_multiple_files_small_chunks() { /// at most 64 chunks. const CHUNKS_PER_REQUEST: usize = 64; - /// 582 chunks will be uploaded in total. - const TOTAL_CHUNKS: usize = 582; - - /// We expect the smallest number of requests that can be used to upload - /// all chunks, given the maximum number of chunks per request. - const EXPECTED_REQUESTS: usize = TOTAL_CHUNKS.div_ceil(CHUNKS_PER_REQUEST); - - /// This is the boundary used in the expected request file. - /// It was randomly generated when the expected request was recorded. - const EXPECTED_CHUNKS_BOUNDARY: &str = "------------------------au0ff4bj3Ky3LCMhImSTsy"; - - // Store all chunks received at the chunk upload endpoint. 
- let received_chunks = Arc::new(Mutex::new(HashSet::>::new())); - let received_chunks_closure = received_chunks.clone(); + let expected_chunk_digests = expected_uploaded_chunk_digests( + "tests/integration/_responses/debug_files/assemble-chunk-upload-small-chunks.json", + ); + let expected_total_chunks: usize = expected_chunk_digests.values().sum(); + let expected_requests = expected_total_chunks.div_ceil(CHUNKS_PER_REQUEST); - let expected_chunks = fs::read( - "tests/integration/_expected_requests/debug_files/upload/chunk_upload_small_chunks.bin", - ) - .expect("expected chunks file should be present"); + let received_chunk_digests = Arc::new(Mutex::new(BTreeMap::::new())); + let received_chunk_digests_closure = received_chunk_digests.clone(); let is_first_assemble_call = AtomicBool::new(true); TestManager::new() @@ -554,20 +550,23 @@ fn chunk_upload_multiple_files_small_chunks() { let body = request.body().expect("body should be readable"); - let chunks = chunk_upload::split_chunk_body(body, boundary) - .expect("body should be a valid multipart/form-data body"); + let decompressed = chunk_upload::decompress_chunks(body, boundary) + .expect("chunks should be valid gzip data"); // No single request should contain more than CHUNKS_PER_REQUEST chunks. 
- assert!(chunks.len() <= CHUNKS_PER_REQUEST); + assert!(decompressed.len() <= CHUNKS_PER_REQUEST); - received_chunks_closure + let request_chunk_digests = chunk_upload::chunk_digest_counts(&decompressed); + let mut received_chunk_digests = received_chunk_digests_closure .lock() - .expect("should be able to lock mutex") - .extend(chunks.into_iter().map(|chunk| chunk.into())); + .expect("should be able to lock mutex"); + for (chunk_digest, count) in request_chunk_digests { + *received_chunk_digests.entry(chunk_digest).or_default() += count; + } vec![] }) - .expect(EXPECTED_REQUESTS), + .expect(expected_requests), ) .assert_cmd(vec![ "debug-files", @@ -577,28 +576,12 @@ fn chunk_upload_multiple_files_small_chunks() { .with_default_token() .run_and_assert(AssertCommand::Success); - // For simplicity, even though multiple requests are expected, the expected chunks - // are all stored in a single file, which was generated by setting a larger maximum - // number of chunks per request when generating the expected request. - let expected_chunks = - chunk_upload::split_chunk_body(&expected_chunks, EXPECTED_CHUNKS_BOUNDARY) - .expect("expected chunks file should be a valid multipart/form-data body"); - - // Transform received chunks from a set of vectors to a set of slices. - let received_chunks_guard = received_chunks + let received_chunk_digests = received_chunk_digests .lock() .expect("should be able to lock mutex"); - - let received_chunks: HashSet<_> = received_chunks_guard - .iter() - .map(|chunk| chunk.as_slice()) - .collect(); - - // Use assert! because in case of failure, the output with assert_eq! - // is too long to be useful. 
- assert!( - received_chunks == expected_chunks, - "Uploaded chunks differ from the expected chunks" + assert_eq!( + *received_chunk_digests, expected_chunk_digests, + "uploaded chunk digests should match the missing chunks requested by assemble" ); } @@ -613,25 +596,14 @@ fn chunk_upload_multiple_files_small_chunks_only_some() { /// at most 64 chunks. const CHUNKS_PER_REQUEST: usize = 64; - /// Total number of chunks which will be uploaded. - const TOTAL_CHUNKS: usize = 327; - - /// We expect the smallest number of requests that can be used to upload - /// all chunks, given the maximum number of chunks per request. - const EXPECTED_REQUESTS: usize = TOTAL_CHUNKS.div_ceil(CHUNKS_PER_REQUEST); - - /// This is the boundary used in the expected request file. - /// It was randomly generated when the expected request was recorded. - const EXPECTED_CHUNKS_BOUNDARY: &str = "------------------------7MEblR9wDkWDeus1Jn5Qwy"; - - // Store all chunks received at the chunk upload endpoint. - let received_chunks = Arc::new(Mutex::new(HashSet::>::new())); - let received_chunks_closure = received_chunks.clone(); + let expected_chunk_digests = expected_uploaded_chunk_digests( + "tests/integration/_responses/debug_files/assemble-chunk-upload-small-chunks-only-some-missing.json", + ); + let expected_total_chunks: usize = expected_chunk_digests.values().sum(); + let expected_requests = expected_total_chunks.div_ceil(CHUNKS_PER_REQUEST); - let expected_chunks = fs::read( - "tests/integration/_expected_requests/debug_files/upload/chunk_upload_small_chunks_only_some.bin", - ) - .expect("expected chunks file should be present"); + let received_chunk_digests = Arc::new(Mutex::new(BTreeMap::::new())); + let received_chunk_digests_closure = received_chunk_digests.clone(); let is_first_assemble_call = AtomicBool::new(true); @@ -681,20 +653,23 @@ fn chunk_upload_multiple_files_small_chunks_only_some() { let body = request.body().expect("body should be readable"); - let chunks = 
chunk_upload::split_chunk_body(body, boundary) - .expect("body should be a valid multipart/form-data body"); + let decompressed = chunk_upload::decompress_chunks(body, boundary) + .expect("chunks should be valid gzip data"); // No single request should contain more than CHUNKS_PER_REQUEST chunks. - assert!(chunks.len() <= CHUNKS_PER_REQUEST); + assert!(decompressed.len() <= CHUNKS_PER_REQUEST); - received_chunks_closure + let request_chunk_digests = chunk_upload::chunk_digest_counts(&decompressed); + let mut received_chunk_digests = received_chunk_digests_closure .lock() - .expect("should be able to lock mutex") - .extend(chunks.into_iter().map(|chunk| chunk.into())); + .expect("should be able to lock mutex"); + for (chunk_digest, count) in request_chunk_digests { + *received_chunk_digests.entry(chunk_digest).or_default() += count; + } vec![] }) - .expect(EXPECTED_REQUESTS), + .expect(expected_requests), ) .assert_cmd(vec![ "debug-files", @@ -704,28 +679,12 @@ fn chunk_upload_multiple_files_small_chunks_only_some() { .with_default_token() .run_and_assert(AssertCommand::Success); - // For simplicity, even though multiple requests are expected, the expected chunks - // are all stored in a single file, which was generated by setting a larger maximum - // number of chunks per request when generating the expected request. - let expected_chunks = - chunk_upload::split_chunk_body(&expected_chunks, EXPECTED_CHUNKS_BOUNDARY) - .expect("expected chunks file should be a valid multipart/form-data body"); - - // Transform received chunks from a set of vectors to a set of slices. - let received_chunks_guard = received_chunks + let received_chunk_digests = received_chunk_digests .lock() .expect("should be able to lock mutex"); - - let received_chunks: HashSet<_> = received_chunks_guard - .iter() - .map(|chunk| chunk.as_slice()) - .collect(); - - // Use assert! because in case of failure, the output with assert_eq! - // is too long to be useful. 
- assert!( - received_chunks == expected_chunks, - "Uploaded chunks differ from the expected chunks" + assert_eq!( + *received_chunk_digests, expected_chunk_digests, + "uploaded chunk digests should match the missing chunks requested by assemble" ); } diff --git a/tests/integration/proguard/upload.rs b/tests/integration/proguard/upload.rs index 2e9be130b1..98dd10c70e 100644 --- a/tests/integration/proguard/upload.rs +++ b/tests/integration/proguard/upload.rs @@ -72,13 +72,7 @@ fn chunk_upload_already_there() { #[test] fn chunk_upload_needs_upload() { - const EXPECTED_CHUNKS_BOUNDARY: &str = "------------------------w2uOUUnuLEYTmQorc0ix48"; - let call_count = AtomicU8::new(0); - let expected_chunk_body = fs::read( - "tests/integration/_expected_requests/proguard/upload/chunk_upload_needs_upload.bin", - ) - .expect("expected chunk upload request file should be readable"); TestManager::new() .mock_endpoint( @@ -93,20 +87,17 @@ fn chunk_upload_needs_upload() { let body = request.body().expect("body should be readable"); - let chunks = chunk_upload::split_chunk_body(body, boundary) - .expect("body should be a valid multipart/form-data body"); + let decompressed = chunk_upload::decompress_chunks(body, boundary) + .expect("chunks should be valid gzip data"); - let expected_chunks = chunk_upload::split_chunk_body( - &expected_chunk_body, - EXPECTED_CHUNKS_BOUNDARY, - ) - .expect("expected body is valid multipart form data"); + let expected_content = + fs::read("tests/integration/_fixtures/proguard/upload/mapping.txt") + .expect("fixture file should be readable"); - // Using assert! because in case of failure, the output with assert_eq! - // is too long to be useful. 
- assert_eq!( - chunks, expected_chunks, - "Uploaded chunks differ from the expected chunks" + assert_eq!(decompressed.len(), 1, "expected exactly one chunk"); + assert!( + decompressed.contains(&expected_content), + "decompressed chunk should match the source file" ); vec![] @@ -167,12 +158,7 @@ fn chunk_upload_needs_upload() { #[test] fn chunk_upload_two_files() { - const EXPECTED_CHUNKS_BOUNDARY: &str = "------------------------HNdDRjCgjkRtu3COUTCcJV"; - let call_count = AtomicU8::new(0); - let expected_chunk_body = - fs::read("tests/integration/_expected_requests/proguard/upload/chunk_upload_two_files.bin") - .expect("expected chunk upload request file should be readable"); TestManager::new() .mock_endpoint( @@ -187,20 +173,20 @@ fn chunk_upload_two_files() { let body = request.body().expect("body should be readable"); - let chunks = chunk_upload::split_chunk_body(body, boundary) - .expect("body should be a valid multipart/form-data body"); + let decompressed = chunk_upload::decompress_chunks(body, boundary) + .expect("chunks should be valid gzip data"); - let expected_chunks = chunk_upload::split_chunk_body( - &expected_chunk_body, - EXPECTED_CHUNKS_BOUNDARY, - ) - .expect("expected body is valid multipart form data"); + let expected_1 = + fs::read("tests/integration/_fixtures/proguard/upload/mapping.txt") + .expect("fixture file should be readable"); + let expected_2 = + fs::read("tests/integration/_fixtures/proguard/upload/mapping-2.txt") + .expect("fixture file should be readable"); - // Using assert! because in case of failure, the output with assert_eq! - // is too long to be useful. 
- assert_eq!( - chunks, expected_chunks, - "Uploaded chunks differ from the expected chunks" + assert_eq!(decompressed.len(), 2, "expected exactly two chunks"); + assert!( + decompressed.contains(&expected_1) && decompressed.contains(&expected_2), + "decompressed chunks should match the source files" ); vec![] diff --git a/tests/integration/react_native/xcode.rs b/tests/integration/react_native/xcode.rs index 9255b491d2..60b2abebe9 100644 --- a/tests/integration/react_native/xcode.rs +++ b/tests/integration/react_native/xcode.rs @@ -3,7 +3,7 @@ use crate::integration::TestManager; #[test] fn xcode_upload_source_maps_missing_plist() { TestManager::new() - .mock_common_upload_endpoints(None, None) + .mock_common_upload_endpoints_with(None, false) .register_trycmd_test("react_native/xcode-upload-source-maps-invalid-plist.trycmd") .with_default_token(); } @@ -11,7 +11,7 @@ fn xcode_upload_source_maps_missing_plist() { #[test] fn xcode_upload_source_maps_release_and_dist_from_env() { TestManager::new() - .mock_common_upload_endpoints(None, Some(vec!["60f215dae7d29497357013d08c35e93716b6a46c"])) + .mock_common_upload_endpoints() .register_trycmd_test( "react_native/xcode-upload-source-maps-release_and_dist_from_env.trycmd", ) diff --git a/tests/integration/sourcemaps/upload.rs b/tests/integration/sourcemaps/upload.rs index 4d74feecf7..dac331d696 100644 --- a/tests/integration/sourcemaps/upload.rs +++ b/tests/integration/sourcemaps/upload.rs @@ -13,7 +13,7 @@ fn command_sourcemaps_upload() { #[test] fn command_sourcemaps_upload_modern() { TestManager::new() - .mock_common_upload_endpoints(None, Some(vec!["95d152c0530efb498133138c7e7092612f5abab1"])) + .mock_common_upload_endpoints() .register_trycmd_test("sourcemaps/sourcemaps-upload-modern.trycmd") .with_default_token() .assert_mock_endpoints(); @@ -22,10 +22,7 @@ fn command_sourcemaps_upload_modern() { #[test] fn command_sourcemaps_upload_modern_v2() { TestManager::new() - .mock_common_upload_endpoints( - Some(512), - 
Some(vec!["ec8450a9db19805703a27a2545c18b7b27ba0d7d"]), - ) + .mock_common_upload_endpoints_with(Some(512), true) .register_trycmd_test("sourcemaps/sourcemaps-upload-modern.trycmd") .with_default_token() .assert_mock_endpoints(); @@ -34,7 +31,7 @@ fn command_sourcemaps_upload_modern_v2() { #[test] fn command_sourcemaps_upload_some_debugids() { TestManager::new() - .mock_common_upload_endpoints(None, Some(vec!["5c854c641249fb5ba1075735c68980f9f7ed72b6"])) + .mock_common_upload_endpoints() .register_trycmd_test("sourcemaps/sourcemaps-upload-some-debugids.trycmd") .with_default_token() .assert_mock_endpoints(); @@ -44,7 +41,7 @@ fn command_sourcemaps_upload_some_debugids() { #[test] fn command_sourcemaps_upload_debugid_alias() { TestManager::new() - .mock_common_upload_endpoints(None, Some(vec!["4d668e3d5e4e436057d4b7157a450a9b7f130dfa"])) + .mock_common_upload_endpoints() .register_trycmd_test("sourcemaps/sourcemaps-upload-debugid-alias.trycmd") .with_default_token() .assert_mock_endpoints(); @@ -53,7 +50,7 @@ fn command_sourcemaps_upload_debugid_alias() { #[test] fn command_sourcemaps_upload_no_debugids() { TestManager::new() - .mock_common_upload_endpoints(None, None) + .mock_common_upload_endpoints_with(None, false) .register_trycmd_test("sourcemaps/sourcemaps-upload-no-debugids.trycmd") .with_default_token(); } @@ -61,7 +58,7 @@ fn command_sourcemaps_upload_no_debugids() { #[test] fn command_sourcemaps_upload_file_ram_bundle() { TestManager::new() - .mock_common_upload_endpoints(None, Some(vec!["e268173df7cbb38ca44334572c2815a264a2c28f"])) + .mock_common_upload_endpoints() .register_trycmd_test("sourcemaps/sourcemaps-upload-file-ram-bundle.trycmd") .with_default_token(); } @@ -69,7 +66,7 @@ fn command_sourcemaps_upload_file_ram_bundle() { #[test] fn command_sourcemaps_upload_indexed_ram_bundle() { TestManager::new() - .mock_common_upload_endpoints(None, Some(vec!["47ef8e33f7213b9baa452715d04e251c090d0aaa"])) + .mock_common_upload_endpoints() 
.register_trycmd_test("sourcemaps/sourcemaps-upload-indexed-ram-bundle.trycmd") .with_default_token(); } @@ -77,7 +74,7 @@ fn command_sourcemaps_upload_indexed_ram_bundle() { #[test] fn command_sourcemaps_upload_hermes_bundle_with_referencing_debug_id() { TestManager::new() - .mock_common_upload_endpoints(None, Some(vec!["a67a1e76159fc49e2fcc432fba8dbcd5d9696a73"])) + .mock_common_upload_endpoints() .register_trycmd_test( "sourcemaps/sourcemaps-upload-file-hermes-bundle-reference-debug-id.trycmd", ) @@ -87,7 +84,7 @@ fn command_sourcemaps_upload_hermes_bundle_with_referencing_debug_id() { #[test] fn command_sourcemaps_upload_cjs_mjs() { TestManager::new() - .mock_common_upload_endpoints(None, None) + .mock_common_upload_endpoints_with(None, false) .register_trycmd_test("sourcemaps/sourcemaps-upload-cjs-mjs.trycmd") .with_default_token(); } @@ -95,7 +92,7 @@ fn command_sourcemaps_upload_cjs_mjs() { #[test] fn command_sourcemaps_upload_complex_extension() { TestManager::new() - .mock_common_upload_endpoints(None, None) + .mock_common_upload_endpoints_with(None, false) .register_trycmd_test("sourcemaps/sourcemaps-upload-complex-extension.trycmd") .with_default_token(); } @@ -103,7 +100,7 @@ fn command_sourcemaps_upload_complex_extension() { #[test] fn command_sourcemaps_upload_skip_invalid_utf8() { TestManager::new() - .mock_common_upload_endpoints(None, None) + .mock_common_upload_endpoints_with(None, false) .register_trycmd_test("sourcemaps/sourcemaps-with-invalid-utf8.trycmd") .with_default_token(); } diff --git a/tests/integration/test_utils/chunk_upload.rs b/tests/integration/test_utils/chunk_upload.rs index 273da24a9a..eda9311e53 100644 --- a/tests/integration/test_utils/chunk_upload.rs +++ b/tests/integration/test_utils/chunk_upload.rs @@ -1,11 +1,14 @@ //! Utilities for chunk upload tests. 
-use std::collections::HashSet; +use std::collections::BTreeMap; use std::error::Error; +use std::io::Read as _; use std::str; use std::sync::LazyLock; +use flate2::read::GzDecoder; use mockito::Request; use regex::bytes::Regex; +use sha1_smol::Sha1; /// This regex is used to extract the boundary from the content-type header. /// We need to match the boundary, since it changes with each request. @@ -34,14 +37,13 @@ impl HeaderContainer for Request { } /// Split a multipart/form-data body into its constituent chunks. -/// The chunks are returned as a set, since chunk uploading code -/// does not guarantee any specific order of the chunks in the body. -/// We only want to check the invariant that each expected chunk is -/// in the body, not the order of the chunks. +/// +/// The returned vector preserves duplicate chunks. Callers that do not care +/// about multiplicity can collect into a set explicitly. pub fn split_chunk_body<'b>( body: &'b [u8], boundary: &str, -) -> Result<HashSet<&'b [u8]>, Box<dyn Error>> { +) -> Result<Vec<&'b [u8]>, Box<dyn Error>> { let escaped_boundary = regex::escape(boundary); let inner_body = entire_body_regex(&escaped_boundary) @@ -51,11 +53,6 @@ pub fn split_chunk_body<'b>( .expect("the regex has a \"body\" capture group which should always match") .as_bytes(); - // Using HashSet does have the small disadvantage that we don't - // preserve the count of any duplicate chunks, so our tests will - // fail to detect when the same chunk is included multiple times - // (this would be a bug). But, this way, we don't need to keep - // track of counts of chunks. Ok(boundary_regex(&escaped_boundary) .split(inner_body) .collect()) @@ -105,3 +102,45 @@ fn entire_body_regex(regex_escaped_boundary: &str) -> Regex { fn boundary_regex(regex_escaped_boundary: &str) -> Regex { Regex::new(&format!(r#"--{regex_escaped_boundary}"#)).expect("This regex should be valid") } + +/// Regex to separate multipart headers from the body (separated by \r\n\r\n). 
+static HEADER_BODY_SEPARATOR: LazyLock<Regex> = + LazyLock::new(|| Regex::new(r"\r\n\r\n").expect("Regex is valid")); + +/// Extract and decompress the file contents from a multipart chunk upload request. +/// Each chunk part has headers followed by a gzip-compressed body. This function +/// strips the multipart headers, decompresses each chunk, and preserves duplicate +/// chunk contents. +pub fn decompress_chunks(body: &[u8], boundary: &str) -> Result<Vec<Vec<u8>>, Box<dyn Error>> { + let parts = split_chunk_body(body, boundary)?; + let mut decompressed = Vec::with_capacity(parts.len()); + for part in parts { + // Each part is: \r\nHeaders\r\n\r\n + // Split on the first \r\n\r\n to separate headers from body. + if let Some(m) = HEADER_BODY_SEPARATOR.find(part) { + let compressed = &part[m.end()..]; + let mut decoder = GzDecoder::new(compressed); + let mut content = Vec::new(); + decoder.read_to_end(&mut content)?; + decompressed.push(content); + } + } + Ok(decompressed) +} + +/// Count chunks by SHA1 digest while preserving duplicate occurrences. 
+pub fn chunk_digest_counts<I, T>(chunks: I) -> BTreeMap<String, usize> +where + I: IntoIterator<Item = T>, + T: AsRef<[u8]>, +{ + let mut counts = BTreeMap::new(); + + for chunk in chunks { + let mut sha = Sha1::new(); + sha.update(chunk.as_ref()); + *counts.entry(sha.digest().to_string()).or_default() += 1; + } + + counts +} diff --git a/tests/integration/test_utils/mock_common_endpoints.rs b/tests/integration/test_utils/mock_common_endpoints.rs index fb4a6fb533..f7db29bd8b 100644 --- a/tests/integration/test_utils/mock_common_endpoints.rs +++ b/tests/integration/test_utils/mock_common_endpoints.rs @@ -12,7 +12,7 @@ use crate::integration::test_utils::MockEndpointBuilder; pub(super) fn common_upload_endpoints( server_url: impl Display, chunk_size: Option<usize>, - initial_missing_chunks: Option<Vec<&str>>, + simulate_missing_chunks: bool, ) -> impl Iterator<Item = MockEndpointBuilder> { let chunk_size = chunk_size.unwrap_or(8388608); let assemble_endpoint = "/api/0/organizations/wat-org/artifactbundle/assemble/"; @@ -36,22 +36,29 @@ pub(super) fn common_upload_endpoints( MockEndpointBuilder::new("POST", "/api/0/organizations/wat-org/chunk-upload/") .with_response_body("[]"), MockEndpointBuilder::new("POST", assemble_endpoint) - .with_response_fn(move |_| { - let response = if let Some(missing_chunks) = is_first_request - .swap(false, Ordering::Relaxed) - .then_some(initial_missing_chunks.as_ref()) - .flatten() - { - json!({ - "state": "not_found", - "missingChunks": missing_chunks, - }) - } else { - json!({ - "state": "created", - "missingChunks": [], - }) - }; + .with_response_fn(move |request| { + let body = request.body().expect("body should be readable"); + let response = + if is_first_request.swap(false, Ordering::Relaxed) && simulate_missing_chunks { + // On the first request, report all chunks from the request body + // as missing so the CLI uploads them. 
+ let parsed: serde_json::Value = serde_json::from_slice(body) + .expect("assemble body should be valid JSON"); + let chunks = parsed + .get("chunks") + .and_then(|c| c.as_array()) + .cloned() + .unwrap_or_default(); + json!({ + "state": "not_found", + "missingChunks": chunks, + }) + } else { + json!({ + "state": "created", + "missingChunks": [], + }) + }; serde_json::to_vec(&response).expect("failed to serialize response") }) diff --git a/tests/integration/test_utils/test_manager.rs b/tests/integration/test_utils/test_manager.rs index 025caab7cc..8faf16e324 100644 --- a/tests/integration/test_utils/test_manager.rs +++ b/tests/integration/test_utils/test_manager.rs @@ -44,15 +44,19 @@ impl TestManager { } /// Mock the common upload endpoints. - pub fn mock_common_upload_endpoints( + pub fn mock_common_upload_endpoints(self) -> Self { + self.mock_common_upload_endpoints_with(None, true) + } + + pub fn mock_common_upload_endpoints_with( self, chunk_size: Option<usize>, - initial_missing_chunks: Option<Vec<&str>>, + simulate_missing_chunks: bool, ) -> Self { mock_common_endpoints::common_upload_endpoints( self.server_url(), chunk_size, - initial_missing_chunks, + simulate_missing_chunks, ) .fold(self, |manager, builder| manager.mock_endpoint(builder)) } diff --git a/tests/integration/upload_proguard.rs b/tests/integration/upload_proguard.rs index 6f5d1ee6ab..574da5353b 100644 --- a/tests/integration/upload_proguard.rs +++ b/tests/integration/upload_proguard.rs @@ -1,5 +1,5 @@ +use std::fs; use std::sync::atomic::{AtomicU8, Ordering}; -use std::{fs, str}; use mockito::Matcher; @@ -71,13 +71,7 @@ fn chunk_upload_already_there() { #[test] fn chunk_upload_needs_upload() { - const EXPECTED_CHUNKS_BOUNDARY: &str = "------------------------w2uOUUnuLEYTmQorc0ix48"; - let call_count = AtomicU8::new(0); - let expected_chunk_body = fs::read( - "tests/integration/_expected_requests/proguard/upload/chunk_upload_needs_upload.bin", - ) - .expect("expected chunk upload request file should be 
readable"); TestManager::new() .mock_endpoint( @@ -92,20 +86,17 @@ fn chunk_upload_needs_upload() { let body = request.body().expect("body should be readable"); - let chunks = chunk_upload::split_chunk_body(body, boundary) - .expect("body should be a valid multipart/form-data body"); + let decompressed = chunk_upload::decompress_chunks(body, boundary) + .expect("chunks should be valid gzip data"); - let expected_chunks = chunk_upload::split_chunk_body( - &expected_chunk_body, - EXPECTED_CHUNKS_BOUNDARY, - ) - .expect("expected body is valid multipart form data"); + let expected_content = + fs::read("tests/integration/_fixtures/proguard/upload/mapping.txt") + .expect("fixture file should be readable"); - // Using assert! because in case of failure, the output with assert_eq! - // is too long to be useful. - assert_eq!( - chunks, expected_chunks, - "Uploaded chunks differ from the expected chunks" + assert_eq!(decompressed.len(), 1, "expected exactly one chunk"); + assert!( + decompressed.contains(&expected_content), + "decompressed chunk should match the source file" ); vec![] @@ -164,12 +155,7 @@ fn chunk_upload_needs_upload() { #[test] fn chunk_upload_two_files() { - const EXPECTED_CHUNKS_BOUNDARY: &str = "------------------------HNdDRjCgjkRtu3COUTCcJV"; - let call_count = AtomicU8::new(0); - let expected_chunk_body = - fs::read("tests/integration/_expected_requests/proguard/upload/chunk_upload_two_files.bin") - .expect("expected chunk upload request file should be readable"); TestManager::new() .mock_endpoint( @@ -184,20 +170,20 @@ fn chunk_upload_two_files() { let body = request.body().expect("body should be readable"); - let chunks = chunk_upload::split_chunk_body(body, boundary) - .expect("body should be a valid multipart/form-data body"); + let decompressed = chunk_upload::decompress_chunks(body, boundary) + .expect("chunks should be valid gzip data"); - let expected_chunks = chunk_upload::split_chunk_body( - &expected_chunk_body, - EXPECTED_CHUNKS_BOUNDARY, 
- ) - .expect("expected body is valid multipart form data"); + let expected_1 = + fs::read("tests/integration/_fixtures/proguard/upload/mapping.txt") + .expect("fixture file should be readable"); + let expected_2 = + fs::read("tests/integration/_fixtures/proguard/upload/mapping-2.txt") + .expect("fixture file should be readable"); - // Using assert! because in case of failure, the output with assert_eq! - // is too long to be useful. - assert_eq!( - chunks, expected_chunks, - "Uploaded chunks differ from the expected chunks" + assert_eq!(decompressed.len(), 2, "expected exactly two chunks"); + assert!( + decompressed.contains(&expected_1) && decompressed.contains(&expected_2), + "decompressed chunks should match the source files" ); vec![]