Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions src/crawlee/storages/_storage_instance_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,20 +46,20 @@ def remove_from_cache(self, storage_instance: Storage) -> None:
storage_type = type(storage_instance)

# Remove from ID cache
for additional_key in self.by_id[storage_type][storage_instance.id]:
del self.by_id[storage_type][storage_instance.id][additional_key]
break
for additional_key, cached in list(self.by_id[storage_type][storage_instance.id].items()):
if cached is storage_instance:
del self.by_id[storage_type][storage_instance.id][additional_key]

# Remove from name cache or alias cache. It can never be in both.
if storage_instance.name is not None:
for additional_key in self.by_name[storage_type][storage_instance.name]:
del self.by_name[storage_type][storage_instance.name][additional_key]
break
for additional_key, cached in list(self.by_name[storage_type][storage_instance.name].items()):
if cached is storage_instance:
del self.by_name[storage_type][storage_instance.name][additional_key]
else:
for alias_key in self.by_alias[storage_type]:
for additional_key in self.by_alias[storage_type][alias_key]:
del self.by_alias[storage_type][alias_key][additional_key]
break
for additional_key, cached in list(self.by_alias[storage_type][alias_key].items()):
if cached is storage_instance:
del self.by_alias[storage_type][alias_key][additional_key]


# Coroutine type whose awaited result is a dataset, key-value store, or request queue client.
ClientOpenerCoro = Coroutine[None, None, DatasetClient | KeyValueStoreClient | RequestQueueClient]
Expand Down
25 changes: 25 additions & 0 deletions tests/unit/storages/test_storage_instance_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,28 @@ async def open_dataset(name: str | None, alias: str | None) -> None:
dataset = await Dataset.open(name=valid_kwargs.get('name'), alias=valid_kwargs.get('alias'))

await dataset.drop()


@pytest.mark.parametrize('name', ['my-storage', None], ids=['named', 'default'])
@pytest.mark.parametrize('drop_first', [True, False], ids=['drop-first', 'drop-second'])
async def test_drop_only_evicts_own_cache_entry(
    tmp_path: Path, storage_type: type[Storage], name: str | None, *, drop_first: bool
) -> None:
    """Verify that `drop()` removes only the dropped instance from the cache.

    Two storages are opened under the same name with different storage clients. After one of them
    is dropped, re-opening with the other client must return the still-cached instance, while
    re-opening with the dropped storage's client must yield a new instance.
    """
    config = Configuration(purge_on_start=True, storage_dir=str(tmp_path))
    mem_client = MemoryStorageClient()
    fs_client = FileSystemStorageClient()

    async def reopen(client: MemoryStorageClient | FileSystemStorageClient) -> Storage:
        # Every open in this test shares the same name and configuration; only the client varies.
        return await storage_type.open(name=name, storage_client=client, configuration=config)

    # Same name, different client types -> two distinct cached instances.
    storage_mem = await reopen(mem_client)
    storage_fs = await reopen(fs_client)
    assert storage_mem is not storage_fs

    # Decide which instance is dropped and which one has to survive in the cache.
    if drop_first:
        victim, victim_client = storage_mem, mem_client
        survivor, survivor_client = storage_fs, fs_client
    else:
        victim, victim_client = storage_fs, fs_client
        survivor, survivor_client = storage_mem, mem_client

    await victim.drop()

    # The sibling is still served from the cache ...
    assert await reopen(survivor_client) is survivor
    # ... whereas the dropped storage has been evicted, so a new instance is created.
    assert await reopen(victim_client) is not victim
Loading