refactor: reorganize raw_datas and adapt tests

This commit is contained in:
2025-01-05 06:42:51 +01:00
parent 86f0dcc49e
commit f0315d09b9
10 changed files with 77 additions and 61 deletions

View File

@@ -13,26 +13,11 @@ FIXTURE_DIR = Path(__file__).parent.parent / Path("raw_datas")
@pytest.fixture
def repository(tmp_path) -> FSRepository:
raw_path = Path(tmp_path) / "raw"
raw_path.mkdir()
example_src = FIXTURE_DIR
assert example_src.exists()
recovery_loc = raw_path / "recovery"
recovery_loc.mkdir()
username_loc = raw_path / "username"
username_loc.mkdir()
salary_loc = raw_path / "salary"
salary_loc.mkdir()
for f in example_src.glob("*"):
if "recovery" in str(f):
shutil.copy(f, recovery_loc)
elif "salary" in str(f):
shutil.copy(f, salary_loc)
else:
shutil.copy(f, username_loc)
raw_path = Path(tmp_path) / "raw"
shutil.copytree(src=example_src.absolute(), dst=raw_path.absolute())
bronze_path = Path(tmp_path) / "bronze"
bronze_path.mkdir()
@@ -123,11 +108,16 @@ def dataplatform(
def test_listing_content(dataplatform: DataPlateform):
assert dataplatform.repository("test").schemas() == ["raw", "bronze", "silver"]
assert dataplatform.repository("test").schema("raw").tables == [
"recovery",
"username",
"recovery",
"salary",
]
assert dataplatform.repository("test").table("raw", "username").partitions == ["username.csv"]
assert dataplatform.repository("test").table("raw", "recovery").partitions == [
"2022.csv",
"2023.csv",
"2024.csv",
]
def test_execute_flux(dataplatform: DataPlateform):
@@ -137,6 +127,6 @@ def test_execute_flux(dataplatform: DataPlateform):
assert dataplatform.repository("test").schema("bronze").tables == []
meta = dataplatform.execute_flux("raw_brz_copy_username")
assert meta.data == {"src_size": 175, "tgt_size": 175}
assert meta.data == {"src_size": 283, "tgt_size": 283}
assert dataplatform.repository("test").schema("bronze").tables == ["username"]