refact: repo id are not based on path but on id
This commit is contained in:
parent
ae61fd3c12
commit
8a43a93cda
@ -7,25 +7,10 @@ from plesna.models.storage import Partition, Schema, Table
|
|||||||
from plesna.storage.repository.repository import Repository
|
from plesna.storage.repository.repository import Repository
|
||||||
|
|
||||||
|
|
||||||
class FSPartition(BaseModel):
|
|
||||||
name: str
|
|
||||||
path: Path
|
|
||||||
|
|
||||||
@computed_field
|
|
||||||
@property
|
|
||||||
def ref(self) -> Partition:
|
|
||||||
return Partition(
|
|
||||||
id=str(self.path),
|
|
||||||
repo_id=str(self.path.parent.parent.parent),
|
|
||||||
schema_id=str(self.path.parent.parent),
|
|
||||||
table_id=str(self.path.parent),
|
|
||||||
name=self.name,
|
|
||||||
value=str(self.path.absolute()),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class FSTable(BaseModel):
|
class FSTable(BaseModel):
|
||||||
name: str
|
name: str
|
||||||
|
repo_id: str
|
||||||
|
schema_id: str
|
||||||
id: str
|
id: str
|
||||||
path: Path
|
path: Path
|
||||||
is_partitionned: bool
|
is_partitionned: bool
|
||||||
@ -41,8 +26,8 @@ class FSTable(BaseModel):
|
|||||||
|
|
||||||
return Table(
|
return Table(
|
||||||
id=self.id,
|
id=self.id,
|
||||||
repo_id=str(self.path.parent.parent),
|
repo_id=self.repo_id,
|
||||||
schema_id=str(self.path.parent),
|
schema_id=self.schema_id,
|
||||||
name=self.name,
|
name=self.name,
|
||||||
value=str(self.path.absolute()),
|
value=str(self.path.absolute()),
|
||||||
partitions=self.partitions,
|
partitions=self.partitions,
|
||||||
@ -52,6 +37,7 @@ class FSTable(BaseModel):
|
|||||||
|
|
||||||
class FSSchema(BaseModel):
|
class FSSchema(BaseModel):
|
||||||
name: str
|
name: str
|
||||||
|
repo_id: str
|
||||||
id: str
|
id: str
|
||||||
path: Path
|
path: Path
|
||||||
tables: list[str]
|
tables: list[str]
|
||||||
@ -61,7 +47,7 @@ class FSSchema(BaseModel):
|
|||||||
def ref(self) -> Schema:
|
def ref(self) -> Schema:
|
||||||
return Schema(
|
return Schema(
|
||||||
id=self.id,
|
id=self.id,
|
||||||
repo_id=str(self.path.parent),
|
repo_id=self.repo_id,
|
||||||
name=self.name,
|
name=self.name,
|
||||||
value=str(self.path.absolute()),
|
value=str(self.path.absolute()),
|
||||||
tables=self.tables,
|
tables=self.tables,
|
||||||
@ -82,8 +68,8 @@ class FSRepository(Repository):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
ID_FMT = {
|
ID_FMT = {
|
||||||
"schema": "{repo_name}-{schema_name}",
|
"schema": "{repo_id}-{schema_name}",
|
||||||
"table": "{repo_name}-{schema_name}-{table_name}",
|
"table": "{schema_id}-{table_name}",
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, id: str, name: str, basepath: str):
|
def __init__(self, id: str, name: str, basepath: str):
|
||||||
@ -139,22 +125,29 @@ class FSRepository(Repository):
|
|||||||
"""List schemas (sub directories within basepath)"""
|
"""List schemas (sub directories within basepath)"""
|
||||||
subdirectories = self.ls("", only_directories=True)
|
subdirectories = self.ls("", only_directories=True)
|
||||||
return [
|
return [
|
||||||
self.ID_FMT["schema"].format(repo_name=self.name, schema_name=d) for d in subdirectories
|
self.ID_FMT["schema"].format(repo_id=self.id, schema_name=d) for d in subdirectories
|
||||||
]
|
]
|
||||||
|
|
||||||
def _schema(self, schema_id: str) -> FSSchema:
|
def _schema(self, schema_id: str) -> FSSchema:
|
||||||
"""List schemas (sub directories within basepath)"""
|
"""List schemas (sub directories within basepath)"""
|
||||||
parsed = self.parse_id(schema_id, "schema")
|
parsed = self.parse_id(schema_id, "schema")
|
||||||
|
|
||||||
repo_name = parsed["repo_name"]
|
repo_id = parsed["repo_id"]
|
||||||
schema_name = parsed["schema_name"]
|
schema_name = parsed["schema_name"]
|
||||||
schema_path = self._basepath / schema_name
|
schema_path = self._basepath / schema_name
|
||||||
|
|
||||||
if repo_name != self.name:
|
if repo_id != self.id:
|
||||||
raise FSRepositoryError("Trying to get schema that don't belong in this repository")
|
raise FSRepositoryError("Trying to get schema that don't belong in this repository")
|
||||||
|
|
||||||
tables = self.tables(schema_id)
|
tables = self.tables(schema_id)
|
||||||
return FSSchema(name=schema_name, id=schema_id, path=schema_path, tables=tables)
|
return FSSchema(
|
||||||
|
name=schema_name,
|
||||||
|
id=schema_id,
|
||||||
|
repo_id=self.id,
|
||||||
|
schema_id=schema_id,
|
||||||
|
path=schema_path,
|
||||||
|
tables=tables,
|
||||||
|
)
|
||||||
|
|
||||||
def schema(self, schema_id: str) -> Schema:
|
def schema(self, schema_id: str) -> Schema:
|
||||||
return self._schema(schema_id).ref
|
return self._schema(schema_id).ref
|
||||||
@ -162,7 +155,7 @@ class FSRepository(Repository):
|
|||||||
def _tables(self, schema_id: str) -> list[str]:
|
def _tables(self, schema_id: str) -> list[str]:
|
||||||
parsed = self.parse_id(schema_id, "schema")
|
parsed = self.parse_id(schema_id, "schema")
|
||||||
tables = self.ls(parsed["schema_name"])
|
tables = self.ls(parsed["schema_name"])
|
||||||
return [self.ID_FMT["table"].format(table_name=t, **parsed) for t in tables]
|
return [self.ID_FMT["table"].format(table_name=t, schema_id=schema_id) for t in tables]
|
||||||
|
|
||||||
def tables(self, schema_id: str = "") -> list[str]:
|
def tables(self, schema_id: str = "") -> list[str]:
|
||||||
if schema_id:
|
if schema_id:
|
||||||
@ -176,14 +169,12 @@ class FSRepository(Repository):
|
|||||||
def _table(self, table_id: str) -> FSTable:
|
def _table(self, table_id: str) -> FSTable:
|
||||||
"""Get infos on the table"""
|
"""Get infos on the table"""
|
||||||
parsed = self.parse_id(table_id, "table")
|
parsed = self.parse_id(table_id, "table")
|
||||||
if parsed["repo_name"] != self.name:
|
schema = self._schema(parsed["schema_id"])
|
||||||
raise FSRepositoryError("Trying to get table that don't belong in this repository")
|
|
||||||
|
|
||||||
schema_path = self._basepath / parsed["schema_name"]
|
if not schema.path.exists():
|
||||||
if not schema_path.exists():
|
raise FSRepositoryError(f"The schema {schema.id} does not exists.")
|
||||||
raise FSRepositoryError(f"The schema {parsed['schema_name']} does not exists.")
|
|
||||||
|
|
||||||
table_subpath = f"{parsed['schema_name']}/{parsed['table_name']}"
|
table_subpath = f"{schema.name}/{parsed['table_name']}"
|
||||||
table_path = self._basepath / table_subpath
|
table_path = self._basepath / table_subpath
|
||||||
|
|
||||||
is_partitionned = table_path.is_dir()
|
is_partitionned = table_path.is_dir()
|
||||||
@ -195,6 +186,8 @@ class FSRepository(Repository):
|
|||||||
return FSTable(
|
return FSTable(
|
||||||
name=parsed["table_name"],
|
name=parsed["table_name"],
|
||||||
id=table_id,
|
id=table_id,
|
||||||
|
repo_id=self.id,
|
||||||
|
schema_id=schema.id,
|
||||||
path=table_path,
|
path=table_path,
|
||||||
is_partitionned=is_partitionned,
|
is_partitionned=is_partitionned,
|
||||||
partitions=partitions,
|
partitions=partitions,
|
||||||
|
@ -45,50 +45,50 @@ def test_init(location):
|
|||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def repository(location) -> FSRepository:
|
def repository(location) -> FSRepository:
|
||||||
return FSRepository("example", "example", location)
|
return FSRepository("repo_id", "example", location)
|
||||||
|
|
||||||
|
|
||||||
def test_list_schemas(repository):
|
def test_list_schemas(repository):
|
||||||
assert repository.schemas() == ["example-schema"]
|
assert repository.schemas() == ["repo_id-schema"]
|
||||||
|
|
||||||
|
|
||||||
def test_describe_schema(location, repository):
|
def test_describe_schema(location, repository):
|
||||||
schema = repository.schema("example-schema")
|
schema = repository.schema("repo_id-schema")
|
||||||
assert schema.name == "schema"
|
assert schema.name == "schema"
|
||||||
assert schema.id == "example-schema"
|
assert schema.id == "repo_id-schema"
|
||||||
assert schema.repo_id == str(location)
|
assert schema.repo_id == "repo_id"
|
||||||
assert schema.value == str(location / "schema")
|
assert schema.value == str(location / "schema")
|
||||||
assert schema.tables == [
|
assert schema.tables == [
|
||||||
"example-schema-username",
|
"repo_id-schema-username",
|
||||||
"example-schema-recovery",
|
"repo_id-schema-recovery",
|
||||||
"example-schema-salary",
|
"repo_id-schema-salary",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def test_list_tables_schema(repository):
|
def test_list_tables_schema(repository):
|
||||||
assert repository.schema("example-schema").tables == [
|
assert repository.schema("repo_id-schema").tables == [
|
||||||
"example-schema-username",
|
"repo_id-schema-username",
|
||||||
"example-schema-recovery",
|
"repo_id-schema-recovery",
|
||||||
"example-schema-salary",
|
"repo_id-schema-salary",
|
||||||
]
|
]
|
||||||
assert repository.tables("example-schema") == [
|
assert repository.tables("repo_id-schema") == [
|
||||||
"example-schema-username",
|
"repo_id-schema-username",
|
||||||
"example-schema-recovery",
|
"repo_id-schema-recovery",
|
||||||
"example-schema-salary",
|
"repo_id-schema-salary",
|
||||||
]
|
]
|
||||||
assert repository.tables() == [
|
assert repository.tables() == [
|
||||||
"example-schema-username",
|
"repo_id-schema-username",
|
||||||
"example-schema-recovery",
|
"repo_id-schema-recovery",
|
||||||
"example-schema-salary",
|
"repo_id-schema-salary",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def test_describe_table(location, repository):
|
def test_describe_table(location, repository):
|
||||||
table = repository.table("example-schema-username")
|
table = repository.table("repo_id-schema-username")
|
||||||
|
|
||||||
assert table.id == "example-schema-username"
|
assert table.id == "repo_id-schema-username"
|
||||||
assert table.repo_id == str(location)
|
assert table.repo_id == "repo_id"
|
||||||
assert table.schema_id == str(location / "schema")
|
assert table.schema_id == "repo_id-schema"
|
||||||
assert table.name == "username"
|
assert table.name == "username"
|
||||||
assert table.value == str(location / "schema" / "username")
|
assert table.value == str(location / "schema" / "username")
|
||||||
assert table.partitions == ["username.csv"]
|
assert table.partitions == ["username.csv"]
|
||||||
@ -96,11 +96,11 @@ def test_describe_table(location, repository):
|
|||||||
|
|
||||||
|
|
||||||
def test_describe_table_with_partitions(location, repository):
|
def test_describe_table_with_partitions(location, repository):
|
||||||
table = repository.table("example-schema-recovery")
|
table = repository.table("repo_id-schema-recovery")
|
||||||
|
|
||||||
assert table.id == "example-schema-recovery"
|
assert table.id == "repo_id-schema-recovery"
|
||||||
assert table.repo_id == str(location)
|
assert table.repo_id == "repo_id"
|
||||||
assert table.schema_id == str(location / "schema")
|
assert table.schema_id == "repo_id-schema"
|
||||||
assert table.name == "recovery"
|
assert table.name == "recovery"
|
||||||
assert table.value == str(location / "schema" / "recovery")
|
assert table.value == str(location / "schema" / "recovery")
|
||||||
assert table.partitions == [
|
assert table.partitions == [
|
||||||
|
Loading…
Reference in New Issue
Block a user