-
Notifications
You must be signed in to change notification settings - Fork 118
Refactor do_connect for better logging #504
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -350,6 +350,13 @@ def __init__( | |
| max_concurrency = batch_size | ||
| self.max_concurrency = max_concurrency | ||
|
|
||
| @property | ||
| def account_url(self) -> str: | ||
| if hasattr(self, "account_host"): | ||
| return f"https://{self.account_host}" | ||
| else: | ||
| return f"https://{self.account_name}.blob.core.windows.net" | ||
|
|
||
| @classmethod | ||
| def _strip_protocol(cls, path: str): | ||
| """ | ||
|
|
@@ -464,55 +471,15 @@ def _get_default_azure_credential(self, **kwargs): | |
|
|
||
| def do_connect(self): | ||
| """Connect to the BlobServiceClient, using user-specified connection details. | ||
| Tries credentials first, then connection string and finally account key | ||
| Tries connection string first, then credential and finally account key | ||
|
|
||
| Raises | ||
| ------ | ||
| ValueError if none of the connection details are available | ||
| """ | ||
|
|
||
| try: | ||
| if self.connection_string is not None: | ||
| self.service_client = AIOBlobServiceClient.from_connection_string( | ||
| conn_str=self.connection_string | ||
| ) | ||
| elif self.account_name is not None: | ||
| if hasattr(self, "account_host"): | ||
| self.account_url: str = f"https://{self.account_host}" | ||
| else: | ||
| self.account_url: str = ( | ||
| f"https://{self.account_name}.blob.core.windows.net" | ||
| ) | ||
|
|
||
| creds = [self.credential, self.account_key] | ||
| if any(creds): | ||
| self.service_client = [ | ||
| AIOBlobServiceClient( | ||
| account_url=self.account_url, | ||
| credential=cred, | ||
| _location_mode=self.location_mode, | ||
| ) | ||
| for cred in creds | ||
| if cred is not None | ||
| ][0] | ||
| elif self.sas_token is not None: | ||
| if not self.sas_token.startswith("?"): | ||
| self.sas_token = f"?{self.sas_token}" | ||
| self.service_client = AIOBlobServiceClient( | ||
| account_url=self.account_url + self.sas_token, | ||
| credential=None, | ||
| _location_mode=self.location_mode, | ||
| ) | ||
| else: | ||
| # Fall back to anonymous login, and assume public container | ||
| self.service_client = AIOBlobServiceClient( | ||
| account_url=self.account_url | ||
| ) | ||
| else: | ||
| raise ValueError( | ||
| "Must provide either a connection_string or account_name with credentials!!" | ||
| ) | ||
|
|
||
| self.service_client = self._get_service_client() | ||
| except RuntimeError: | ||
| loop = get_loop() | ||
| asyncio.set_event_loop(loop) | ||
|
|
@@ -521,6 +488,46 @@ def do_connect(self): | |
| except Exception as e: | ||
| raise ValueError(f"unable to connect to account for {e}") from e | ||
|
|
||
| def _get_service_client(self) -> AIOBlobServiceClient: | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we hoist this helper internal method to the module level instead of making it a method on the class? Mainly that helps better avoid accessing internal methods of one class from another class.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not really. It uses a lot of AzureBlobFileSystem attributes, so it needs the AzureBlobFileSystem object, and might as well just be a method. So I'd prefer to make it a public method ( The other option is to make a public method that prepares the arguments (e.g. |
||
| """Connect to the Asynchronous BlobServiceClient. | ||
|
|
||
| Tries connection string first, then credential and finally account key | ||
| """ | ||
| # Shortcut for connection string | ||
| if self.connection_string is not None: | ||
| logger.info("Connect using connection string") | ||
| return AIOBlobServiceClient.from_connection_string( | ||
| conn_str=self.connection_string | ||
| ) | ||
|
|
||
| if self.account_name is not None: | ||
| kwargs = { | ||
| "account_url": self.account_url, | ||
| "credential": None, | ||
| "_location_mode": self.location_mode, | ||
| } | ||
| creds = ["credential", "sync_credential", "account_key"] | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any ideas on why the file system's client and the file-object's client were out of sync in terms of credential resolution here in the first place? I need to go through the git history on how these became out of sync, but I'd probably lean toward even removing
I'll look more into this and circle back on what we do here.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like this is the only place that I also think that this method should use exactly the same logic as do_connect. Otherwise it will only cause confusion. To me, it looks like a copy-and-paste that has not been properly updated.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, I'm fairly sure sync_credentials can be removed: #551
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's also the question of why |
||
| for name in creds: | ||
| if (cred := getattr(self, name, None)) is not None: | ||
| logger.info("Connect using %s", name.replace("_", " ")) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For these logging statements, let's set the level to
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK |
||
| kwargs["credential"] = cred | ||
| break | ||
| else: | ||
| if self.sas_token is not None: | ||
| logger.info("Connect using SAS token") | ||
| if not self.sas_token.startswith("?"): | ||
| self.sas_token = f"?{self.sas_token}" | ||
|
Comment on lines
+518
to
+519
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be interesting if we could hoist this logic to the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree this shouldn't be mutating The URL can be reconstructed with |
||
| kwargs["account_url"] = f'{kwargs["account_url"]}{self.sas_token}' | ||
| else: | ||
| logger.info("Connect using anonymous login") | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the previous logic, the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK |
||
| # Fall back to anonymous login, and assume public container | ||
|
|
||
| return AIOBlobServiceClient(**kwargs) | ||
|
|
||
| raise ValueError( | ||
| "Must provide either a connection_string or account_name with credentials!!" | ||
| ) | ||
|
|
||
| def split_path( | ||
| self, path, delimiter="/", return_container: bool = False, **kwargs | ||
| ) -> Tuple[str, str, Optional[str]]: | ||
|
|
@@ -2030,43 +2037,16 @@ def close(self): | |
|
|
||
| def connect_client(self): | ||
| """Connect to the Asynchronous BlobServiceClient, using user-specified connection details. | ||
| Tries credentials first, then connection string and finally account key | ||
| Tries connection string first, then credential and finally account key | ||
|
|
||
| Raises | ||
| ------ | ||
| ValueError if none of the connection details are available | ||
| """ | ||
| try: | ||
| if hasattr(self.fs, "account_host"): | ||
| self.fs.account_url: str = f"https://{self.fs.account_host}" | ||
| else: | ||
| self.fs.account_url: str = ( | ||
| f"https://{self.fs.account_name}.blob.core.windows.net" | ||
| ) | ||
|
|
||
| creds = [self.fs.sync_credential, self.fs.account_key, self.fs.credential] | ||
| if any(creds): | ||
| self.container_client = [ | ||
| AIOBlobServiceClient( | ||
| account_url=self.fs.account_url, | ||
| credential=cred, | ||
| _location_mode=self.fs.location_mode, | ||
| ).get_container_client(self.container_name) | ||
| for cred in creds | ||
| if cred is not None | ||
| ][0] | ||
| elif self.fs.connection_string is not None: | ||
| self.container_client = AIOBlobServiceClient.from_connection_string( | ||
| conn_str=self.fs.connection_string | ||
| ).get_container_client(self.container_name) | ||
| elif self.fs.sas_token is not None: | ||
| self.container_client = AIOBlobServiceClient( | ||
| account_url=self.fs.account_url + self.fs.sas_token, credential=None | ||
| ).get_container_client(self.container_name) | ||
| else: | ||
| self.container_client = AIOBlobServiceClient( | ||
| account_url=self.fs.account_url | ||
| ).get_container_client(self.container_name) | ||
| self.container_client = self.fs._get_service_client().get_container_client( | ||
| self.container_name | ||
| ) | ||
|
|
||
| except Exception as e: | ||
| raise ValueError( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Technically, to keep the existing behavior, we will need to check whether the connection string is set and if so return
None. We should add that since if aconnection_stringis provided, anaccount_namewould not be and we could end up encodingNoneinto the account url.While it is possible to piece together an account url from a connection string, that would be quite a bit of logic for the purpose of this PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll update it to return None