Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions ibis/backends/trino/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
from functools import cached_property
from operator import itemgetter
from typing import TYPE_CHECKING, Any
from urllib.parse import unquote_plus, urlparse
from urllib.parse import unquote_plus

import sqlglot as sg
import sqlglot.expressions as sge
import trino
from trino.auth import BasicAuthentication
from trino.auth import Authentication, BasicAuthentication

import ibis
import ibis.backends.sql.compilers as sc
Expand Down Expand Up @@ -60,7 +60,10 @@ def _from_url(self, url: ParseResult, **kwarg_overrides):
if url.password:
kwargs["auth"] = unquote_plus(url.password)
if url.hostname:
kwargs["host"] = url.hostname
# Do NOT convert to url.hostname, the trino client expects an entire URL
# to do inference on http vs https, port, etc.
# https://github.com/trinodb/trino-python-client/blob/2108c38dea79518ffb74370177df2dc95f1e6d96/trino/dbapi.py#L169
kwargs["host"] = url
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this will work. I believe the url passed in here is expected to have the form trino://<username>:<password>@<hostname>:<port>/<database> if it's passed from ibis.connect. Most likely the scheme would be trino rather than http or https.

ibis/ibis/backends/__init__.py

Lines 1675 to 1775 in 7cc3686

def connect(resource: Path | str, /, **kwargs: Any) -> BaseBackend:
"""Connect to `resource`, inferring the backend automatically.
The general pattern for `ibis.connect` is
```python
con = ibis.connect("backend://connection-parameters")
```
With many backends that looks like
```python
con = ibis.connect("backend://user:password@host:port/database")
```
See the connection syntax for each backend for details about URL connection
requirements.
Parameters
----------
resource
A URL or path to the resource to be connected to.
kwargs
Backend specific keyword arguments
Examples
--------
Connect to an in-memory DuckDB database:
>>> import ibis
>>> con = ibis.connect("duckdb://")
Connect to an on-disk SQLite database:
>>> con = ibis.connect("sqlite://relative.db")
>>> con = ibis.connect(
... "sqlite:///absolute/path/to/data.db"
... ) # quartodoc: +SKIP # doctest: +SKIP
Connect to a PostgreSQL server:
>>> con = ibis.connect(
... "postgres://user:password@hostname:5432"
... ) # quartodoc: +SKIP # doctest: +SKIP
Connect to BigQuery:
>>> con = ibis.connect(
... "bigquery://my-project/my-dataset"
... ) # quartodoc: +SKIP # doctest: +SKIP
"""
url = resource = str(resource)
if re.match("[A-Za-z]:", url):
# windows path with drive, treat it as a file
url = f"file://{url}"
parsed = urllib.parse.urlparse(url)
scheme = parsed.scheme or "file"
orig_kwargs = kwargs.copy()
kwargs = dict(urllib.parse.parse_qsl(parsed.query))
# convert single parameter lists value to single values
for name, value in kwargs.items():
if len(value) == 1:
kwargs[name] = value[0]
# Merge explicit kwargs with query string, explicit kwargs
# taking precedence
kwargs.update(orig_kwargs)
if scheme == "file":
path = parsed.netloc + parsed.path
if path.endswith(".duckdb"):
return ibis.duckdb.connect(path, **kwargs)
elif path.endswith((".sqlite", ".db")):
return ibis.sqlite.connect(path, **kwargs)
elif path.endswith((".csv", ".csv.gz")):
# Load csv/csv.gz files with duckdb by default
con = ibis.duckdb.connect(**kwargs)
con.read_csv(path)
return con
elif path.endswith(".parquet"):
# Load parquet files with duckdb by default
con = ibis.duckdb.connect(**kwargs)
con.read_parquet(path)
return con
else:
raise ValueError(f"Don't know how to connect to {resource!r}")
# Treat `postgres://` and `postgresql://` the same
scheme = scheme.replace("postgresql", "postgres")
try:
backend = getattr(ibis, scheme)
except AttributeError:
raise ValueError(f"Don't know how to connect to {resource!r}") from None
return backend._from_url(parsed, **kwargs)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, that may (I'm not totally sure, what about ibis.trino.connect("https://...")?) be true for the scheme, but the trino library also uses the port and other parts of the URL, so we still want to keep the url as-is.

What do you think the solution should be then?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or at the least, if you can write out the expected behavior in terms of test cases, then that will help.

if database:
kwargs["database"] = database
if url.port:
Expand Down Expand Up @@ -245,11 +248,12 @@ def list_tables(
def do_connect(
self,
user: str = "user",
auth: str | None = None,
auth: str | Authentication | None = None,
host: str = "localhost",
port: int = 8080,
database: str | None = None,
schema: str | None = None,
*,
source: str | None = None,
timezone: str = "UTC",
**kwargs,
Expand All @@ -261,7 +265,7 @@ def do_connect(
user
Username to connect with
auth
Authentication method or password to use for the connection.
Password or authentication method to use for the connection.
host
Hostname of the Trino server
port
Expand Down Expand Up @@ -296,11 +300,7 @@ def do_connect(
>>> con = ibis.trino.connect(database=catalog, schema=schema)
>>> con = ibis.trino.connect(database=catalog, schema=schema, source="my-app")
"""
if (
isinstance(auth, str)
and (scheme := urlparse(host).scheme)
and scheme != "http"
):
if isinstance(auth, str):
auth = BasicAuthentication(user, auth)

self.con = trino.dbapi.connect(
Expand Down
31 changes: 31 additions & 0 deletions ibis/backends/trino/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,37 @@ def test_list_databases(con):
assert {"information_schema", "sf1"}.issubset(con.list_databases(catalog="tpch"))


@pytest.mark.parametrize("auth_type", ["str", "obj"])
@pytest.mark.parametrize(("http_scheme"), ["http", "https", None])
def test_con_auth_str(http_scheme, auth_type):
"""If a non-str object is given to auth, it should be used as is, no matter what.
If a str is given to auth, it should be converted to a BasicAuthentication.
This happens no matter the http_scheme given.

Tests for
https://github.com/ibis-project/ibis/issues/9113
https://github.com/ibis-project/ibis/issues/9956
https://github.com/ibis-project/ibis/issues/11841
"""
from trino.auth import BasicAuthentication

if auth_type == "str":
auth = TRINO_PASS
else:
auth = BasicAuthentication(TRINO_USER, TRINO_PASS)

con = ibis.trino.connect(
user=TRINO_USER,
host=TRINO_HOST,
port=TRINO_PORT,
auth=auth,
database="hive",
schema="default",
http_scheme=http_scheme,
)
assert con.con.auth == BasicAuthentication(TRINO_USER, TRINO_PASS)


@pytest.mark.parametrize(("source", "expected"), [(None, "ibis"), ("foo", "foo")])
def test_con_source(source, expected):
con = ibis.trino.connect(
Expand Down
Loading