mirror of https://github.com/databricks/cli.git

commit c3c4dcd894 (parent 36c08d3cc5)

    Update
@@ -1,30 +0,0 @@

>>> $CLI bundle init default-python --config-file ./input.json --output-dir output

Welcome to the default Python template for Databricks Asset Bundles!
Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): http://$DATABRICKS_HOST

✨ Your new project has been created in the 'my_default_python' directory!

Please refer to the README.md file for "getting started" instructions.
See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html.

>>> $CLI bundle validate -t dev
Name: my_default_python
Target: dev
Workspace:
  Host: http://$DATABRICKS_HOST
  User: $USERNAME
  Path: /Workspace/Users/$USERNAME/.bundle/my_default_python/dev

Validation OK!

>>> $CLI bundle validate -t prod
Name: my_default_python
Target: prod
Workspace:
  Host: http://$DATABRICKS_HOST
  User: $USERNAME
  Path: /Workspace/Users/$USERNAME/.bundle/my_default_python/prod

Validation OK!
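For context, the `--config-file ./input.json` flag above answers the template's prompts non-interactively. A minimal Python sketch of producing such a file; the parameter names below are assumptions about the default-python template's prompts, not taken from this commit:

import json

# Hypothetical answers for `bundle init default-python --config-file input.json`.
# The keys (project_name, include_notebook, include_dlt, include_python) are
# assumptions; check the template's prompts for the real parameter names.
config = {
    "project_name": "my_default_python",
    "include_notebook": "yes",
    "include_dlt": "yes",
    "include_python": "yes",
}

with open("input.json", "w") as f:
    json.dump(config, f, indent=2)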
@@ -30,3 +30,12 @@ Workspace:
   Path: /Workspace/Users/$USERNAME/.bundle/my_dbt_sql/prod
 
 Validation OK!
+
+>>> ruff format --diff
+warning: No Python files found under the given path(s)
+
+Exit code: 0
+
+>>> ruff clean
+
+Exit code: 0
@@ -47,7 +47,7 @@ and deployment to production (using Databricks Asset Bundles).
 (see https://docs.databricks.com/dev-tools/auth/pat.html).
 You can use OAuth as an alternative, but this currently requires manual configuration.
 See https://github.com/databricks/dbt-databricks/blob/main/docs/oauth.md
-for general instructions, or https://community.databricks.com/t5/technical-blog/using-dbt-core-with-oauth-on-azure-databricks/ba-p/46605
+for general instructions, or https://community.databricks.com/t5/technical-blog/using-dbt-core-with-oauth-on-azure-databricks/ba-p/<NUMID>
 for advice on setting up OAuth for Azure Databricks.
 
 To set up additional profiles, such as a 'prod' profile,
@@ -3,7 +3,7 @@
 # See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
 bundle:
   name: my_dbt_sql
-  uuid: 9d4ad8f6-850c-45fb-b51a-04fa314564de
+  uuid: <UUID>
 
 include:
   - resources/*.yml
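The `uuid: <UUID>` replacement above (and `<NUMID>` elsewhere in this diff) suggests the acceptance harness normalizes volatile values before comparing output against golden files. A hedged sketch of that kind of normalization in Python; the real harness's rules may differ:

import re

# Assumed normalization: replace literal UUIDs with <UUID> and longer bare
# numbers with <NUMID> so golden files stay stable across runs.
UUID_RE = re.compile(
    r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
)
NUMID_RE = re.compile(r"\b\d{3,}\b")  # the 3-digit threshold is a guess from this diff

def normalize(line: str) -> str:
    line = UUID_RE.sub("<UUID>", line)  # UUIDs first, so their digit runs are not split
    return NUMID_RE.sub("<NUMID>", line)

print(normalize("uuid: 9d4ad8f6-850c-45fb-b51a-04fa314564de"))  # uuid: <UUID>
print(normalize('"rowLimit": 10000'))                           # "rowLimit": <NUMID>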
@@ -2,7 +2,7 @@
 -- This model file defines a materialized view called 'orders_daily'
 --
 -- Read more about materialized views at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables
--- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561.
+-- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/<NUMID>.
 {{ config(materialized = 'materialized_view') }}
 
 select order_date, count(*) AS number_of_orders
@@ -11,7 +11,7 @@ from {{ ref('orders_raw') }}
 
 -- During development, only process a smaller range of data
 {% if target.name != 'prod' %}
-where order_date >= '2019-08-01' and order_date < '2019-09-01'
+where order_date >= '<NUMID>-08-01' and order_date < '<NUMID>-09-01'
 {% endif %}
 
 group by order_date
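The `target.name != 'prod'` guard above keeps development runs cheap by restricting the date range. The same gating idea expressed as a small Python sketch; the `target` variable here is a plain stand-in for dbt's target, purely for illustration:

# Illustrative only: dbt resolves target.name from profiles.yml;
# here it is just a local variable.
target = "dev"

query = "select order_date, count(*) as number_of_orders from orders_raw"
if target != "prod":
    # During development, only process a smaller range of data.
    query += " where order_date >= '2019-08-01' and order_date < '2019-09-01'"
query += " group by order_date"

print(query)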
@@ -2,7 +2,7 @@
 --
 -- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/
 -- Read more about streaming tables at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables
--- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/<NUMID>.
+-- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/<NUMID>.
 {{ config(materialized = 'streaming_table') }}
 
 select
@@ -0,0 +1,157 @@

>>> $CLI bundle init default-python --config-file ./input.json --output-dir output

Welcome to the default Python template for Databricks Asset Bundles!
Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): http://$DATABRICKS_HOST

✨ Your new project has been created in the 'my_default_python' directory!

Please refer to the README.md file for "getting started" instructions.
See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html.

>>> $CLI bundle validate -t dev
Name: my_default_python
Target: dev
Workspace:
  Host: http://$DATABRICKS_HOST
  User: $USERNAME
  Path: /Workspace/Users/$USERNAME/.bundle/my_default_python/dev

Validation OK!

>>> $CLI bundle validate -t prod
Name: my_default_python
Target: prod
Workspace:
  Host: http://$DATABRICKS_HOST
  User: $USERNAME
  Path: /Workspace/Users/$USERNAME/.bundle/my_default_python/prod

Validation OK!

>>> ruff format --diff
--- scratch/exploration.ipynb:cell 1
+++ scratch/exploration.ipynb:cell 1
--- scratch/exploration.ipynb:cell 2
+++ scratch/exploration.ipynb:cell 2
@@ -1,5 +1,6 @@
 import sys
-sys.path.append('../src')
+
+sys.path.append("../src")
 from my_default_python import main
 
 main.get_taxis(spark).show(10)

--- setup.py
+++ setup.py
@@ -5,11 +5,13 @@
 be executed directly. See README.md for how to deploy, test, and run
 the my_default_python project.
 """
+
 from setuptools import setup, find_packages
 
 import sys
-sys.path.append('./src')
 
+sys.path.append("./src")
+
 import datetime
 import my_default_python
 
@@ -17,17 +19,15 @@
     name="my_default_python",
     # We use timestamp as Local version identifier (https://peps.python.org/pep-<NUMID>/#local-version-identifiers.)
     # to ensure that changes to wheel package are picked up when used on all-purpose clusters
-    version=my_default_python.__version__ + "+" + datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S"),
+    version=my_default_python.__version__
+    + "+"
+    + datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S"),
     url="https://databricks.com",
     author="$USERNAME",
     description="wheel file based on my_default_python/src",
-    packages=find_packages(where='./src'),
-    package_dir={'': 'src'},
-    entry_points={
-        "packages": [
-            "main=my_default_python.main:main"
-        ]
-    },
+    packages=find_packages(where="./src"),
+    package_dir={"": "src"},
+    entry_points={"packages": ["main=my_default_python.main:main"]},
     install_requires=[
         # Dependencies in case the output wheel file is used as a library dependency.
         # For defining dependencies, when this package is used in Databricks, see:

--- src/dlt_pipeline.ipynb:cell 2
+++ src/dlt_pipeline.ipynb:cell 2
@@ -1,6 +1,7 @@
 # Import DLT and src/my_default_python
 import dlt
 import sys
+
 sys.path.append(spark.conf.get("bundle.sourcePath", "."))
 from pyspark.sql.functions import expr
 from my_default_python import main

--- src/dlt_pipeline.ipynb:cell 3
+++ src/dlt_pipeline.ipynb:cell 3
@@ -1,7 +1,8 @@
 @dlt.view
 def taxi_raw():
-  return main.get_taxis(spark)
+    return main.get_taxis(spark)
+
 
 @dlt.table
 def filtered_taxis():
-  return dlt.read("taxi_raw").filter(expr("fare_amount < 30"))
+    return dlt.read("taxi_raw").filter(expr("fare_amount < 30"))

--- src/my_default_python/main.py
+++ src/my_default_python/main.py
@@ -1,21 +1,25 @@
 from pyspark.sql import SparkSession, DataFrame
 
+
 def get_taxis(spark: SparkSession) -> DataFrame:
-  return spark.read.table("samples.nyctaxi.trips")
+    return spark.read.table("samples.nyctaxi.trips")
 
 
 # Create a new Databricks Connect session. If this fails,
 # check that you have configured Databricks Connect correctly.
 # See https://docs.databricks.com/dev-tools/databricks-connect.html.
 def get_spark() -> SparkSession:
-  try:
-    from databricks.connect import DatabricksSession
-    return DatabricksSession.builder.getOrCreate()
-  except ImportError:
-    return SparkSession.builder.getOrCreate()
+    try:
+        from databricks.connect import DatabricksSession
+
+        return DatabricksSession.builder.getOrCreate()
+    except ImportError:
+        return SparkSession.builder.getOrCreate()
+
 
 def main():
-  get_taxis(get_spark()).show(5)
+    get_taxis(get_spark()).show(5)
+
 
-if __name__ == '__main__':
-  main()
+if __name__ == "__main__":
+    main()

4 files would be reformatted, 3 files already formatted

Exit code: 1

>>> ruff clean
Removing cache at: .ruff_cache

Exit code: 0
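As the transcripts show, `ruff format --diff` communicates through its exit code: 0 when nothing would change, 1 when files would be reformatted (as here), and 2 on errors such as unparseable files (seen further below). A minimal sketch of gating on that code from Python:

import subprocess

# Run ruff in diff-only mode and branch on its exit code.
proc = subprocess.run(
    ["ruff", "format", "--diff"],
    capture_output=True,
    text=True,
)
print(proc.stdout, end="")

if proc.returncode == 0:
    print("all files already formatted")
elif proc.returncode == 1:
    print("some files would be reformatted")
else:
    print(f"ruff failed (exit code {proc.returncode}): {proc.stderr}")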
@@ -2,7 +2,7 @@
 # See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
 bundle:
   name: my_default_python
-  uuid: 3c5cdb6b-9e42-46f3-a33c-54769acda6bf
+  uuid: <UUID>
 
 include:
   - resources/*.yml
@@ -15,7 +15,7 @@ import my_default_python
 
 setup(
     name="my_default_python",
-    # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.)
+    # We use timestamp as Local version identifier (https://peps.python.org/pep-<NUMID>/#local-version-identifiers.)
     # to ensure that changes to wheel package are picked up when used on all-purpose clusters
     version=my_default_python.__version__ + "+" + datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S"),
     url="https://databricks.com",
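The `version=` expression above appends a build timestamp as a PEP 440 local version identifier, so repeated wheel builds are seen as distinct versions on all-purpose clusters. A standalone sketch of the same computation; the base version is a made-up stand-in for `my_default_python.__version__`:

import datetime

base_version = "0.0.1"  # stand-in for my_default_python.__version__

# PEP 440 local version identifier: everything after "+" is the local
# segment, here a UTC build timestamp, so each new build sorts as newer.
local_segment = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S")
version = f"{base_version}+{local_segment}"

print(version)  # e.g. 0.0.1+20240102.130500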
@@ -6,7 +6,7 @@
     "application/vnd.databricks.v1+cell": {
      "cellMetadata": {},
      "inputWidgets": {},
-     "nuid": "9a626959-61c8-4bba-84d2-2a4ecab1f7ec",
+     "nuid": "<UUID>",
      "showTitle": false,
      "title": ""
     }
@@ -24,7 +24,7 @@
     "application/vnd.databricks.v1+cell": {
      "cellMetadata": {},
      "inputWidgets": {},
-     "nuid": "9198e987-5606-403d-9f6d-8f14e6a4017f",
+     "nuid": "<UUID>",
      "showTitle": false,
      "title": ""
     }
@@ -46,7 +46,7 @@
     "application/vnd.databricks.v1+cell": {
      "cellMetadata": {},
      "inputWidgets": {},
-     "nuid": "3fc19dba-61fd-4a89-8f8c-24fee63bfb14",
+     "nuid": "<UUID>",
      "showTitle": false,
      "title": ""
     }
@@ -6,7 +6,7 @@
     "application/vnd.databricks.v1+cell": {
      "cellMetadata": {},
      "inputWidgets": {},
-     "nuid": "ee353e42-ff58-4955-9608-12865bd0950e",
+     "nuid": "<UUID>",
      "showTitle": false,
      "title": ""
     }
@@ -33,11 +33,11 @@
    "metadata": {
     "application/vnd.databricks.v1+cell": {
      "cellMetadata": {
-      "byteLimit": 2048000,
-      "rowLimit": 10000
+      "byteLimit": <NUMID>,
+      "rowLimit": <NUMID>
      },
      "inputWidgets": {},
-     "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae",
+     "nuid": "<UUID>",
      "showTitle": false,
      "title": ""
     }
@@ -30,3 +30,13 @@ Workspace:
   Path: /Workspace/Users/$USERNAME/.bundle/my_default_sql/prod
 
 Validation OK!
+
+>>> ruff format --diff
+error: Failed to parse scratch/exploration.ipynb:1:2:15: Simple statements must be separated by newlines or semicolons
+
+Exit code: 2
+
+>>> ruff clean
+Removing cache at: .ruff_cache
+
+Exit code: 0
@@ -2,7 +2,7 @@
 # See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
 bundle:
   name: my_default_sql
-  uuid: 631398bf-1d77-42ce-ba4f-9bb29dd64b5a
+  uuid: <UUID>
 
 include:
   - resources/*.yml
@@ -7,7 +7,7 @@
     "application/vnd.databricks.v1+cell": {
      "cellMetadata": {},
      "inputWidgets": {},
-     "nuid": "dc8c630c-1ea0-42e4-873f-e4dec4d3d416",
+     "nuid": "<UUID>",
      "showTitle": false,
      "title": ""
     }
@@ -15,7 +15,7 @@ WHERE if(
   true,
 
   -- During development, only process a smaller range of data
-  order_date >= '2019-08-01' AND order_date < '2019-09-01'
+  order_date >= '<NUMID>-08-01' AND order_date < '<NUMID>-09-01'
 )
 
 GROUP BY order_date