mirror of https://github.com/databricks/cli.git

Update

commit c3c4dcd894 (parent 36c08d3cc5)
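The hunks below update the acceptance-test golden outputs so that run-specific values (bundle UUIDs, notebook nuids, and numeric literals such as dates and row limits) are replaced with stable placeholders like <UUID> and <NUMID>. As a rough illustrative sketch only — not the repository's actual test-harness code — a normalization pass of this kind could look like:

```python
import re

# Illustrative patterns; the real replacement rules in the test harness may differ.
UUID_RE = re.compile(
    r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
)
NUM_RE = re.compile(r"\b\d{3,}\b")  # numeric literals of three or more digits


def normalize(output: str) -> str:
    """Replace volatile values so golden output files stay stable across runs."""
    output = UUID_RE.sub("<UUID>", output)
    output = NUM_RE.sub("<NUMID>", output)
    return output


print(normalize('uuid: 9d4ad8f6-850c-45fb-b51a-04fa314564de, "rowLimit": 10000'))
# uuid: <UUID>, "rowLimit": <NUMID>
```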
@@ -1,30 +0,0 @@

>>> $CLI bundle init default-python --config-file ./input.json --output-dir output

Welcome to the default Python template for Databricks Asset Bundles!
Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): http://$DATABRICKS_HOST

✨ Your new project has been created in the 'my_default_python' directory!

Please refer to the README.md file for "getting started" instructions.
See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html.

>>> $CLI bundle validate -t dev
Name: my_default_python
Target: dev
Workspace:
  Host: http://$DATABRICKS_HOST
  User: $USERNAME
  Path: /Workspace/Users/$USERNAME/.bundle/my_default_python/dev

Validation OK!

>>> $CLI bundle validate -t prod
Name: my_default_python
Target: prod
Workspace:
  Host: http://$DATABRICKS_HOST
  User: $USERNAME
  Path: /Workspace/Users/$USERNAME/.bundle/my_default_python/prod

Validation OK!
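For context, the `--config-file ./input.json` flag in the transcript above supplies answers to the template prompts non-interactively. A minimal sketch of what such a file might contain — the parameter names are an assumption based on the template's prompts, not taken from this diff — is:

```json
{
  "project_name": "my_default_python",
  "include_notebook": "yes",
  "include_dlt": "yes",
  "include_python": "yes"
}
```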
@@ -30,3 +30,12 @@ Workspace:
  Path: /Workspace/Users/$USERNAME/.bundle/my_dbt_sql/prod

Validation OK!
+
+>>> ruff format --diff
+warning: No Python files found under the given path(s)
+
+Exit code: 0
+
+>>> ruff clean
+
+Exit code: 0
@@ -47,7 +47,7 @@ and deployment to production (using Databricks Asset Bundles).
(see https://docs.databricks.com/dev-tools/auth/pat.html).
You can use OAuth as an alternative, but this currently requires manual configuration.
See https://github.com/databricks/dbt-databricks/blob/main/docs/oauth.md
-for general instructions, or https://community.databricks.com/t5/technical-blog/using-dbt-core-with-oauth-on-azure-databricks/ba-p/46605
+for general instructions, or https://community.databricks.com/t5/technical-blog/using-dbt-core-with-oauth-on-azure-databricks/ba-p/<NUMID>
for advice on setting up OAuth for Azure Databricks.

To setup up additional profiles, such as a 'prod' profile,
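The README hunk above discusses authenticating dbt with a personal access token (with OAuth as a manually configured alternative). As a hedged sketch only — the profile name, catalog, schema, and warehouse path are placeholders, and the dbt-databricks docs linked above are authoritative — a token-based profiles.yml entry generally looks like:

```yaml
my_dbt_sql:
  target: dev
  outputs:
    dev:
      type: databricks
      catalog: main                       # placeholder catalog
      schema: my_schema                   # placeholder schema
      host: myworkspace.cloud.databricks.com
      http_path: /sql/1.0/warehouses/<warehouse-id>
      token: <personal-access-token>
```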
@@ -3,7 +3,7 @@
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
  name: my_dbt_sql
-  uuid: 9d4ad8f6-850c-45fb-b51a-04fa314564de
+  uuid: <UUID>

include:
  - resources/*.yml
@@ -2,7 +2,7 @@
-- This model file defines a materialized view called 'orders_daily'
--
-- Read more about materialized at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables
--- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561.
+-- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/<NUMID>.
{{ config(materialized = 'materialized_view') }}

select order_date, count(*) AS number_of_orders
@@ -11,7 +11,7 @@ from {{ ref('orders_raw') }}

-- During development, only process a smaller range of data
{% if target.name != 'prod' %}
-where order_date >= '2019-08-01' and order_date < '2019-09-01'
+where order_date >= '<NUMID>-08-01' and order_date < '<NUMID>-09-01'
{% endif %}

group by order_date
@@ -2,7 +2,7 @@
--
-- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/
-- Read more about streaming tables at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables
--- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561.
+-- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/<NUMID>.
{{ config(materialized = 'streaming_table') }}

select
@@ -0,0 +1,157 @@

>>> $CLI bundle init default-python --config-file ./input.json --output-dir output

Welcome to the default Python template for Databricks Asset Bundles!
Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): http://$DATABRICKS_HOST

✨ Your new project has been created in the 'my_default_python' directory!

Please refer to the README.md file for "getting started" instructions.
See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html.

>>> $CLI bundle validate -t dev
Name: my_default_python
Target: dev
Workspace:
  Host: http://$DATABRICKS_HOST
  User: $USERNAME
  Path: /Workspace/Users/$USERNAME/.bundle/my_default_python/dev

Validation OK!

>>> $CLI bundle validate -t prod
Name: my_default_python
Target: prod
Workspace:
  Host: http://$DATABRICKS_HOST
  User: $USERNAME
  Path: /Workspace/Users/$USERNAME/.bundle/my_default_python/prod

Validation OK!

>>> ruff format --diff
--- scratch/exploration.ipynb:cell 1
+++ scratch/exploration.ipynb:cell 1
--- scratch/exploration.ipynb:cell 2
+++ scratch/exploration.ipynb:cell 2
@@ -1,5 +1,6 @@
import sys
-sys.path.append('../src')
+
+sys.path.append("../src")
from my_default_python import main

main.get_taxis(spark).show(10)

--- setup.py
+++ setup.py
@@ -5,11 +5,13 @@
be executed directly. See README.md for how to deploy, test, and run
the my_default_python project.
"""
+
from setuptools import setup, find_packages

import sys
-sys.path.append('./src')

+sys.path.append("./src")
+
import datetime
import my_default_python

@@ -17,17 +19,15 @@
name="my_default_python",
# We use timestamp as Local version identifier (https://peps.python.org/pep-<NUMID>/#local-version-identifiers.)
# to ensure that changes to wheel package are picked up when used on all-purpose clusters
- version=my_default_python.__version__ + "+" + datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S"),
+ version=my_default_python.__version__
+ + "+"
+ + datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S"),
url="https://databricks.com",
author="$USERNAME",
description="wheel file based on my_default_python/src",
- packages=find_packages(where='./src'),
- package_dir={'': 'src'},
- entry_points={
- "packages": [
- "main=my_default_python.main:main"
- ]
- },
+ packages=find_packages(where="./src"),
+ package_dir={"": "src"},
+ entry_points={"packages": ["main=my_default_python.main:main"]},
install_requires=[
# Dependencies in case the output wheel file is used as a library dependency.
# For defining dependencies, when this package is used in Databricks, see:

--- src/dlt_pipeline.ipynb:cell 2
+++ src/dlt_pipeline.ipynb:cell 2
@@ -1,6 +1,7 @@
# Import DLT and src/my_default_python
import dlt
import sys
+
sys.path.append(spark.conf.get("bundle.sourcePath", "."))
from pyspark.sql.functions import expr
from my_default_python import main
--- src/dlt_pipeline.ipynb:cell 3
+++ src/dlt_pipeline.ipynb:cell 3
@@ -1,7 +1,8 @@
@dlt.view
def taxi_raw():
- return main.get_taxis(spark)
+ return main.get_taxis(spark)
+

@dlt.table
def filtered_taxis():
- return dlt.read("taxi_raw").filter(expr("fare_amount < 30"))
+ return dlt.read("taxi_raw").filter(expr("fare_amount < 30"))

--- src/my_default_python/main.py
+++ src/my_default_python/main.py
@@ -1,21 +1,25 @@
from pyspark.sql import SparkSession, DataFrame

+
def get_taxis(spark: SparkSession) -> DataFrame:
- return spark.read.table("samples.nyctaxi.trips")
+ return spark.read.table("samples.nyctaxi.trips")


# Create a new Databricks Connect session. If this fails,
# check that you have configured Databricks Connect correctly.
# See https://docs.databricks.com/dev-tools/databricks-connect.html.
def get_spark() -> SparkSession:
- try:
- from databricks.connect import DatabricksSession
- return DatabricksSession.builder.getOrCreate()
- except ImportError:
- return SparkSession.builder.getOrCreate()
+ try:
+ from databricks.connect import DatabricksSession
+
+ return DatabricksSession.builder.getOrCreate()
+ except ImportError:
+ return SparkSession.builder.getOrCreate()
+

def main():
- get_taxis(get_spark()).show(5)
+ get_taxis(get_spark()).show(5)
+

-if __name__ == '__main__':
- main()
+if __name__ == "__main__":
+ main()

4 files would be reformatted, 3 files already formatted

Exit code: 1

>>> ruff clean
Removing cache at: .ruff_cache

Exit code: 0
@@ -2,7 +2,7 @@
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
  name: my_default_python
-  uuid: 3c5cdb6b-9e42-46f3-a33c-54769acda6bf
+  uuid: <UUID>

include:
  - resources/*.yml
@@ -15,7 +15,7 @@ import my_default_python

setup(
    name="my_default_python",
-    # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.)
+    # We use timestamp as Local version identifier (https://peps.python.org/pep-<NUMID>/#local-version-identifiers.)
    # to ensure that changes to wheel package are picked up when used on all-purpose clusters
    version=my_default_python.__version__ + "+" + datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S"),
    url="https://databricks.com",
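The comment in the hunk above points at PEP 440 local version identifiers. As a small illustrative sketch (the base version shown is hypothetical; the real value comes from my_default_python.__version__), the expression produces a string such as:

```python
import datetime

base_version = "0.0.1"  # hypothetical base version

# Appending "+<timestamp>" yields a PEP 440 local version identifier, so each
# rebuilt wheel gets a distinct version and is picked up on all-purpose clusters.
version = base_version + "+" + datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S")
print(version)  # e.g. 0.0.1+20250101.120000
```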
@@ -6,7 +6,7 @@
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"inputWidgets": {},
-"nuid": "9a626959-61c8-4bba-84d2-2a4ecab1f7ec",
+"nuid": "<UUID>",
"showTitle": false,
"title": ""
}
@@ -24,7 +24,7 @@
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"inputWidgets": {},
-"nuid": "9198e987-5606-403d-9f6d-8f14e6a4017f",
+"nuid": "<UUID>",
"showTitle": false,
"title": ""
}
@@ -46,7 +46,7 @@
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"inputWidgets": {},
-"nuid": "3fc19dba-61fd-4a89-8f8c-24fee63bfb14",
+"nuid": "<UUID>",
"showTitle": false,
"title": ""
}
@@ -6,7 +6,7 @@
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"inputWidgets": {},
-"nuid": "ee353e42-ff58-4955-9608-12865bd0950e",
+"nuid": "<UUID>",
"showTitle": false,
"title": ""
}
@@ -33,11 +33,11 @@
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
-"byteLimit": 2048000,
-"rowLimit": 10000
+"byteLimit": <NUMID>,
+"rowLimit": <NUMID>
},
"inputWidgets": {},
-"nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae",
+"nuid": "<UUID>",
"showTitle": false,
"title": ""
}
@@ -30,3 +30,13 @@ Workspace:
  Path: /Workspace/Users/$USERNAME/.bundle/my_default_sql/prod

Validation OK!
+
+>>> ruff format --diff
+error: Failed to parse scratch/exploration.ipynb:1:2:15: Simple statements must be separated by newlines or semicolons
+
+Exit code: 2
+
+>>> ruff clean
+Removing cache at: .ruff_cache
+
+Exit code: 0
@@ -2,7 +2,7 @@
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
  name: my_default_sql
-  uuid: 631398bf-1d77-42ce-ba4f-9bb29dd64b5a
+  uuid: <UUID>

include:
  - resources/*.yml
@@ -7,7 +7,7 @@
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"inputWidgets": {},
-"nuid": "dc8c630c-1ea0-42e4-873f-e4dec4d3d416",
+"nuid": "<UUID>",
"showTitle": false,
"title": ""
}
@@ -15,7 +15,7 @@ WHERE if(
  true,

  -- During development, only process a smaller range of data
-  order_date >= '2019-08-01' AND order_date < '2019-09-01'
+  order_date >= '<NUMID>-08-01' AND order_date < '<NUMID>-09-01'
)

GROUP BY order_date