mirror of https://github.com/databricks/cli.git

commit 658458f2df
parent 6c3a1fb049

    Update outputs

@@ -3,7 +3,7 @@
 # See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
 bundle:
   name: my_dbt_sql
-  uuid: 77b4c5d0-0d0a-4e1a-a472-c08952b7f38e
+  uuid: 92ca153c-da4d-4bc3-aa88-1a468193ba6a
 
 include:
   - resources/*.yml

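The only change in each databricks.yml hunk is the bundle uuid, which differs on every template instantiation. As a point of reference, a random UUID4 minted at `bundle init` time would produce exactly this kind of churn; that mechanism is an assumption here, not something this diff shows:

import uuid

# Hypothetical reproduction: a fresh random UUID each time the template
# is instantiated would explain why the recorded outputs differ per run.
print(uuid.uuid4())  # e.g. a value shaped like 92ca153c-da4d-4bc3-aa88-1a468193ba6a
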
@@ -30,126 +30,9 @@ Workspace:
 Validation OK!
 
 >>> ruff format --diff
---- scratch/exploration.ipynb:cell 1
-+++ scratch/exploration.ipynb:cell 1
---- scratch/exploration.ipynb:cell 2
-+++ scratch/exploration.ipynb:cell 2
-@@ -1,5 +1,6 @@
- import sys
--sys.path.append('../src')
-+
-+sys.path.append("../src")
- from my_default_python import main
- 
- main.get_taxis(spark).show(10)
-
---- setup.py
-+++ setup.py
-@@ -5,11 +5,13 @@
- be executed directly. See README.md for how to deploy, test, and run
- the my_default_python project.
- """
-+
- from setuptools import setup, find_packages
- 
- import sys
--sys.path.append('./src')
- 
-+sys.path.append("./src")
-+
- import datetime
- import my_default_python
- 
-@@ -17,17 +19,15 @@
-     name="my_default_python",
-     # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.)
-     # to ensure that changes to wheel package are picked up when used on all-purpose clusters
--    version=my_default_python.__version__ + "+" + datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S"),
-+    version=my_default_python.__version__
-+    + "+"
-+    + datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S"),
-     url="https://databricks.com",
-     author="$USERNAME",
-     description="wheel file based on my_default_python/src",
--    packages=find_packages(where='./src'),
--    package_dir={'': 'src'},
--    entry_points={
--        "packages": [
--            "main=my_default_python.main:main"
--        ]
--    },
-+    packages=find_packages(where="./src"),
-+    package_dir={"": "src"},
-+    entry_points={"packages": ["main=my_default_python.main:main"]},
-     install_requires=[
-         # Dependencies in case the output wheel file is used as a library dependency.
-         # For defining dependencies, when this package is used in Databricks, see:
-
---- src/dlt_pipeline.ipynb:cell 2
-+++ src/dlt_pipeline.ipynb:cell 2
-@@ -1,6 +1,7 @@
- # Import DLT and src/my_default_python
- import dlt
- import sys
-+
- sys.path.append(spark.conf.get("bundle.sourcePath", "."))
- from pyspark.sql.functions import expr
- from my_default_python import main
---- src/dlt_pipeline.ipynb:cell 3
-+++ src/dlt_pipeline.ipynb:cell 3
-@@ -1,7 +1,8 @@
- @dlt.view
- def taxi_raw():
--  return main.get_taxis(spark)
-+    return main.get_taxis(spark)
-+
- 
- @dlt.table
- def filtered_taxis():
--  return dlt.read("taxi_raw").filter(expr("fare_amount < 30"))
-+    return dlt.read("taxi_raw").filter(expr("fare_amount < 30"))
-
---- src/my_default_python/main.py
-+++ src/my_default_python/main.py
-@@ -1,21 +1,25 @@
- from pyspark.sql import SparkSession, DataFrame
- 
-+
- def get_taxis(spark: SparkSession) -> DataFrame:
--  return spark.read.table("samples.nyctaxi.trips")
-+    return spark.read.table("samples.nyctaxi.trips")
- 
- 
- # Create a new Databricks Connect session. If this fails,
- # check that you have configured Databricks Connect correctly.
- # See https://docs.databricks.com/dev-tools/databricks-connect.html.
- def get_spark() -> SparkSession:
--  try:
--    from databricks.connect import DatabricksSession
--    return DatabricksSession.builder.getOrCreate()
--  except ImportError:
--    return SparkSession.builder.getOrCreate()
-+    try:
-+        from databricks.connect import DatabricksSession
-+
-+        return DatabricksSession.builder.getOrCreate()
-+    except ImportError:
-+        return SparkSession.builder.getOrCreate()
-+
- 
- def main():
--  get_taxis(get_spark()).show(5)
-+    get_taxis(get_spark()).show(5)
-+
- 
--if __name__ == '__main__':
--  main()
-+if __name__ == "__main__":
-+    main()
- 
-4 files would be reformatted, 3 files already formatted
-
-Exit code: 1
+7 files already formatted
+
+Exit code: 0
 
 >>> ruff clean
 Removing cache at: .ruff_cache

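With the template sources reformatted, `ruff format --diff` finds nothing to change, so the recorded output shrinks to a single summary line and the exit code drops from 1 to 0. A minimal sketch of that exit-code contract (not part of the commit):

import subprocess

# `ruff format --diff` prints would-be changes without applying them and
# exits 0 when everything is already formatted, 1 when changes are needed.
result = subprocess.run(["ruff", "format", "--diff", "."], capture_output=True, text=True)
print(result.stdout)      # the per-file diffs, if any
print(result.returncode)  # 0 = clean, 1 = would reformat
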
@@ -2,7 +2,7 @@
 # See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
 bundle:
   name: my_default_python
-  uuid: f0d73e0a-8483-485c-a7f9-0c1588086268
+  uuid: cc022987-d2d7-42c2-9019-3f9615812f23
 
 include:
   - resources/*.yml

@@ -5,28 +5,32 @@ This file is primarily used by the setuptools library and typically should not
 be executed directly. See README.md for how to deploy, test, and run
 the my_default_python project.
 """
+
 from setuptools import setup, find_packages
 
 import sys
-sys.path.append('./src')
 
+sys.path.append("./src")
+
 import datetime
 import my_default_python
 
+local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S")
+
 setup(
     name="my_default_python",
     # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.)
     # to ensure that changes to wheel package are picked up when used on all-purpose clusters
-    version=my_default_python.__version__ + "+" + datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S"),
+    version=my_default_python.__version__ + "+" + local_version,
     url="https://databricks.com",
     author="$USERNAME",
     description="wheel file based on my_default_python/src",
-    packages=find_packages(where='./src'),
-    package_dir={'': 'src'},
+    packages=find_packages(where="./src"),
+    package_dir={"": "src"},
     entry_points={
         "packages": [
-            "main=my_default_python.main:main"
-        ]
+            "main=my_default_python.main:main",
+        ],
     },
     install_requires=[
         # Dependencies in case the output wheel file is used as a library dependency.

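This hunk hoists the timestamp into a `local_version` variable so the `version=` line stays within the line-length limit rather than being wrapped by the formatter. The versioning scheme is unchanged: a PEP 440 local version identifier after the `+`, so every wheel build gets a distinct version. A small sketch with a hypothetical base version standing in for `my_default_python.__version__`:

import datetime

base_version = "0.0.1"  # hypothetical; the template reads my_default_python.__version__

# Timestamp used as a PEP 440 local version identifier (the part after "+"),
# so each wheel build is unique and all-purpose clusters pick up new code.
local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S")
version = base_version + "+" + local_version
print(version)  # e.g. 0.0.1+20250114.093042
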
@@ -34,6 +34,7 @@
     "# Import DLT and src/my_default_python\n",
     "import dlt\n",
     "import sys\n",
+    "\n",
     "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n",
     "from pyspark.sql.functions import expr\n",
     "from my_default_python import main"

@@ -55,11 +56,12 @@
    "source": [
     "@dlt.view\n",
     "def taxi_raw():\n",
-    "  return main.get_taxis(spark)\n",
+    "    return main.get_taxis(spark)\n",
+    "\n",
     "\n",
     "@dlt.table\n",
     "def filtered_taxis():\n",
-    "  return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))"
+    "    return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))"
    ]
   }
  ],

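The notebook hunks add standalone "\n" entries to each cell's source array; these are the blank lines ruff inserts between top-level definitions. The strings in a cell's source array concatenate directly into the cell text, as this sketch of the reformatted cell 3 shows (`dlt`, `expr`, and `main` come from the cell's own context):

# Each entry in a cell's "source" array carries its own trailing newline;
# joining them yields the cell text that ruff formats.
source = [
    "@dlt.view\n",
    "def taxi_raw():\n",
    "    return main.get_taxis(spark)\n",
    "\n",
    "\n",
    "@dlt.table\n",
    "def filtered_taxis():\n",
    '    return dlt.read("taxi_raw").filter(expr("fare_amount < 30"))',
]
print("".join(source))
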
@@ -1,21 +1,25 @@
 from pyspark.sql import SparkSession, DataFrame
 
+
 def get_taxis(spark: SparkSession) -> DataFrame:
-  return spark.read.table("samples.nyctaxi.trips")
+    return spark.read.table("samples.nyctaxi.trips")
 
 
 # Create a new Databricks Connect session. If this fails,
 # check that you have configured Databricks Connect correctly.
 # See https://docs.databricks.com/dev-tools/databricks-connect.html.
 def get_spark() -> SparkSession:
-  try:
-    from databricks.connect import DatabricksSession
-    return DatabricksSession.builder.getOrCreate()
-  except ImportError:
-    return SparkSession.builder.getOrCreate()
+    try:
+        from databricks.connect import DatabricksSession
+
+        return DatabricksSession.builder.getOrCreate()
+    except ImportError:
+        return SparkSession.builder.getOrCreate()
+
 
 def main():
-  get_taxis(get_spark()).show(5)
+    get_taxis(get_spark()).show(5)
+
 
-if __name__ == '__main__':
-  main()
+if __name__ == "__main__":
+    main()

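The main.py change is formatting only; the fallback the comments describe is intact: prefer Databricks Connect when it is importable, otherwise use a plain SparkSession. A minimal usage sketch, assuming the module above is importable as `my_default_python.main`:

# Hypothetical local usage of the template module shown above.
from my_default_python.main import get_spark, get_taxis

# DatabricksSession if databricks-connect is installed, else SparkSession.
spark = get_spark()

# Same call that main() makes.
get_taxis(spark).show(5)
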
@@ -2,7 +2,7 @@
 # See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
 bundle:
   name: my_default_sql
-  uuid: f99204e5-97fa-4ae2-b358-91d3669fcecc
+  uuid: 19bd03e9-03e7-462a-a918-a8bcd255e45a
 
 include:
   - resources/*.yml