diff --git a/CHANGELOG.md b/CHANGELOG.md index 97c2449..f2750c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ -## Spark-submit -##### Latest version: 0.1.0 +### Spark-submit -#### 0.1.0 (2021-10-16) +##### 1.0.1 (2021-10-22) +- Fix wrong return code bug in `get_code()` +- Change argument order in `submit()` + +##### 1.0.0 (2021-10-16) - First release \ No newline at end of file diff --git a/README.md b/README.md index 6470c45..6467f1a 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ ## Spark-submit [![PyPI version](https://badge.fury.io/py/spark-submit.svg)](https://badge.fury.io/py/spark-submit) -[![Github All Releases](https://img.shields.io/github/downloads/PApostol/spark-submit/total.svg)]() +[![PyPI - Downloads](https://img.shields.io/pypi/dm/spark-submit)](https://pypi.org/project/spark-submit/) [![](https://img.shields.io/badge/python-3.5+-blue.svg)](https://www.python.org/downloads/) [![License](https://img.shields.io/badge/License-MIT-blue)](#license "Go to license section") [![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/PApostol/spark-submit/issues) @@ -59,7 +59,7 @@ print(app.submit_cmd) print(app.env_vars) # monitor state in the background every x seconds with `await_result=x` -app.submit(use_env_vars=True, await_result=10) +app.submit(await_result=10, use_env_vars=True) print(app.get_state()) # 'SUBMITTED' @@ -134,6 +134,15 @@ spark_args = { main_file = 's3a://mybucket/file.jar' app = SparkJob(main_file, **spark_args) ``` + +#### Testing + +You can do some simple testing with local mode Spark after cloning the repo: + +`python tests/run_test.py` + +Note any additional requirements for running the tests: `pip install -r tests/requirements.txt` + #### Additional methods `spark_submit.system_info()`: Collects Spark related system information, such as versions of spark-submit, Scala, Java, Python and OS @@ -146,8 +155,8 @@ app = SparkJob(main_file, 
**spark_args) ### License -Released under [MIT](/LICENSE) by [@PApostol](https://github.com/PApostol) +Released under [MIT](/LICENSE) by [@PApostol](https://github.com/PApostol). -- You can freely modify and reuse -- The original license must be included with copies of this software -- Please link back to this repo if you use a significant portion the source code +- You can freely modify and reuse. +- The original license must be included with copies of this software. +- Please link back to this repo if you use a significant portion of the source code. diff --git a/setup.py b/setup.py index 25a8bf1..67ad667 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ license = about['__license__'], url = about['__url__'], description = about['__description__'], - long_description_content_type="text/markdown", + long_description_content_type = 'text/markdown', long_description = readme, packages = find_packages(), include_package_data = True, diff --git a/spark_submit/__info__.py b/spark_submit/__info__.py index d17b0e8..d8ed778 100644 --- a/spark_submit/__info__.py +++ b/spark_submit/__info__.py @@ -11,7 +11,7 @@ __author_email__ = 'foo@bar.com' __maintainer__ = 'PApostol' __license__ = 'MIT' -__version__ = '1.0.0' +__version__ = '1.0.1' __description__ = 'Python manager for spark-submit jobs' __url__ = 'https://github.com/PApostol/spark-submit' __bugtrack_url__ = f'{__url__}/issues' diff --git a/spark_submit/sparkjob.py b/spark_submit/sparkjob.py index 5958284..884656d 100644 --- a/spark_submit/sparkjob.py +++ b/spark_submit/sparkjob.py @@ -116,16 +116,16 @@ def _check_submit(self) -> None: self.submit_response['driver_state'] = driver_state[0] - def submit(self, use_env_vars: bool=False, await_result: int=0) -> None: + def submit(self, await_result: int=0, use_env_vars: bool=False) -> None: """Submits the current Spark job to Spark master Parameters - use_env_vars (bool): whether the environment variables obtained should be used (default: False) await_result (int): how
often to poll for the Spark driver state in a background thread (default: 0, don't monitor in a background thread) + use_env_vars (bool): whether the environment variables obtained should be used (default: False) Returns: None - """ + """ env_vars = '' if use_env_vars: for env_var, val in self.env_vars.items(): @@ -146,7 +146,7 @@ def submit(self, use_env_vars: bool=False, await_result: int=0) -> None: self.submit_response['driver_state'] = 'ERROR' raise SparkSubmitError(f'{output}\nReturn code: {code}') - elif self.spark_args['deploy_mode']=='client': + elif self.spark_args['deploy_mode'] == 'client': self.submit_response['driver_state'] = 'FINISHED' else: @@ -165,7 +165,7 @@ def get_state(self) -> str: Returns: str: Spark job driver state - """ + """ self._check_submit() return self.submit_response['driver_state'] @@ -175,7 +175,7 @@ def get_output(self) -> str: Returns: str: spark-submit stdout - """ + """ return self.submit_response['output'] @@ -184,8 +184,8 @@ def get_code(self) -> int: Returns: int: spark-submit return code (returns -1 if no code) - """ - return self.submit_response['code'] if self.submit_response['code'] else -1 + """ + return self.submit_response['code'] if isinstance(self.submit_response['code'], int) else -1 def kill(self) -> None: @@ -193,7 +193,7 @@ def kill(self) -> None: Returns: None - """ + """ if self.submit_response['driver_state'] in end_sates: logging.warning('Spark job "{0}" has concluded with state {1} and cannot be killed.'.format(self.spark_args['name'], self.submit_response['driver_state'])) elif self.submit_response['submission_id']: diff --git a/spark_submit/system.py b/spark_submit/system.py index a117d4d..4b3a019 100644 --- a/spark_submit/system.py +++ b/spark_submit/system.py @@ -28,7 +28,7 @@ def system_info() -> str: Returns: str: system information - """ + """ spark_bin = os.environ.get('SPARK_HOME', os.path.expanduser('~/spark_home')) + '/bin/spark-submit' cmd = spark_bin + ' --version'