From 19daa3d32001d91e1711e2c8cb52f995ab8564c8 Mon Sep 17 00:00:00 2001
From: alope107
Date: Tue, 23 Jun 2015 14:50:34 -0700
Subject: [PATCH 1/7] Added option to persist changes by editing bash and csh
 config files

---
 README.md    | 10 +++++++++-
 findspark.py | 45 +++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 0c17cd6..5bbf080 100644
--- a/README.md
+++ b/README.md
@@ -32,4 +32,12 @@ To verify the automatically detected location, call
 
 ```python
 findspark.find()
-```
\ No newline at end of file
+```
+
+Findspark can also add to .bashrc and .cshrc configuration files so that the environment variables will be properly set whenever a new shell is opened. This is enabled by setting the optional argument `persist_changes` to true.
+
+```python
+findspark.init(persist_changes=True)
+```
+
+If changes are persisted, findspark will not need to be called again unless the Spark installation is moved.
diff --git a/findspark.py b/findspark.py
index d3d3195..bf3b460 100644
--- a/findspark.py
+++ b/findspark.py
@@ -8,7 +8,7 @@
 import os
 import sys
 
-__version__ = '0.0.3'
+__version__ = '0.0.4'
 
 
 def find():
@@ -35,7 +35,42 @@ def find():
     return spark_home
 
 
-def init(spark_home=None):
+def persist(spark_home, spark_python, py4j):
+    """Persists changes to environment.
+
+    Adds lines to .bashrc to set environment variables including
+    the adding of dependencies to the system path. Currently only
+    works for Bash.
+
+    Parameters
+    ----------
+    spark_home : str
+        Path to Spark installation.
+    spark_python : str
+        Path to python subdirectory of Spark installation.
+    py4j : str
+        Path to py4j library.
+    """
+
+    bashrc_location = os.path.expanduser("~/.bashrc")
+
+    with open(bashrc_location, 'a') as bashrc:
+        bashrc.write("\n# Added by findspark\n")
+        bashrc.write("export SPARK_HOME=" + spark_home + "\n")
+        bashrc.write("export PYTHONPATH=" + spark_python + ":" +
+                     py4j + ":$PYTHONPATH\n\n")
+
+    cshrc_location = os.path.expanduser("~/.cshrc")
+
+    with open(cshrc_location, 'a') as cshrc:
+        cshrc.write("\n# Added by findspark\n")
+        cshrc.write("setenv SPARK_HOME " + spark_home + "\n")
+        cshrc.write("setenv PYTHONPATH \"" + spark_python + ":" +
+                    py4j + ":\"$PYTHONPATH")
+
+
+def init(spark_home=None, persist_changes=False):
     """Make pyspark importable.
 
     Sets environmental variables and adds dependencies to sys.path.
@@ -46,6 +81,9 @@ def init(spark_home=None):
     spark_home : str, optional, default = None
         Path to Spark installation, will try to find automatically
         if not provided
+    persist_changes : bool, optional, default = False
+        Whether to attempt to persist changes (currently only by
+        appending to bashrc).
     """
 
     if not spark_home:
@@ -58,3 +96,6 @@ def init(spark_home=None):
     spark_python = os.path.join(spark_home, 'python')
     py4j = glob(os.path.join(spark_python, 'lib', 'py4j-*.zip'))[0]
     sys.path[:0] = [spark_python, py4j]
+
+    if persist_changes:
+        persist(spark_home, spark_python, py4j)
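The effect of the new `persist_changes` flag is easiest to see as the block it appends to the shell config. A sketch in terms of the patch-1 API, with `/path/to/spark_home` as a placeholder and `<py4j zip>` standing in for whatever `init` globs under `$SPARK_HOME/python/lib` (nothing below is captured from a real run):

```python
import findspark

# Patches os.environ and sys.path for the current process, and additionally
# appends a block like the following to ~/.bashrc:
#
#   # Added by findspark
#   export SPARK_HOME=/path/to/spark_home
#   export PYTHONPATH=/path/to/spark_home/python:<py4j zip>:$PYTHONPATH
#
# plus a setenv equivalent to ~/.cshrc.
findspark.init('/path/to/spark_home', persist_changes=True)
```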
""" if not spark_home: @@ -58,3 +96,6 @@ def init(spark_home=None): spark_python = os.path.join(spark_home, 'python') py4j = glob(os.path.join(spark_python, 'lib', 'py4j-*.zip'))[0] sys.path[:0] = [spark_python, py4j] + + if persist_changes: + persist(spark_home, spark_python, py4j) From 2aa2486458ed1ac117202d549684d9253fbd013c Mon Sep 17 00:00:00 2001 From: alope107 Date: Wed, 24 Jun 2015 13:39:58 -0700 Subject: [PATCH 2/7] Added option to create/edit IPython profile --- findspark.py | 66 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 9 deletions(-) diff --git a/findspark.py b/findspark.py index bf3b460..5eb9d23 100644 --- a/findspark.py +++ b/findspark.py @@ -7,6 +7,7 @@ from glob import glob import os import sys +import subprocess __version__ = '0.0.4' @@ -35,12 +36,12 @@ def find(): return spark_home -def persist(spark_home, spark_python, py4j): - """Persists changes to enviornment. +def change_rc(spark_home, spark_python, py4j): + """Persists changes to enviornment by changing shell config. Adds lines to .bashrc to set enviornment variables including the adding of dependencies to the system path. Currently only - works for Bash. + works for Bash and (t)csh. Parameters ---------- @@ -69,8 +70,47 @@ def persist(spark_home, spark_python, py4j): py4j + ":\"$PYTHONPATH") +def edit_ipython_profile(spark_home, spark_python, py4j, name): + """Creates or appends to an IPython profile to automatically import pyspark. + + Adds lines to the ipython_config file to be run at startup of the IPython + interpreter for a given profile, creating the profile if it does not exist. + These lines set appropriate enviornment variables and import the pyspark + library upon IPython startup. -def init(spark_home=None, persist_changes=False): + Parameters + ---------- + spark_home : str + Path to Spark installation. + spark_python : str + Path to python subdirectory of Spark installation. + py4j : str + Path to py4j library. + name : str + Name of profile to create or append to. + """ + subprocess.call(["ipython", "profile", "create", name]) + + config_dir = subprocess.check_output(["ipython", "profile", "locate", name]).strip() + config_filename = os.path.join(config_dir, "ipython_config.py") + + with open(config_filename, 'a') as config_file: + #Lines of code to be run when IPython starts + lines = ["import sys, os"] + lines.append("os.environ['SPARK_HOME'] = '" + spark_home + "'") + lines.append("sys.path[:0] = " + str([spark_python, py4j])) + lines.append("import pyspark") + + #Code to be placed in config file + config_file.write("\n#pyspark configuration added by findspark\n") + config_file.write("to_exec = " + str(lines) + "\n") + config_file.write("try:\n") + config_file.write(" c.InteractiveShellApp.exec_lines[:0] = to_exec\n") + config_file.write("except TypeError:\n") + config_file.write(" c.InteractiveShellApp.exec_lines = to_exec\n") + + +def init(spark_home=None, edit_rc=False, edit_profile=False, profile_name='spark'): """Make pyspark importable. Sets environmental variables and adds dependencies to sys.path. @@ -81,9 +121,14 @@ def init(spark_home=None, persist_changes=False): spark_home : str, optional, default = None Path to Spark installation, will try to find automatically if not provided - persist_changes : bool, optional, default = False - Whether to attempt to persist changes (currently only by - appending to bashrc). + edit_rc : bool, optional, default = False + Whether to attempt to persist changes by appending to shell + config. 
From da6a07880b4d7fe94f8bbd5968fb353bc7df5646 Mon Sep 17 00:00:00 2001
From: alope107
Date: Tue, 30 Jun 2015 13:23:16 -0700
Subject: [PATCH 3/7] Now creates startup files

---
 README.md    |  9 ++++++++-
 findspark.py | 50 +++++++++++++++++++++++++++++++-------------------
 2 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/README.md b/README.md
index 5bbf080..93b11fd 100644
--- a/README.md
+++ b/README.md
@@ -34,10 +34,17 @@ To verify the automatically detected location, call
 findspark.find()
 ```
 
+Findspark can add a startup file to the current IPython profile so that the environment variables will be properly set and pyspark will be imported upon IPython startup. This file is created when `edit_profile` is set to true. A profile other than the current one can be modified using the `profile_name` option.
+
+```python
+findspark.init('/path/to/spark_home', edit_profile=True)
+findspark.init('/path/to/spark_home', edit_profile=True, profile_name='otherprofile')
+```
+
 Findspark can also add to .bashrc and .cshrc configuration files so that the environment variables will be properly set whenever a new shell is opened. This is enabled by setting the optional argument `persist_changes` to true.
 
 ```python
-findspark.init(persist_changes=True)
+findspark.init('/path/to/spark_home, 'edit_rc=True)
 ```
 
 If changes are persisted, findspark will not need to be called again unless the Spark installation is moved.
diff --git a/findspark.py b/findspark.py
index 5eb9d23..ce1ed22 100644
--- a/findspark.py
+++ b/findspark.py
@@ -8,6 +8,7 @@
 import os
 import sys
 import subprocess
+from IPython import get_ipython
 
 __version__ = '0.0.4'
 
@@ -71,12 +72,9 @@ def change_rc(spark_home, spark_python, py4j):
                     py4j + ":\"$PYTHONPATH")
 
 
 def edit_ipython_profile(spark_home, spark_python, py4j, name):
-    """Creates or appends to an IPython profile to automatically import pyspark.
+    """Adds a startup file to the current IPython profile to import pyspark.
 
-    Adds lines to the ipython_config file to be run at startup of the IPython
-    interpreter for a given profile, creating the profile if it does not exist.
-    These lines set appropriate environment variables and import the pyspark
-    library upon IPython startup.
+    The startup file sets the required environment variables and imports pyspark.
 
     Parameters
     ----------
@@ -89,28 +87,29 @@ def edit_ipython_profile(spark_home, spark_python, py4j, name):
     name : str
         Name of profile to create or append to.
     """
-    subprocess.call(["ipython", "profile", "create", name])
 
-    config_dir = subprocess.check_output(["ipython", "profile", "locate", name]).strip()
-    config_filename = os.path.join(config_dir, "ipython_config.py")
+    ip = get_ipython()
+
+    if ip and name is None:
+        profile_dir = ip.profile_dir.location
+    else:
+        from IPython.utils.path import locate_profile
+        if name:
+            profile_dir = locate_profile(name)
+        else:
+            profile_dir = locate_profile()
+
+    startup_file_loc = os.path.join(profile_dir, "startup", "findspark.py")
 
-    with open(config_filename, 'a') as config_file:
+    with open(startup_file_loc, 'w') as startup_file:
         #Lines of code to be run when IPython starts
-        lines = ["import sys, os"]
-        lines.append("os.environ['SPARK_HOME'] = '" + spark_home + "'")
-        lines.append("sys.path[:0] = " + str([spark_python, py4j]))
-        lines.append("import pyspark")
-
-        #Code to be placed in config file
-        config_file.write("\n#pyspark configuration added by findspark\n")
-        config_file.write("to_exec = " + str(lines) + "\n")
-        config_file.write("try:\n")
-        config_file.write("    c.InteractiveShellApp.exec_lines[:0] = to_exec\n")
-        config_file.write("except TypeError:\n")
-        config_file.write("    c.InteractiveShellApp.exec_lines = to_exec\n")
+        startup_file.write("import sys, os\n")
+        startup_file.write("os.environ['SPARK_HOME'] = '" + spark_home + "'\n")
+        startup_file.write("sys.path[:0] = " + str([spark_python, py4j]) + "\n")
+        startup_file.write("import pyspark\n")
 
 
-def init(spark_home=None, edit_rc=False, edit_profile=False, profile_name='spark'):
+def init(spark_home=None, edit_rc=False, edit_profile=False, profile_name=None):
     """Make pyspark importable.
 
     Sets environmental variables and adds dependencies to sys.path.
@@ -125,10 +124,11 @@ def init(spark_home=None, edit_rc=False, edit_profile=False, profile_name='spark
         Whether to attempt to persist changes by appending to shell
         config.
     edit_profile : bool, optional, default = False
-        Whether to create an IPython profile that automatically configures
-        environment variables and imports spark.
-    profile_name : bool, optional, default = "spark"
+        Whether to create a create an IPython startup file to automatically
+        configure and import pyspark.
+    profile_name : str, optional, default = None
+        Name of the IPython profile to create or edit if edit_profile is True.
+        Uses current profile if not set.
     """
 
     if not spark_home:
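With patch 3 the config-file edit is replaced by a generated script, so the observable result of `edit_profile=True` is now a file at `<profile_dir>/startup/findspark.py` roughly like the following (placeholder paths again; the second `sys.path` entry is the globbed py4j zip):

```python
import sys, os
os.environ['SPARK_HOME'] = '/path/to/spark_home'
sys.path[:0] = ['/path/to/spark_home/python', '<py4j zip>']
import pyspark
```

IPython executes every script in a profile's `startup/` directory at launch, so the change can be undone by deleting this one file.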
""" - subprocess.call(["ipython", "profile", "create", name]) - config_dir = subprocess.check_output(["ipython", "profile", "locate", name]).strip() - config_filename = os.path.join(config_dir, "ipython_config.py") + ip = get_ipython() + + if ip and name is None: + profile_dir = ip.profile_dir.location + else: + from IPython.utils.path import locate_profile + if name: + profile_dir = locate_profile(name) + else: + profile_dir = locate_profile() + + startup_file_loc = os.path.join(profile_dir, "startup", "findspark.py") - with open(config_filename, 'a') as config_file: + with open(startup_file_loc, 'w') as startup_file: #Lines of code to be run when IPython starts - lines = ["import sys, os"] - lines.append("os.environ['SPARK_HOME'] = '" + spark_home + "'") - lines.append("sys.path[:0] = " + str([spark_python, py4j])) - lines.append("import pyspark") - - #Code to be placed in config file - config_file.write("\n#pyspark configuration added by findspark\n") - config_file.write("to_exec = " + str(lines) + "\n") - config_file.write("try:\n") - config_file.write(" c.InteractiveShellApp.exec_lines[:0] = to_exec\n") - config_file.write("except TypeError:\n") - config_file.write(" c.InteractiveShellApp.exec_lines = to_exec\n") + startup_file.write("import sys, os\n") + startup_file.write("os.environ['SPARK_HOME'] = '" + spark_home + "'\n") + startup_file.write("sys.path[:0] = " + str([spark_python, py4j]) + "\n") + startup_file.write("import pyspark\n") -def init(spark_home=None, edit_rc=False, edit_profile=False, profile_name='spark'): +def init(spark_home=None, edit_rc=False, edit_profile=False, profile_name=None): """Make pyspark importable. Sets environmental variables and adds dependencies to sys.path. @@ -125,10 +124,11 @@ def init(spark_home=None, edit_rc=False, edit_profile=False, profile_name='spark Whether to attempt to persist changes by appending to shell config. edit_profile : bool, optional, default = False - Whether to create an IPython profile that atuomatically configures - environment variables and imports spark. - profile_name : bool, optional, default = "spark" + Whether to create a create an IPython startup file to automatically + configure and import pyspark. + profile_name : str, optional, default = None Name of the IPython profile to create or edit if edit_profile is True. + Uses current profile if not set. """ if not spark_home: From 40c320da55c5fe7c26330e8bbeae8da206c31ed6 Mon Sep 17 00:00:00 2001 From: alope107 Date: Tue, 30 Jun 2015 14:03:46 -0700 Subject: [PATCH 4/7] Removed profile name option --- README.md | 3 +-- findspark.py | 18 +++++------------- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 93b11fd..6423749 100644 --- a/README.md +++ b/README.md @@ -34,11 +34,10 @@ To verify the automatically detected location, call findspark.find() ``` -Findspark can add a startup file to the current IPython profile so that the enviornment vaiables will be properly set and pyspark will be imported upon IPython startup. This file is created when `edit_profile` is set to true. A profile other than the current one can be modified using the `profile_name` option. +Findspark can add a startup file to the current IPython profile so that the enviornment vaiables will be properly set and pyspark will be imported upon IPython startup. This file is created when `edit_profile` is set to true. 
From 076d478226cbcf3896ccf3f3389889f414097429 Mon Sep 17 00:00:00 2001
From: alope107
Date: Wed, 1 Jul 2015 10:22:05 -0700
Subject: [PATCH 5/7] Only append to rc

---
 README.md    |  4 ++--
 findspark.py | 29 ++++++++++++++++-------------
 2 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 6423749..797c936 100644
--- a/README.md
+++ b/README.md
@@ -40,10 +40,10 @@ Findspark can add a startup file to the current IPython profile so that the envi
 findspark.init('/path/to/spark_home', edit_profile=True)
 ```
 
-Findspark can also add to .bashrc and .cshrc configuration files so that the environment variables will be properly set whenever a new shell is opened. This is enabled by setting the optional argument `persist_changes` to true.
+Findspark can also add to .bashrc and .cshrc configuration files if they are present so that the environment variables will be properly set whenever a new shell is opened. This is enabled by setting the optional argument `edit_rc` to true.
 
 ```python
-findspark.init('/path/to/spark_home, 'edit_rc=True)
+findspark.init('/path/to/spark_home', edit_rc=True)
 ```
 
 If changes are persisted, findspark will not need to be called again unless the Spark installation is moved.
diff --git a/findspark.py b/findspark.py
index f18a713..de3bd4f 100644
--- a/findspark.py
+++ b/findspark.py
@@ -40,9 +40,10 @@ def find():
 def change_rc(spark_home, spark_python, py4j):
     """Persists changes to environment by changing shell config.
 
-    Adds lines to .bashrc to set environment variables including
-    the adding of dependencies to the system path. Currently only
-    works for Bash and (t)csh.
+    Adds lines to .bashrc and .cshrc to set environment variables
+    including the adding of dependencies to the system path. Will only
+    edit these files if they already exist. Currently only works for bash
+    and (t)csh.
 
     Parameters
     ----------
@@ -56,19 +57,21 @@ def change_rc(spark_home, spark_python, py4j):
 
     bashrc_location = os.path.expanduser("~/.bashrc")
 
-    with open(bashrc_location, 'a') as bashrc:
-        bashrc.write("\n# Added by findspark\n")
-        bashrc.write("export SPARK_HOME=" + spark_home + "\n")
-        bashrc.write("export PYTHONPATH=" + spark_python + ":" +
-                     py4j + ":$PYTHONPATH\n\n")
+    if os.path.isfile(bashrc_location):
+        with open(bashrc_location, 'a') as bashrc:
+            bashrc.write("\n# Added by findspark\n")
+            bashrc.write("export SPARK_HOME=" + spark_home + "\n")
+            bashrc.write("export PYTHONPATH=" + spark_python + ":" +
+                         py4j + ":$PYTHONPATH\n\n")
 
     cshrc_location = os.path.expanduser("~/.cshrc")
 
-    with open(cshrc_location, 'a') as cshrc:
-        cshrc.write("\n# Added by findspark\n")
-        cshrc.write("setenv SPARK_HOME " + spark_home + "\n")
-        cshrc.write("setenv PYTHONPATH \"" + spark_python + ":" +
-                    py4j + ":\"$PYTHONPATH")
+    if os.path.isfile(cshrc_location):
+        with open(cshrc_location, 'a') as cshrc:
+            cshrc.write("\n# Added by findspark\n")
+            cshrc.write("setenv SPARK_HOME " + spark_home + "\n")
+            cshrc.write("setenv PYTHONPATH \"" + spark_python + ":" +
+                        py4j + ":\"$PYTHONPATH")
From fee915e4beb5988b0fc128e57972e8845d31c14f Mon Sep 17 00:00:00 2001
From: alope107
Date: Wed, 1 Jul 2015 10:33:24 -0700
Subject: [PATCH 6/7] Removed csh support

---
 README.md    |  2 +-
 findspark.py | 14 ++------------
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index 797c936..ca24129 100644
--- a/README.md
+++ b/README.md
@@ -40,7 +40,7 @@ Findspark can add a startup file to the current IPython profile so that the envi
 findspark.init('/path/to/spark_home', edit_profile=True)
 ```
 
-Findspark can also add to .bashrc and .cshrc configuration files if they are present so that the environment variables will be properly set whenever a new shell is opened. This is enabled by setting the optional argument `edit_rc` to true.
+Findspark can also add to the .bashrc configuration file if it is present so that the environment variables will be properly set whenever a new shell is opened. This is enabled by setting the optional argument `edit_rc` to true.
 
 ```python
 findspark.init('/path/to/spark_home', edit_rc=True)
diff --git a/findspark.py b/findspark.py
index de3bd4f..f5d645c 100644
--- a/findspark.py
+++ b/findspark.py
@@ -40,10 +40,9 @@ def find():
 def change_rc(spark_home, spark_python, py4j):
     """Persists changes to environment by changing shell config.
 
-    Adds lines to .bashrc and .cshrc to set environment variables
+    Adds lines to .bashrc to set environment variables
     including the adding of dependencies to the system path. Will only
-    edit these files if they already exist. Currently only works for bash
-    and (t)csh.
+    edit this file if it already exists. Currently only works for bash.
 
     Parameters
     ----------
@@ -64,15 +63,6 @@ def change_rc(spark_home, spark_python, py4j):
                          py4j + ":$PYTHONPATH\n\n")
 
-    cshrc_location = os.path.expanduser("~/.cshrc")
-
-    if os.path.isfile(cshrc_location):
-        with open(cshrc_location, 'a') as cshrc:
-            cshrc.write("\n# Added by findspark\n")
-            cshrc.write("setenv SPARK_HOME " + spark_home + "\n")
-            cshrc.write("setenv PYTHONPATH \"" + spark_python + ":" +
-                        py4j + ":\"$PYTHONPATH")
-
 
 def edit_ipython_profile(spark_home, spark_python, py4j):
From 678fa7676482e87fe87ba8452b8f4b9fff5d3b00 Mon Sep 17 00:00:00 2001
From: alope107
Date: Wed, 1 Jul 2015 11:29:27 -0700
Subject: [PATCH 7/7] Fixed documentation typo

---
 findspark.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/findspark.py b/findspark.py
index f5d645c..be8152a 100644
--- a/findspark.py
+++ b/findspark.py
@@ -107,12 +107,12 @@ def init(spark_home=None, edit_rc=False, edit_profile=False):
     ----------
     spark_home : str, optional, default = None
         Path to Spark installation, will try to find automatically
-        if not provided
+        if not provided.
     edit_rc : bool, optional, default = False
         Whether to attempt to persist changes by appending to shell
         config.
     edit_profile : bool, optional, default = False
-        Whether to create a create an IPython startup file to automatically
+        Whether to create an IPython startup file to automatically
         configure and import pyspark.
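After the full series, the public surface is `init(spark_home=None, edit_rc=False, edit_profile=False)`. A minimal usage sketch of the final API (the path is a placeholder, not a tested invocation):

```python
import findspark

# Current process only: set SPARK_HOME and prepend pyspark/py4j to sys.path.
findspark.init('/path/to/spark_home')

# Also persist for future shells (appends to ~/.bashrc if it exists)...
findspark.init('/path/to/spark_home', edit_rc=True)

# ...or for future IPython sessions (writes a profile startup file).
findspark.init('/path/to/spark_home', edit_profile=True)

import pyspark
```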