Skip to content

Commit

Permalink
Merge pull request #3 from alope107/PersistentChanges
Browse files Browse the repository at this point in the history
Added option to persist changes by editing bash and csh config files
  • Loading branch information
minrk committed Jul 2, 2015
2 parents d657822 + 678fa76 commit 04b6757
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 4 deletions.
16 changes: 15 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,18 @@ To verify the automatically detected location, call

```python
findspark.find()
```
```

Findspark can add a startup file to the current IPython profile so that the environment variables will be properly set and pyspark will be imported upon IPython startup. This file is created when `edit_profile` is set to true.

```ipython --profile=myprofile
findspark.init('/path/to/spark_home', edit_profile=True)
```

Findspark can also add to the .bashrc configuration file if it is present so that the environment variables will be properly set whenever a new shell is opened. This is enabled by setting the optional argument `edit_rc` to true.

```python
findspark.init('/path/to/spark_home', edit_rc=True)
```

If changes are persisted, findspark will not need to be called again unless the spark installation is moved.
80 changes: 77 additions & 3 deletions findspark.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
from glob import glob
import os
import sys
import subprocess
from IPython import get_ipython

__version__ = '0.0.3'
__version__ = '0.0.4'


def find():
Expand All @@ -35,7 +37,67 @@ def find():
return spark_home


def init(spark_home=None):
def change_rc(spark_home, spark_python, py4j):
    """Persist changes to the environment by appending to the shell config.

    Appends lines to ~/.bashrc that export SPARK_HOME and prepend the
    Spark dependencies to PYTHONPATH. Only edits the file if it already
    exists (never creates it). Currently only works for bash.

    Parameters
    ----------
    spark_home : str
        Path to Spark installation.
    spark_python : str
        Path to python subdirectory of Spark installation.
    py4j : str
        Path to py4j library.
    """

    bashrc_location = os.path.expanduser("~/.bashrc")

    # Only append if the user already has a .bashrc; do not create one.
    if os.path.isfile(bashrc_location):
        with open(bashrc_location, 'a') as bashrc:
            bashrc.write("\n# Added by findspark\n")
            # Double-quote the values so paths containing spaces survive
            # shell parsing; $PYTHONPATH still expands inside double quotes.
            bashrc.write('export SPARK_HOME="' + spark_home + '"\n')
            bashrc.write('export PYTHONPATH="' + spark_python + ':' +
                         py4j + ':$PYTHONPATH"\n\n')


def edit_ipython_profile(spark_home, spark_python, py4j):
    """Add a startup file to the current IPython profile to import pyspark.

    The generated startup file sets the required environment variables
    and imports pyspark each time IPython starts.

    Parameters
    ----------
    spark_home : str
        Path to Spark installation.
    spark_python : str
        Path to python subdirectory of Spark installation.
    py4j : str
        Path to py4j library.
    """

    ip = get_ipython()

    if ip:
        # Running inside IPython: use the active profile's directory.
        profile_dir = ip.profile_dir.location
    else:
        # Not inside IPython: locate the default profile on disk.
        from IPython.utils.path import locate_profile
        profile_dir = locate_profile()

    startup_file_loc = os.path.join(profile_dir, "startup", "findspark.py")

    with open(startup_file_loc, 'w') as startup_file:
        # Lines of code to be run when IPython starts.
        # repr() emits valid Python string literals even when the paths
        # contain quotes or backslashes (e.g. Windows paths).
        startup_file.write("import sys, os\n")
        startup_file.write("os.environ['SPARK_HOME'] = " + repr(spark_home) + "\n")
        startup_file.write("sys.path[:0] = " + repr([spark_python, py4j]) + "\n")
        startup_file.write("import pyspark\n")


def init(spark_home=None, edit_rc=False, edit_profile=False):
"""Make pyspark importable.
Sets environmental variables and adds dependencies to sys.path.
Expand All @@ -45,7 +107,13 @@ def init(spark_home=None):
----------
spark_home : str, optional, default = None
Path to Spark installation, will try to find automatically
if not provided
if not provided.
edit_rc : bool, optional, default = False
Whether to attempt to persist changes by appending to shell
config.
edit_profile : bool, optional, default = False
Whether to create an IPython startup file to automatically
configure and import pyspark.
"""

if not spark_home:
Expand All @@ -58,3 +126,9 @@ def init(spark_home=None):
spark_python = os.path.join(spark_home, 'python')
py4j = glob(os.path.join(spark_python, 'lib', 'py4j-*.zip'))[0]
sys.path[:0] = [spark_python, py4j]

if edit_rc:
change_rc(spark_home, spark_python, py4j)

if edit_profile:
edit_ipython_profile(spark_home, spark_python, py4j)

0 comments on commit 04b6757

Please sign in to comment.