-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy path0_bootstrap.py
executable file
·52 lines (42 loc) · 1.78 KB
/
0_bootstrap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# # Part 0: Bootstrap File
# You need to run this file at the start of the project. It will install the
# requirements, create the STORAGE environment variable and copy the data from
# raw/WA_Fn-UseC_-Telco-Customer-Churn-.csv into /datalake/data/churn of the STORAGE
# location.
# The STORAGE environment variable is the Cloud Storage location used by the DataLake
# to store hive data. On AWS it will be s3a://[something], on Azure it will be
# abfs://[something] and on a CDSW cluster it will be hdfs://[something].
# Install the requirements
# The leading `!` runs this line as a shell command (IPython/notebook syntax),
# so this file is not plain Python. pip installs everything listed in
# requirements.txt, with the progress bar switched off.
!pip3 install -r requirements.txt --progress-bar off
# Create the directories and upload data
from cmlbootstrap import CMLBootstrap
from IPython.display import Javascript, HTML
import os
import time
import json
import requests
import xml.etree.ElementTree as ET
import datetime
# Abort early when Spark is not available: the script treats a missing
# SPARK_HOME environment variable as "Spark is not enabled".
# Raises KeyError (after printing the same message) so the run stops here.
if "SPARK_HOME" in os.environ:
    print("Spark is enabled")
else:
    print('Spark is not enabled, please enable spark before running this script')
    raise KeyError('Spark is not enabled, please enable spark before running this script')
# Timestamp of this run formatted as DDMMYYYYHHMMSS (naive local time),
# kept as a string so it can be appended to names as a suffix.
run_time_suffix = datetime.datetime.now().strftime("%d%m%Y%H%M%S")
# Instantiate API Wrapper
cml = CMLBootstrap()

# Set the STORAGE environment variable
# Reuse STORAGE when it is already defined; otherwise ask the API wrapper for
# the cloud storage location, register it as an environment variable through
# the wrapper, and export it to this process so the shell commands further
# down can expand $STORAGE.
try:
    storage = os.environ["STORAGE"]
except KeyError:
    # Only a missing variable triggers the fallback; any other error propagates.
    # NOTE(review): the source indentation was lost; registering the variable
    # only on this fallback path is assumed -- confirm against the original.
    storage = cml.get_cloud_storage()
    storage_environment_params = {"STORAGE": storage}
    storage_environment = cml.create_environment_variable(storage_environment_params)
    os.environ["STORAGE"] = storage
# Upload the data to the cloud storage
# Each `!` line is executed as a shell command (IPython/notebook syntax).
# $STORAGE is presumably expanded from the STORAGE environment variable
# exported earlier in this script -- confirm in the target runtime.
# Create the destination directory tree one level at a time.
!hadoop fs -mkdir -p $STORAGE/datalake
!hadoop fs -mkdir -p $STORAGE/datalake/data
!hadoop fs -mkdir -p $STORAGE/datalake/data/churn
# Copy the raw churn CSV from /home/cdsw/raw on the local filesystem into the
# churn directory created above, keeping the same file name.
!hadoop fs -copyFromLocal /home/cdsw/raw/WA_Fn-UseC_-Telco-Customer-Churn-.csv $STORAGE/datalake/data/churn/WA_Fn-UseC_-Telco-Customer-Churn-.csv