From 2a3d64ab51b14fa9f0daeead7bda3cd9b9fd122b Mon Sep 17 00:00:00 2001
From: Saif Addin 
Date: Thu, 9 Aug 2018 19:46:46 -0300
Subject: [PATCH 1/2] Updated version numbers, ready to release

---
 CHANGELOG                                    |  43 ++++
 README.md                                    |  28 +--
 build.sbt                                    |   4 +-
 docs/index.html                              |   4 +-
 docs/notebooks.html                          |  18 +-
 docs/quickstart.html                         |  24 +--
 .../ModelDownloaderExample.ipynb             |  20 +-
 .../example/model-downloader/assertion.ipynb | 196 ------------------
 python/example/model-downloader/dl-ner.ipynb |   2 +-
 python/setup.py                              |   2 +-
 .../scala/com/johnsnowlabs/util/Build.scala  |   2 +-
 11 files changed, 104 insertions(+), 239 deletions(-)
 delete mode 100644 python/example/model-downloader/assertion.ipynb

diff --git a/CHANGELOG b/CHANGELOG
index 73bcbf94f75009..a915a790d72627 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,46 @@
+========
+1.6.1
+========
+
+---------------
+New features
+---------------
+* New Scala-only functions that make it easier to work with Annotations in DataFrames. They may be imported through com.johnsnowlabs.nlp.functions._ and allow mapping and filtering within and outside Annotations.
+filterByAnnotations, mapAnnotations and explodeAnnotations work by providing a column and a function. Check out the documentation. Possibly coming to Python later.
+
+---------------
+Bug fixes
+---------------
+* Fixed incorrect filesystem reads in some S3 environments for word embeddings
+* Fixed NerCRF not training correctly from CoNLL, labeling everything as -O- (Thanks @arnound from Slack Channel)
+
+---------------
+Enhancements
+---------------
+* Added overridable config sparknlp.settings.cluster_tmp_dir, which allows setting the cluster location for the temporary embeddings file. May help S3-based clusters with no fs.defaultFS set to a proper distributed storage.
+* New annotator type: CHUNK. Represents a SUBSTRING of DOCUMENT and is used as output from NerConverter, TextMatcher, RegexMatcher and other annotators that retrieve a substring from the original document.
+This will make for better modularity and integration across annotators, such as between NER and AssertionStatus.
+* New annotation transformer: ChunkAssembler. Takes a string or array(string) column from a dataset and creates a CHUNK type annotation. The content must also belong to the current DOCUMENT annotation's content.
+* SentenceDetector's new param explodeSentences allows exploding sentences within a single row into different rows, to increase parallelism and performance in some scenarios, particularly OCR-based ones.
+* AssertionDLApproach may now be used within LightPipelines
+* AssertionDLApproach and AssertionLogRegApproach now work from the CHUNK type instead of start/end bounds, though they may still be trained with start/end. This means the target for assertion may now be any CHUNK output annotator (e.g. RegexMatcher)
+
+---------------
+Other
+---------------
+* PerceptronApproachLegacy moved back to being the default PerceptronApproach. The distributed PerceptronApproach moved to PerceptronApproachDistributed due to not meeting accuracy expectations yet.
+* Some configuration parameters in application.conf have been appropriately moved to proper annotator Params (NorvigSweeting Spell Checker, Vivekn Approach and Sentiment Detector affected)
+* application.conf configuration values renamed for better consistency
+
+---------------
+Developer API
+---------------
+* Added beforeAnnotate() and afterAnnotate() to manipulate dataframes before or after calling the annotate() UDF
+* Added extraValidate() and extraValidateMsg() in all annotators to allow developers to add additional SCHEMA checks in the transformSchema() stage
+* Removed the validation() stage from the fit() stage. Allows for more flexible training when some of the columns are not really required yet.
+* WrapColumnMetadata() will wrap an Annotation column with its appropriate Metadata. Makes it easier not to forget about Metadata in the Schema.
+* The RawAnnotator trait now has all the basics needed to start a new Annotator without an annotate() function. It is the complete stage preceding AnnotatorModel, which inherits from RawAnnotator.
+
 ========
 1.6.0
 ========
diff --git a/README.md b/README.md
index a3bab208e6b35f..a59fad4a9da240 100644
--- a/README.md
+++ b/README.md
@@ -14,18 +14,18 @@ Questions? Feedback? Request access sending an email to nlp@johnsnowlabs.com

This library has been uploaded to the spark-packages repository https://spark-packages.org/package/JohnSnowLabs/spark-nlp .

-To use the most recent version just add the `--packages JohnSnowLabs:spark-nlp:1.6.0` to you spark command
+To use the most recent version just add the `--packages JohnSnowLabs:spark-nlp:1.6.1` to your spark command

```sh
-spark-shell --packages JohnSnowLabs:spark-nlp:1.6.0
+spark-shell --packages JohnSnowLabs:spark-nlp:1.6.1
```

```sh
-pyspark --packages JohnSnowLabs:spark-nlp:1.6.0
+pyspark --packages JohnSnowLabs:spark-nlp:1.6.1
```

```sh
-spark-submit --packages JohnSnowLabs:spark-nlp:1.6.0
+spark-submit --packages JohnSnowLabs:spark-nlp:1.6.1
```

## Jupyter Notebook
@@ -35,23 +35,23 @@
export SPARK_HOME=/path/to/your/spark/folder
export PYSPARK_DRIVER_PYTHON=jupyter
export PYSPARK_DRIVER_PYTHON_OPTS=notebook

-pyspark --packages JohnSnowLabs:spark-nlp:1.6.0
+pyspark --packages JohnSnowLabs:spark-nlp:1.6.1
```

## Apache Zeppelin
This way will work for both Scala and Python
```
-export SPARK_SUBMIT_OPTIONS="--packages JohnSnowLabs:spark-nlp:1.6.0"
+export SPARK_SUBMIT_OPTIONS="--packages JohnSnowLabs:spark-nlp:1.6.1"
```
Alternatively, add the following Maven Coordinates to the interpreter's library list
```
-com.johnsnowlabs.nlp:spark-nlp_2.11:1.6.0
+com.johnsnowlabs.nlp:spark-nlp_2.11:1.6.1
```

## Python without explicit Spark installation
If you installed pyspark through pip, you can now install sparknlp through pip
```
-pip install --index-url https://test.pypi.org/simple/ spark-nlp==1.6.0
+pip install --index-url https://test.pypi.org/simple/ spark-nlp==1.6.1
```
Then you'll have to create a SparkSession manually, for example:
```
spark = SparkSession.builder \

## Pre-compiled Spark-NLP and Spark-NLP-OCR

You may download fat-jar from here:
-[Spark-NLP 1.6.0 FAT-JAR](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/spark-nlp-assembly-1.6.0.jar)
+[Spark-NLP 1.6.1 FAT-JAR](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/spark-nlp-assembly-1.6.1.jar)
or non-fat from here
-[Spark-NLP 1.6.0 PKG JAR](http://repo1.maven.org/maven2/com/johnsnowlabs/nlp/spark-nlp_2.11/1.6.0/spark-nlp_2.11-1.6.0.jar)
+[Spark-NLP 1.6.1 PKG 
JAR](http://repo1.maven.org/maven2/com/johnsnowlabs/nlp/spark-nlp_2.11/1.6.1/spark-nlp_2.11-1.6.1.jar) Spark-NLP-OCR Module (Requires native Tesseract 4.x+ for image based OCR. Does not require Spark-NLP to work but highly suggested) -[Spark-NLP-OCR 1.6.0 FAT-JAR](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/spark-nlp-ocr-assembly-1.6.0.jar) +[Spark-NLP-OCR 1.6.1 FAT-JAR](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/spark-nlp-ocr-assembly-1.6.1.jar) ## Maven central @@ -83,19 +83,19 @@ Our package is deployed to maven central. In order to add this package as a depe com.johnsnowlabs.nlp spark-nlp_2.11 - 1.6.0 + 1.6.1 ``` #### SBT ```sbtshell -libraryDependencies += "com.johnsnowlabs.nlp" % "spark-nlp_2.11" % "1.6.0" +libraryDependencies += "com.johnsnowlabs.nlp" % "spark-nlp_2.11" % "1.6.1" ``` If you are using `scala 2.11` ```sbtshell -libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "1.6.0" +libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "1.6.1" ``` ## Using the jar manually diff --git a/build.sbt b/build.sbt index 76800b6f0c816b..f81b9aa039c139 100644 --- a/build.sbt +++ b/build.sbt @@ -9,7 +9,7 @@ name := "spark-nlp" organization := "com.johnsnowlabs.nlp" -version := "1.6.0" +version := "1.6.1" scalaVersion in ThisBuild := scalaVer @@ -137,7 +137,7 @@ assemblyMergeStrategy in assembly := { lazy val ocr = (project in file("ocr")) .settings( name := "spark-nlp-ocr", - version := "1.6.0", + version := "1.6.1", libraryDependencies ++= ocrDependencies ++ analyticsDependencies ++ testDependencies, diff --git a/docs/index.html b/docs/index.html index 8938713c76c817..7075d9a7191c91 100644 --- a/docs/index.html +++ b/docs/index.html @@ -78,8 +78,8 @@

High Performance NLP with Apache Spark

Questions? Join our Slack

-

2018 Jul 7th - Update! 1.6.0 Released! OCR PDF to Spark-NLP capabilities, new Chunker annotator, fixed AWS compatibility, better performance and much more. - Learn changes HERE and check out for updated documentation below

+

2018 Aug 9th - Update! 1.6.1 Released! Fixed S3-based cluster support, new CHUNK annotation type and more!
+                                    Learn about the changes HERE and check out the updated documentation below

diff --git a/docs/notebooks.html b/docs/notebooks.html index 359e85b4494b30..384797b1a3bb52 100644 --- a/docs/notebooks.html +++ b/docs/notebooks.html @@ -103,7 +103,7 @@

Vivekn Sentiment Analysis: Since we are dealing with small amounts of data, we put LightPipelines into practice.

- Take me to notebook! + Take me to notebook!

@@ -135,7 +135,7 @@

Vivekn Sentiment Analysis

better Sentiment Analysis accuracy

- Take me to notebook! + Take me to notebook!

@@ -157,7 +157,7 @@

Rule-based Sentiment Analysis: Each of these sentences will be used for giving a score to the text

- Take me to notebook! + Take me to notebook!

@@ -177,7 +177,7 @@

CRF Named Entity Recognition

- Take me to notebook! + Take me to notebook!

@@ -196,7 +196,7 @@

CNN Deep Learning NER

and it will leverage batch-based distributed calls to native TensorFlow libraries during prediction.

- Take me to notebook! + Take me to notebook!

@@ -211,7 +211,7 @@

Simple Text Matching

This annotator is an AnnotatorModel and does not require training.

- Take me to notebook! + Take me to notebook!

@@ -226,7 +226,7 @@

Assertion Status with LogReg: the dataset will return the appropriate result.

- Take me to notebook! + Take me to notebook!

@@ -241,7 +241,7 @@

Deep Learning Assertion Status: graphs may be redesigned if needed.

- Take me to notebook! + Take me to notebook!

@@ -260,7 +260,7 @@

Retrieving Pretrained models: Such components may then be injected seamlessly into further pipelines, and so on.

- Take me to notebook! + Take me to notebook!

diff --git a/docs/quickstart.html b/docs/quickstart.html index 89f7c7224f010b..0f435259cb9b25 100644 --- a/docs/quickstart.html +++ b/docs/quickstart.html @@ -95,9 +95,9 @@

Requirements

To start using the library, execute any of the following lines depending on your desired use case:

-
spark-shell --packages JohnSnowLabs:spark-nlp:1.6.0
-pyspark --packages JohnSnowLabs:spark-nlp:1.6.0
-spark-submit --packages JohnSnowLabs:spark-nlp:1.6.0
+                                
spark-shell --packages JohnSnowLabs:spark-nlp:1.6.1
+pyspark --packages JohnSnowLabs:spark-nlp:1.6.1
+spark-submit --packages JohnSnowLabs:spark-nlp:1.6.1
 
NOTE: The Spark --packages option has been reported to work improperly, particularly in Python, when utilizing physical clusters. Using --jars is advised. For Python, add Spark-NLP through pip
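For example (the jar path is a hypothetical location; point it at the pre-compiled assembly jar described further down this page):

spark-shell --jars /path/to/spark-nlp-assembly-1.6.1.jar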
@@ -105,35 +105,35 @@

Requirements

Using a Databricks cloud cluster or an Apache Zeppelin Scala notebook? Add the following Maven coordinates in the appropriate menu:

-
com.johnsnowlabs.nlp:spark-nlp_2.11:1.6.0
+
com.johnsnowlabs.nlp:spark-nlp_2.11:1.6.1

For Python in Apache Zeppelin you may need to set up SPARK_SUBMIT_OPTIONS using the --packages instruction shown above, like this

-
export SPARK_SUBMIT_OPTIONS="--packages JohnSnowLabs:spark-nlp:1.6.0"
+
export SPARK_SUBMIT_OPTIONS="--packages JohnSnowLabs:spark-nlp:1.6.1"

Python Jupyter Notebook with PySpark? Add the following environment variables (depending on your OS)

export SPARK_HOME=/path/to/your/spark/folder
 export PYSPARK_DRIVER_PYTHON=jupyter
 export PYSPARK_DRIVER_PYTHON_OPTS=notebook
 
-pyspark --packages JohnSnowLabs:spark-nlp:1.6.0
+pyspark --packages JohnSnowLabs:spark-nlp:1.6.1

Python without an explicit Spark installation? Use pip to install (after you have pip-installed pyspark)

-
pip install --index-url https://test.pypi.org/simple/ spark-nlp==1.6.0
+
pip install --index-url https://test.pypi.org/simple/ spark-nlp==1.6.1

In this case, you will have to start the SparkSession in your Python program manually; here is an example

spark = SparkSession.builder \
     .appName("ner")\
     .master("local[*]")\
     .config("spark.driver.memory","4G")\
     .config("spark.driver.maxResultSize", "2G") \
-    .config("spark.driver.extraClassPath", "lib/spark-nlp-assembly-1.6.0.jar")\
+    .config("spark.driver.extraClassPath", "lib/spark-nlp-assembly-1.6.1.jar")\
     .config("spark.kryoserializer.buffer.max", "500m")\
     .getOrCreate()

Pre-compiled Spark-NLP assembly fat-jar for use in standalone projects may be downloaded
- here
+ here
A non-fat jar may be downloaded
- here
+ here
Then, run spark-shell or spark-submit with the appropriate --jars
- /path/to/spark-nlp_2.11-1.6.0.jar to use the library in spark.
+ /path/to/spark-nlp_2.11-1.6.1.jar to use the library in Spark.

For further alternatives and documentation, check out our README page on GitHub.
@@ -419,7 +419,7 @@

Utilizing Spark-NLP OCR PDF Converter

Installing Spark-NLP OCRHelper

First, either build from source or download the following standalone jar module (works from both Spark-NLP Python and Scala):
- Spark-NLP-OCR
+ Spark-NLP-OCR
And add it to your Spark environment (with --jars, or the spark.driver.extraClassPath and spark.executor.extraClassPath configuration)
Second, if your PDFs don't have a text layer (this depends on how the PDFs were created), the library will use Tesseract 4.0 in the background.
Tesseract will utilize native libraries, so you'll have to get them installed on your system.
diff --git a/python/example/model-downloader/ModelDownloaderExample.ipynb b/python/example/model-downloader/ModelDownloaderExample.ipynb
index 78159e5bf90d17..6d85e8974eb9da 100644
--- a/python/example/model-downloader/ModelDownloaderExample.ipynb
+++ b/python/example/model-downloader/ModelDownloaderExample.ipynb
@@ -209,6 +209,24 @@
     "ner_tagged.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's try a sentiment analysis pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sparknlp.pretrained.pipeline.en import SentimentPipeline\n",
+    "\n",
+    "SentimentPipeline.annotate(\"This is a good movie!!!\")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -233,7 +251,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.5"
+   "version": "3.6.6"
   }
  },
  "nbformat": 4,
diff --git a/python/example/model-downloader/assertion.ipynb b/python/example/model-downloader/assertion.ipynb
deleted file mode 100644
index 30cd32e8a27587..00000000000000
--- a/python/example/model-downloader/assertion.ipynb
+++ /dev/null
@@ -1,196 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Show how to use pretrained assertion status"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import sys\n",
-    "sys.path.append('../../')\n",
-    "\n",
-    "from pyspark.sql import SparkSession\n",
-    "from pyspark.ml import PipelineModel\n",
-    "\n",
-    "from sparknlp.annotator import *\n",
-    "from sparknlp.common import *\n",
-    "from sparknlp.base import *\n",
-    "from sparknlp.pretrained import ResourceDownloader\n",
-    "\n",
-    "from pathlib import Path\n",
-    "\n",
-    "if sys.version_info[0] < 3:\n",
-    "    from urllib import urlretrieve\n",
-    "else:\n",
-    "    from urllib.request import urlretrieve"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "spark = SparkSession.builder \\\n",
-    "    .appName(\"assertion-status\")\\\n",
-    "    .master(\"local[1]\")\\\n",
-    "    .config(\"spark.driver.memory\",\"4G\")\\\n",
-    "    .config(\"spark.driver.maxResultSize\", \"2G\") \\\n",
-    "    .config(\"spark.driver.extraClassPath\", \"lib/sparknlp.jar\")\\\n",
-    "    .getOrCreate()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Create some data for testing purposes"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from pyspark.sql import Row\n",
-    "R = Row('sentence', 'start', 'end')\n",
-    "test_data = spark.createDataFrame([R('Sister with stomach cancer .',2,3),\n",
-    "                                   R('A thallium stress test showed tachycardia and severe dyspnea',5,5),\n",
-    "                                   R('Positive for shortness of breath, no cough',2,4),\n",
-    "                                   R('Positive for shortness of breath, no cough',7,7)])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Create some 
pipelines, one for each type of assertion classification algorithm, model download can take some time." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time\n", - "\n", - "# instantiate the downloader\n", - "downloader = ResourceDownloader()\n", - "\n", - "documentAssembler = DocumentAssembler() \\\n", - " .setInputCol(\"sentence\") \\\n", - " .setOutputCol(\"document\")\n", - "\n", - "# download bidirectional lstm based assertion status trained on negex dataset\n", - "assertion_fast_dl = downloader.downloadModel(AssertionDLModel, \"as_fast_dl\", \"en\") \\\n", - " .setInputCols([\"document\"]) \\\n", - " .setOutputCol(\"assertion\") \\\n", - " \n", - "\n", - "finisher = Finisher() \\\n", - " .setInputCols([\"assertion\"]) \\\n", - " .setIncludeKeys(True)\n", - "\n", - "pipeline_fast_dl = PipelineModel(stages = [documentAssembler, assertion_fast_dl, finisher])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now let's use these pipelines and see the results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pipeline_fast_dl.transform(test_data).show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# download logistic regression based assertion status trained on negex dataset\n", - "assertion_fast_lg = downloader.downloadModel(AssertionLogRegModel, \"as_fast_lg\", \"en\") \\\n", - " .setInputCols([\"document\"]) \\\n", - " .setOutputCol(\"assertion\") \\\n", - "\n", - "pipeline_fast_lg = PipelineModel(stages = [documentAssembler, assertion_fast_lg, finisher])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pipeline_fast_lg.transform(test_data).show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# download bidirectional lstm based assertion status trained on i2b2 dataset\n", - "assertion_full_dl = downloader.downloadModel(AssertionDLModel, \"as_fast_dl\", \"en\") \\\n", - " .setInputCols([\"document\"])\\\n", - " .setOutputCol(\"assertion\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pipeline_full_dl = PipelineModel(stages = [documentAssembler, assertion_full_dl, finisher])\n", - "pipeline_full_dl.transform(test_data).show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/python/example/model-downloader/dl-ner.ipynb b/python/example/model-downloader/dl-ner.ipynb index 2289fa9035d364..26edd2a9ec3ef4 100644 --- a/python/example/model-downloader/dl-ner.ipynb +++ b/python/example/model-downloader/dl-ner.ipynb @@ -139,7 +139,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.6.6" } }, "nbformat": 4, diff --git a/python/setup.py b/python/setup.py index ced0f1fc3241b8..3ba288838de207 100644 --- 
a/python/setup.py
+++ b/python/setup.py
@@ -40,7 +40,7 @@
     # For a discussion on single-sourcing the version across setup.py and the
     # project code, see
     # https://packaging.python.org/en/latest/single_source_version.html
-    version='1.6.0',  # Required
+    version='1.6.1',  # Required

     # This is a one-line description or tagline of what your project does. This
     # corresponds to the "Summary" metadata field:
diff --git a/src/main/scala/com/johnsnowlabs/util/Build.scala b/src/main/scala/com/johnsnowlabs/util/Build.scala
index 508ca9bd556cd6..9acdbdefd5829d 100644
--- a/src/main/scala/com/johnsnowlabs/util/Build.scala
+++ b/src/main/scala/com/johnsnowlabs/util/Build.scala
@@ -11,6 +11,6 @@ object Build {
     if (version != null && version.nonEmpty)
       version
     else
-      "1.6.0"
+      "1.6.1"
   }
 }
\ No newline at end of file

From 4f6bd68cd265bcc08491c4eee2a16eed6e3ceeea Mon Sep 17 00:00:00 2001
From: Saif Addin 
Date: Thu, 9 Aug 2018 20:04:20 -0300
Subject: [PATCH 2/2] Updated Changelog and added back deleted pretrained models

---
 CHANGELOG                                              |  8 ++++++++
 python/sparknlp/annotator.py                           | 10 ++++++++++
 src/main/scala/com/johnsnowlabs/nlp/annotator.scala    |  7 ++++---
 .../nlp/annotators/assertion/dl/AssertionDLModel.scala |  8 +++++++-
 .../assertion/logreg/AssertionLogRegModel.scala        |  9 ++++++++-
 src/main/scala/com/johnsnowlabs/nlp/base.scala         |  3 +++
 6 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index a915a790d72627..ed3101a70806c6 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,14 @@
 ========
 1.6.1
 ========
+---------------
+Overview
+---------------
+Hi! We're glad to announce new hotfix 1.6.1. Although the changes seem modest or very specific, there is a lot going on under the hood. First of all, we've worked hard with the community to understand S3-based clusters,
+which don't have a common fs.defaultFS configuration, the setting we use to tell where the cluster temp folder is located in order to distribute word embeddings. We fixed two things here:
+first, a bug that pointed to the wrong filesystem; second, we added a custom override setting in application.conf that allows manually setting where temp folders go in the cluster. This should help S3 users.
+Please share your feedback in this regard.
+On the other hand, we created a new annotator type internally. The CHUNK type allows better modularity in the communication between different annotators. Its impact will be noticed implicitly and over time.
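+
+For illustration, assuming the standard Typesafe application.conf mechanism this setting lives in, a minimal override could look like:
+
+    sparknlp.settings.cluster_tmp_dir = "hdfs:///tmp/sparknlp_embeddings"
+
+where the path is a hypothetical placeholder for your cluster's distributed storage.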
--------------- New features diff --git a/python/sparknlp/annotator.py b/python/sparknlp/annotator.py index 386b6f5e796c91..bb7032f4b4e7eb 100755 --- a/python/sparknlp/annotator.py +++ b/python/sparknlp/annotator.py @@ -1059,6 +1059,11 @@ def __init__(self, java_model=None): else: super(AssertionLogRegModel, self).__init__(classname="com.johnsnowlabs.nlp.annotators.assertion.logreg.AssertionLogRegModel") + @staticmethod + def pretrained(name="as_fast_lg", language="en"): + from sparknlp.pretrained import ResourceDownloader + return ResourceDownloader.downloadModel(AssertionLogRegModel, name, language) + class NerDLApproach(AnnotatorApproach, ApproachWithEmbeddings, NerApproach): @@ -1190,3 +1195,8 @@ def __init__(self, java_model=None): super(JavaModel, self).__init__(java_model) else: super(AssertionDLModel, self).__init__(classname="com.johnsnowlabs.nlp.annotators.assertion.dl.AssertionDLModel") + + @staticmethod + def pretrained(name="as_fast_dl", language="en"): + from sparknlp.pretrained import ResourceDownloader + return ResourceDownloader.downloadModel(AssertionDLModel, name, language) diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotator.scala b/src/main/scala/com/johnsnowlabs/nlp/annotator.scala index dc8b3754aa6d83..b1d36168734b74 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotator.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotator.scala @@ -1,7 +1,8 @@ package com.johnsnowlabs.nlp import com.johnsnowlabs.nlp.annotators.PretrainedLemmatizer -import com.johnsnowlabs.nlp.annotators.assertion.dl.ReadsAssertionGraph +import com.johnsnowlabs.nlp.annotators.assertion.dl.{PretrainedDLAssertionStatus, ReadsAssertionGraph} +import com.johnsnowlabs.nlp.annotators.assertion.logreg.PretrainedAssertionLogRegModel import com.johnsnowlabs.nlp.annotators.ner.crf.PretrainedNerCrf import com.johnsnowlabs.nlp.annotators.ner.dl.{PretrainedNerDL, ReadsNERGraph, WithGraphResolver} import com.johnsnowlabs.nlp.annotators.pos.perceptron.PretrainedPerceptronModel @@ -47,7 +48,7 @@ object annotator { type AssertionLogRegApproach = com.johnsnowlabs.nlp.annotators.assertion.logreg.AssertionLogRegApproach object AssertionLogRegApproach extends DefaultParamsReadable[AssertionLogRegApproach] type AssertionLogRegModel = com.johnsnowlabs.nlp.annotators.assertion.logreg.AssertionLogRegModel - object AssertionLogRegModel extends EmbeddingsReadable[AssertionLogRegModel] + object AssertionLogRegModel extends EmbeddingsReadable[AssertionLogRegModel] with PretrainedAssertionLogRegModel type NerCrfApproach = com.johnsnowlabs.nlp.annotators.ner.crf.NerCrfApproach object NerCrfApproach extends DefaultParamsReadable[NerCrfApproach] @@ -95,6 +96,6 @@ object annotator { type AssertionDLApproach = com.johnsnowlabs.nlp.annotators.assertion.dl.AssertionDLApproach object AssertionDLApproach extends DefaultParamsReadable[AssertionDLApproach] type AssertionDLModel = com.johnsnowlabs.nlp.annotators.assertion.dl.AssertionDLModel - object AssertionDLModel extends EmbeddingsReadable[AssertionDLModel] with ReadsAssertionGraph + object AssertionDLModel extends EmbeddingsReadable[AssertionDLModel] with ReadsAssertionGraph with PretrainedDLAssertionStatus } diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/assertion/dl/AssertionDLModel.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/assertion/dl/AssertionDLModel.scala index b8a946d22b8662..ecd24cc62df7a8 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/assertion/dl/AssertionDLModel.scala +++ 
b/src/main/scala/com/johnsnowlabs/nlp/annotators/assertion/dl/AssertionDLModel.scala @@ -6,6 +6,7 @@ import com.johnsnowlabs.nlp.annotators.ner.Verbose import com.johnsnowlabs.nlp.serialization.StructFeature import com.johnsnowlabs.nlp._ import com.johnsnowlabs.nlp.embeddings.EmbeddingsReadable +import com.johnsnowlabs.nlp.pretrained.ResourceDownloader import org.apache.spark.ml.param.{IntParam, ParamMap} import org.apache.spark.ml.util.Identifiable import org.apache.spark.sql._ @@ -117,4 +118,9 @@ trait ReadsAssertionGraph extends ParamsAndFeaturesReadable[AssertionDLModel] wi addReader(readAssertionGraph) } -object AssertionDLModel extends EmbeddingsReadable[AssertionDLModel] with ReadsAssertionGraph \ No newline at end of file +trait PretrainedDLAssertionStatus { + def pretrained(name: String = "as_fast_dl", language: Option[String] = Some("en"), folder: String = ResourceDownloader.publicLoc): AssertionDLModel = + ResourceDownloader.downloadModel(AssertionDLModel, name, language, folder) +} + +object AssertionDLModel extends EmbeddingsReadable[AssertionDLModel] with ReadsAssertionGraph with PretrainedDLAssertionStatus \ No newline at end of file diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/assertion/logreg/AssertionLogRegModel.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/assertion/logreg/AssertionLogRegModel.scala index f846448bbe3244..ead3246aae021f 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/annotators/assertion/logreg/AssertionLogRegModel.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/assertion/logreg/AssertionLogRegModel.scala @@ -3,6 +3,7 @@ package com.johnsnowlabs.nlp.annotators.assertion.logreg import com.johnsnowlabs.nlp.AnnotatorType._ import com.johnsnowlabs.nlp._ import com.johnsnowlabs.nlp.embeddings.{EmbeddingsReadable, WordEmbeddings} +import com.johnsnowlabs.nlp.pretrained.ResourceDownloader import com.johnsnowlabs.nlp.serialization.{MapFeature, StructFeature} import org.apache.spark.ml.classification.LogisticRegressionModel import org.apache.spark.ml.util.Identifiable @@ -97,4 +98,10 @@ class AssertionLogRegModel(override val uid: String) extends RawAnnotator[Assert override def copy(extra: ParamMap): AssertionLogRegModel = defaultCopy(extra) } -object AssertionLogRegModel extends EmbeddingsReadable[AssertionLogRegModel] \ No newline at end of file +trait PretrainedAssertionLogRegModel { + def pretrained(name: String = "as_fast_lg", language: Option[String] = Some("en"), remoteLoc: String = ResourceDownloader.publicLoc): AssertionLogRegModel = + ResourceDownloader.downloadModel(AssertionLogRegModel, name, language, remoteLoc) +} + + +object AssertionLogRegModel extends EmbeddingsReadable[AssertionLogRegModel] with PretrainedAssertionLogRegModel diff --git a/src/main/scala/com/johnsnowlabs/nlp/base.scala b/src/main/scala/com/johnsnowlabs/nlp/base.scala index dd32b0eb3c2ed1..ad13ffa69afbb1 100644 --- a/src/main/scala/com/johnsnowlabs/nlp/base.scala +++ b/src/main/scala/com/johnsnowlabs/nlp/base.scala @@ -8,6 +8,9 @@ object base { type DocumentAssembler = com.johnsnowlabs.nlp.DocumentAssembler object DocumentAssembler extends DefaultParamsReadable[DocumentAssembler] + type ChunkAssembler = com.johnsnowlabs.nlp.ChunkAssembler + object ChunkAssembler extends DefaultParamsReadable[ChunkAssembler] + type TokenAssembler = com.johnsnowlabs.nlp.TokenAssembler object TokenAssembler extends DefaultParamsReadable[TokenAssembler]
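Below is a minimal usage sketch for the pretrained() entry points added in this patch. The default model names ("as_fast_dl" and "as_fast_lg") come from the signatures above; the column wiring follows the deleted assertion.ipynb example and is otherwise illustrative:

```scala
import com.johnsnowlabs.nlp.base._
import com.johnsnowlabs.nlp.annotator._

// Build DOCUMENT annotations from the raw text column
val documentAssembler = new DocumentAssembler()
  .setInputCol("sentence")
  .setOutputCol("document")

// Downloads the default English DL assertion model ("as_fast_dl") from the public location
val assertionDl = AssertionDLModel.pretrained()
  .setInputCols(Array("document"))
  .setOutputCol("assertion")

// The logistic regression variant resolves "as_fast_lg" the same way
val assertionLogReg = AssertionLogRegModel.pretrained()
  .setInputCols(Array("document"))
  .setOutputCol("assertion")
```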