From 718b01fe78659d744954c2d4318d4fd2d4b51205 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Sat, 11 Jan 2025 00:41:17 -0600
Subject: [PATCH] remove Python package, drop Elasticsearch 1.0.3 support, get CI working again (#224)

---
 .ci/install.sh                              |   4 -
 .ci/lint_r_code.R                           |  36 ++--
 .ci/report_to_covr.sh                       |   4 -
 .ci/test.sh                                 |   7 -
 .github/CODEOWNERS                          |   3 -
 .github/workflows/ci.yml                    |  51 +-----
 .gitignore                                  |   4 -
 CONTRIBUTING.md                             |   9 +-
 Makefile                                    |  24 +--
 README.md                                   |  85 ++++-----
 py-pkg/Makefile                             |  20 ---
 py-pkg/docs/Makefile                        |  20 ---
 py-pkg/docs/conf.py                         | 186 --------------------
 py-pkg/docs/index.rst                       |  21 ---
 py-pkg/docs/make.bat                        |  36 ----
 py-pkg/docs/uptasticsearch.aggs_parsers.rst |   7 -
 py-pkg/docs/uptasticsearch.clients.rst      |   7 -
 py-pkg/docs/uptasticsearch.fetch_all.rst    |   7 -
 py-pkg/docs/uptasticsearch.rst              |  20 ---
 py-pkg/docs/uptasticsearch.util.rst         |   7 -
 py-pkg/setup.py                             |  47 -----
 py-pkg/tests/test_clients.py                |  33 ----
 py-pkg/tests/test_util.py                   |  97 ----------
 py-pkg/uptasticsearch/__init__.py           |   1 -
 py-pkg/uptasticsearch/aggs_parsers.py       | 171 ------------------
 py-pkg/uptasticsearch/clients.py            | 151 ----------------
 py-pkg/uptasticsearch/fetch_all.py          |  48 -----
 py-pkg/uptasticsearch/util.py               | 104 ----------
 28 files changed, 65 insertions(+), 1145 deletions(-)
 delete mode 100644 py-pkg/Makefile
 delete mode 100644 py-pkg/docs/Makefile
 delete mode 100644 py-pkg/docs/conf.py
 delete mode 100644 py-pkg/docs/index.rst
 delete mode 100644 py-pkg/docs/make.bat
 delete mode 100644 py-pkg/docs/uptasticsearch.aggs_parsers.rst
 delete mode 100644 py-pkg/docs/uptasticsearch.clients.rst
 delete mode 100644 py-pkg/docs/uptasticsearch.fetch_all.rst
 delete mode 100644 py-pkg/docs/uptasticsearch.rst
 delete mode 100644 py-pkg/docs/uptasticsearch.util.rst
 delete mode 100644 py-pkg/setup.py
 delete mode 100644 py-pkg/tests/test_clients.py
 delete mode 100644 py-pkg/tests/test_util.py
 delete mode 100644 py-pkg/uptasticsearch/__init__.py
 delete mode 100644 py-pkg/uptasticsearch/aggs_parsers.py
 delete mode 100644 py-pkg/uptasticsearch/clients.py
 delete mode 100644 py-pkg/uptasticsearch/fetch_all.py
 delete mode 100644 py-pkg/uptasticsearch/util.py

diff --git a/.ci/install.sh b/.ci/install.sh
index 422ba3a..1b0c32e 100755
--- a/.ci/install.sh
+++ b/.ci/install.sh
@@ -8,7 +8,3 @@ if [[ "$TASK" == "rpkg" ]]; then
         --clean \
         $(pwd)/r-pkg
 fi
-
-if [[ "$TASK" == "pypkg" ]]; then
-    pip install $(pwd)/py-pkg
-fi
diff --git a/.ci/lint_r_code.R b/.ci/lint_r_code.R
index b6f8f81..2c42670 100755
--- a/.ci/lint_r_code.R
+++ b/.ci/lint_r_code.R
@@ -29,24 +29,24 @@ interactive_text <- paste0(
 )
 
 LINTERS_TO_USE <- list(
-    "absolute_path" = lintr::absolute_path_linter
-    , "assignment" = lintr::assignment_linter
-    , "closed_curly" = lintr::closed_curly_linter
-    , "commas" = lintr::commas_linter
-    , "equals_na" = lintr::equals_na_linter
-    , "function_left" = lintr::function_left_parentheses_linter
-    , "infix_spaces" = lintr::infix_spaces_linter
-    , "no_tabs" = lintr::no_tab_linter
-    , "non_portable_path" = lintr::nonportable_path_linter
-    , "open_curly" = lintr::open_curly_linter
-    , "semicolon" = lintr::semicolon_terminator_linter
-    , "seq" = lintr::seq_linter
-    , "spaces_inside" = lintr::spaces_inside_linter
-    , "spaces_left_parens" = lintr::spaces_left_parentheses_linter
+    # "absolute_path" = lintr::absolute_path_linter()
+    "assignment" = lintr::assignment_linter()
+    # , "closed_curly" = lintr::closed_curly_linter()
+    , "commas" = lintr::commas_linter()
+    , "equals_na" = lintr::equals_na_linter()
+    , "function_left" = lintr::function_left_parentheses_linter()
+    , "infix_spaces" = lintr::infix_spaces_linter()
+    , "no_tabs" = lintr::no_tab_linter()
+    # , "non_portable_path" = lintr::nonportable_path_linter()
+    # , "open_curly" = lintr::open_curly_linter()
+    , "semicolon" = lintr::semicolon_terminator_linter()
+    , "seq" = lintr::seq_linter()
+    , "spaces_inside" = lintr::spaces_inside_linter()
+    , "spaces_left_parens" = lintr::spaces_left_parentheses_linter()
     , "todo_comments" = lintr::todo_comment_linter(c("todo", "fixme", "to-do"))
-    , "trailing_blank" = lintr::trailing_blank_lines_linter
-    , "trailing_white" = lintr::trailing_whitespace_linter
-    , "true_false" = lintr::T_and_F_symbol_linter
+    , "trailing_blank" = lintr::trailing_blank_lines_linter()
+    , "trailing_white" = lintr::trailing_whitespace_linter()
+    , "true_false" = lintr::T_and_F_symbol_linter()
     , "undesirable_function" = lintr::undesirable_function_linter(
         fun = c(
             "cbind" = paste0(
@@ -72,7 +72,7 @@ LINTERS_TO_USE <- list(
             , "??" = interactive_text
         )
     )
-    , "unneeded_concatenation" = lintr::unneeded_concatenation_linter
+    , "unneeded_concatenation" = lintr::unneeded_concatenation_linter()
 )
 
 cat(sprintf("Found %i R files to lint\n", length(FILES_TO_LINT)))
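The linter entries above are now instantiated with `()` because recent `{lintr}` releases expect linter objects rather than bare factory functions (linters with no working replacement are simply commented out). A minimal sketch of exercising a few of the same checks on a single file — `lintr::lint()` is the real entry point, and the file path here is only an example:

```r
library(lintr)

# run a subset of the linters from .ci/lint_r_code.R against one file
# (the path is illustrative)
issues <- lintr::lint(
    filename = "r-pkg/R/es_search.R"
    , linters = list(
        lintr::assignment_linter()
        , lintr::infix_spaces_linter()
        , lintr::seq_linter()
    )
)
print(issues)
```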
diff --git a/.ci/report_to_covr.sh b/.ci/report_to_covr.sh
index 3f0b994..f5138f6 100755
--- a/.ci/report_to_covr.sh
+++ b/.ci/report_to_covr.sh
@@ -9,7 +9,3 @@ if [[ "$TASK" == "rpkg" ]]; then
         covr::codecov('r-pkg/') \
     "
 fi
-
-if [[ "$TASK" == "pypkg" ]]; then
-    echo "This is a Python build. No post-build actions configured."
-fi
diff --git a/.ci/test.sh b/.ci/test.sh
index 18f8e81..fbfe8f7 100755
--- a/.ci/test.sh
+++ b/.ci/test.sh
@@ -11,10 +11,3 @@ if [[ "$TASK" == "rpkg" ]]; then
         --as-cran \
         *.tar.gz
 fi
-
-if [[ "$TASK" == "pypkg" ]]; then
-    pip install wheel
-    pytest \
-        --verbose \
-        $(pwd)/py-pkg
-fi
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 89c9974..548e2af 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -12,9 +12,6 @@
 # Default reviewers for all code
 * @jameslamb @austin3dickey
 
-# Python package
-py-pkg/* @ngparas @jameslamb @austin3dickey
-
 # community files
 LICENSE @jameslamb @austin3dickey @bburns632
 CONDUCT.md @jameslamb @austin3dickey @bburns632
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0bbeac9..24d8ed4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,8 +20,6 @@ jobs:
         #---#
         # R #
         #---#
-        - task: rpkg
-          es_version: 1.0.3
         - task: rpkg
           es_version: 1.7.6
         - task: rpkg
@@ -56,45 +54,6 @@ jobs:
           es_version: 7.11.2
         - task: rpkg
           es_version: 7.12.1
-        #--------#
-        # python #
-        #--------#
-        - task: pypkg
-          es_version: 1.0.3
-        - task: pypkg
-          es_version: 1.7.6
-        - task: pypkg
-          es_version: 2.4.6
-        - task: pypkg
-          es_version: 5.6.16
-        - task: pypkg
-          es_version: 6.0.1
-        - task: pypkg
-          es_version: 6.8.15
-        - task: pypkg
-          es_version: 7.1.1
-        - task: pypkg
-          es_version: 7.2.1
-        - task: pypkg
-          es_version: 7.3.2
-        - task: pypkg
-          es_version: 7.4.2
-        - task: pypkg
-          es_version: 7.5.2
-        - task: pypkg
-          es_version: 7.6.2
-        - task: pypkg
-          es_version: 7.7.1
-        - task: pypkg
-          es_version: 7.8.1
-        - task: pypkg
-          es_version: 7.9.3
-        - task: pypkg
-          es_version: 7.10.2
-        - task: pypkg
-          es_version: 7.11.2
-        - task: pypkg
-          es_version: 7.12.1
     steps:
       - name: checkout repository
         uses: actions/checkout@v2
         with:
           fetch-depth: 1
       - name: set up R
         if: matrix.task == 'rpkg'
-        uses: r-lib/actions/setup-r@v1
+        uses: r-lib/actions/setup-r@v2
         with:
-          r-version: '4.1.0'
+          r-version: release
      - name: set up pandoc
         if: matrix.task == 'rpkg'
-        uses: r-lib/actions/setup-pandoc@v1
-      - name: set up python
-        uses: conda-incubator/setup-miniconda@v2
-        with:
-          python-version: 3.7
+        uses: r-lib/actions/setup-pandoc@v2
       - name: run tests
         shell: bash
         run: |
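The `.ci` scripts above drive each job in this matrix; `.ci/report_to_covr.sh` shells out to R for coverage reporting. A local approximation, assuming the `{covr}` package is installed and the working directory is the repo root (`covr::codecov()` in the script needs a Codecov token, so `covr::report()` is substituted here):

```r
library(covr)

# measure test coverage of the R package, as the CI coverage step does
cov <- covr::package_coverage("r-pkg/")
print(cov)

# render a local HTML report instead of uploading to codecov.io
covr::report(cov)
```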
diff --git a/.gitignore b/.gitignore
index 5996e46..e6eff93 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,10 +51,6 @@ r-pkg/inst/testdata/*.json
 **/dist/
 **/htmlcov/
 **/*.egg-info/
-py-pkg/LICENSE
-py-pkg/NEWS.md
-py-pkg/README.md
-py-pkg/docs/_build
 
 # As long as we're storing the pkgdown site
 # at the repo root, should protect against
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 60df758..7da9a14 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -75,7 +75,7 @@ Each build actually runs many sub-builds. Those sub-builds run once for each com
 * programming language
 * Elasticsearch version
 
-As of this writing, this project has clients in two programming languages: [R](./r-pkg) and [Python](./py-pkg).
+As of this writing, this project has a client in one programming language: [R](./r-pkg).
 
 The set of Elasticsearch versions this project tests against changes regularly as [new Elasticsearch versions are released](https://www.elastic.co/downloads/past-releases#elasticsearch). The strategy in this project is to test against the following Elasticsearch versions:
@@ -87,9 +87,8 @@ The set of Elasticsearch versions this project tests against changes regularly a
 
 > `uptasticsearch` may be tested against specific additional intermediate versions if bugs are found in the interaction between `uptasticsearch` and those versions
 
-So, for example, as of September 2020 that meant we tested against:
+So, for example, as of January 2025 that meant we tested against:
 
-* 1.0.3
 * 1.7.6
 * 2.4.6
 * 5.6.16
 * 6.0.1
 * 6.8.15
 * 7.1.1
 * 7.2.1
 * 7.3.2
 * 7.4.2
 * 7.5.2
 * 7.6.2
 * 7.7.1
 * 7.8.1
 * 7.9.3
 * 7.10.2
 * 7.11.2
 * 7.12.1
 
-You may notice that this strategy means that `uptasticsearch` is tested for backwards compatibility with Elasticsearch versions which have already reached [End-of-Life](https://www.elastic.co/support/eol). For example, support for Elasticsearch 1.7.x officially ended in January 2017. We test these old versions because we know of users whose companies still run those versions, and for whom Elasticsearch upgrades are prohibitively expensive. In general, upgrades across major versions pre-6.x [require a full cluster restart](https://www.elastic.co/guide/en/elasticsearch/reference/current/setup-upgrade.html).
+You may notice that this strategy means that `uptasticsearch` is tested for backwards compatibility with Elasticsearch versions which have already reached [End-of-Life](https://www.elastic.co/support/eol). For example, support for Elasticsearch 1.7.x officially ended in January 2017.
+We test these old versions because we know of users whose companies still run those versions, and for whom Elasticsearch upgrades are prohibitively expensive.
+In general, upgrades across major versions pre-6.x [require a full cluster restart](https://www.elastic.co/guide/en/elasticsearch/reference/current/setup-upgrade.html).
 
 ### Running Tests Locally
diff --git a/Makefile b/Makefile
index 75ad0f2..b0ef3d8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: build_r coverage_r docs_r install_r test_r build_py docs_py install_py test_py gh_pages
+.PHONY: build_r coverage_r docs_r install_r test_r gh_pages
 
 #####
 # R #
@@ -26,28 +26,6 @@ install_r: build_r
 test_r: build_r
 	R CMD CHECK --as-cran uptasticsearch_*.tar.gz
 
-##########
-# Python #
-##########
-
-build_py:
-	cp LICENSE py-pkg/
-	cp NEWS.md py-pkg/
-	cp README.md py-pkg/
-
-docs_py:
-	# Create sphinx rst files for every package and subpackage
-	cd py-pkg && \
-	sphinx-apidoc -f -F -e -o docs uptasticsearch && \
-	cd docs && \
-	make html
-
-install_py:
-	pip install py-pkg/
-
-test_py:
-	pytest py-pkg/
-
 ###########
 # General #
 ###########
diff --git a/README.md b/README.md
index 5bd4740..dc09325 100644
--- a/README.md
+++ b/README.md
@@ -7,14 +7,13 @@
 
 ## Introduction
 
-`uptasticsearch` tackles the issue of getting data out of Elasticsearch and into a tabular format in R and Python. It should work for all versions of Elasticsearch from 1.0.0 onwards, but [is not regularly tested against all of them](https://github.com/uptake/uptasticsearch/blob/master/CONTRIBUTING.md#gha). If you run into a problem, please [open an issue](https://github.com/uptake/uptasticsearch/issues).
+`uptasticsearch` tackles the issue of getting data out of Elasticsearch and into a tabular format in R. It should work for all versions of Elasticsearch from 1.0.0 onwards, but [is not regularly tested against all of them](https://github.com/uptake/uptasticsearch/blob/master/CONTRIBUTING.md#gha). If you run into a problem, please [open an issue](https://github.com/uptake/uptasticsearch/issues).
 
 # Table of contents
 * [How it Works](#howitworks)
 * [Installation](#installation)
     * [R](#rinstallation)
-    * [Python](#pythoninstallation)
 * [Usage Examples](#examples)
     * [Get a Batch of Documents](#example1)
     * [Aggregation Results](#example2)
@@ -53,17 +52,6 @@ remotes::install_github(
 )
 ```
 
-### Python
-
-![Lifecycle Dormant](https://img.shields.io/badge/lifecycle-dormant-orange.svg)
-
-This package is not currently available on PyPi. To build the development version from source, clone this repo, then :
-
-```shell
-cd py-pkg
-pip install .
-```
-
 ## Usage Examples
 
 The examples presented here pertain to a fictional Elasticsearch index holding some information on a movie theater business.
@@ -170,37 +158,40 @@ revenueDT <- es_search(
 )
 ```
 
-In the example above, we used the [date_histogram](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-datehistogram-aggregation.html) and [extended_stats](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-extendedstats-aggregation.html) aggregations. `es_search()` has built-in support for many other aggregations and combinations of aggregations, with more on the way. Please see the table below for the current status of the package. Note that names of the form "agg1 - agg2" refer to the ability to handled aggregations nested inside other aggregations.
-
-|Agg type                                     | R support?  | Python support? |
-|:--------------------------------------------|:-----------:|:----------------:|
-|["cardinality"](http://bit.ly/2sn5Qiw)       |YES          |NO                |
-|["date_histogram"](http://bit.ly/2qIR97Z)    |YES          |NO                |
-|date_histogram - cardinality                 |YES          |NO                |
-|date_histogram - extended_stats              |YES          |NO                |
-|date_histogram - histogram                   |YES          |NO                |
-|date_histogram - percentiles                 |YES          |NO                |
-|date_histogram - significant_terms           |YES          |NO                |
-|date_histogram - stats                       |YES          |NO                |
-|date_histogram - terms                       |YES          |NO                |
-|["extended_stats"](http://bit.ly/2qKqsDU)    |YES          |NO                |
-|["histogram"](http://bit.ly/2sn4LXF)         |YES          |NO                |
-|["percentiles"](http://bit.ly/2sy4z7f)       |YES          |NO                |
-|["significant terms"](http://bit.ly/1KnhT1r) |YES          |NO                |
-|["stats"](http://bit.ly/2sn1t74)             |YES          |NO                |
-|["terms"](http://bit.ly/2mJyQ0C)             |YES          |NO                |
-|terms - cardinality                          |YES          |NO                |
-|terms - date_histogram                       |YES          |NO                |
-|terms - date_histogram - cardinality         |YES          |NO                |
-|terms - date_histogram - extended_stats      |YES          |NO                |
-|terms - date_histogram - histogram           |YES          |NO                |
-|terms - date_histogram - percentiles         |YES          |NO                |
-|terms - date_histogram - significant_terms   |YES          |NO                |
-|terms - date_histogram - stats               |YES          |NO                |
-|terms - date_histogram - terms               |YES          |NO                |
-|terms - extended_stats                       |YES          |NO                |
-|terms - histogram                            |YES          |NO                |
-|terms - percentiles                          |YES          |NO                |
-|terms - significant_terms                    |YES          |NO                |
-|terms - stats                                |YES          |NO                |
-|terms - terms                                |YES          |NO                |
+In the example above, we used the [date_histogram](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-datehistogram-aggregation.html) and [extended_stats](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-extendedstats-aggregation.html) aggregations.
+`es_search()` has built-in support for many other aggregations and combinations of aggregations, with more on the way.
+Please see the table below for the current status of the package.
+Note that names of the form "agg1 - agg2" refer to the ability to handle aggregations nested inside other aggregations.
+
+|Agg type                                     | R support?  |
+|:--------------------------------------------|:-----------:|
+|["cardinality"](http://bit.ly/2sn5Qiw)       |YES          |
+|["date_histogram"](http://bit.ly/2qIR97Z)    |YES          |
+|date_histogram - cardinality                 |YES          |
+|date_histogram - extended_stats              |YES          |
+|date_histogram - histogram                   |YES          |
+|date_histogram - percentiles                 |YES          |
+|date_histogram - significant_terms           |YES          |
+|date_histogram - stats                       |YES          |
+|date_histogram - terms                       |YES          |
+|["extended_stats"](http://bit.ly/2qKqsDU)    |YES          |
+|["histogram"](http://bit.ly/2sn4LXF)         |YES          |
+|["percentiles"](http://bit.ly/2sy4z7f)       |YES          |
+|["significant terms"](http://bit.ly/1KnhT1r) |YES          |
+|["stats"](http://bit.ly/2sn1t74)             |YES          |
+|["terms"](http://bit.ly/2mJyQ0C)             |YES          |
+|terms - cardinality                          |YES          |
+|terms - date_histogram                       |YES          |
+|terms - date_histogram - cardinality         |YES          |
+|terms - date_histogram - extended_stats      |YES          |
+|terms - date_histogram - histogram           |YES          |
+|terms - date_histogram - percentiles         |YES          |
+|terms - date_histogram - significant_terms   |YES          |
+|terms - date_histogram - stats               |YES          |
+|terms - date_histogram - terms               |YES          |
+|terms - extended_stats                       |YES          |
+|terms - histogram                            |YES          |
+|terms - percentiles                          |YES          |
+|terms - significant_terms                    |YES          |
+|terms - stats                                |YES          |
+|terms - terms                                |YES          |
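To make the "agg1 - agg2" rows concrete, here is a sketch of a nested aggregation run through `es_search()` — the index and field names are invented for illustration:

```r
library(uptasticsearch)

# "terms - date_histogram": bucket by theater, then by day within each theater
# (index and field names are hypothetical)
query_body <- '{
    "aggs": {
        "by_theater": {
            "terms": {"field": "theaterID"},
            "aggs": {
                "by_day": {
                    "date_histogram": {"field": "timestamp", "interval": "day"}
                }
            }
        }
    }
}'

aggDT <- es_search(
    es_host = "http://localhost:9200"
    , es_index = "sales"
    , query_body = query_body
)
```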
diff --git a/py-pkg/Makefile b/py-pkg/Makefile
deleted file mode 100644
index d261506..0000000
--- a/py-pkg/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-
-install:
-	# Install uptasticsearch Python package
-	cd py-pkg && \
-	python setup.py install && \
-	cd ..
-
-docs:
-	# Create sphinx rst files for every package and subpackage
-	cd py-pkg && \
-	sphinx-apidoc -f -e -o docs uptasticsearch && \
-	cd docs && \
-	make html
-
-integration-tests:
-	test_success=0 ;\
-	docker-compose build --no-cache ;\
-	docker-compose run uptasticsearch bash -c "sleep 15; cd dummy_data; sh load_dummy_data.sh; cd ..; pytest" || test_success=1 ;\
-	docker-compose down ;\
-	exit $$test_success
diff --git a/py-pkg/docs/Makefile b/py-pkg/docs/Makefile
deleted file mode 100644
index 7b62d95..0000000
--- a/py-pkg/docs/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-# Minimal makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line.
-SPHINXOPTS    =
-SPHINXBUILD   = sphinx-build
-SPHINXPROJ    = uptasticsearch
-SOURCEDIR     = .
-BUILDDIR      = _build
-
-# Put it first so that "make" without argument is like "make help".
-help:
-	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-
-.PHONY: help Makefile
-
-# Catch-all target: route all unknown targets to Sphinx using the new
-# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
-	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
diff --git a/py-pkg/docs/conf.py b/py-pkg/docs/conf.py
deleted file mode 100644
index 2b8aef9..0000000
--- a/py-pkg/docs/conf.py
+++ /dev/null
@@ -1,186 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Configuration file for the Sphinx documentation builder.
-#
-# This file does only contain a selection of the most common options. For a
-# full list see the documentation:
-# http://www.sphinx-doc.org/en/master/config
-
-# -- Path setup --------------------------------------------------------------
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#
-import os
-import sys
-sys.path.insert(0, os.path.abspath('..'))
-
-# -- Project information -----------------------------------------------------
-
-project = 'uptasticsearch'
-copyright = '2018, Author'
-author = 'Author'
-
-# The short X.Y version
-version = '0.1.0'
-# The full version, including alpha/beta/rc tags
-release = '0.1.0'
-
-
-# -- General configuration ---------------------------------------------------
-
-# If your documentation needs a minimal Sphinx version, state it here.
-#
-# needs_sphinx = '1.0'
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = [
-    'sphinx.ext.autodoc',
-    'sphinx.ext.viewcode',
-    'sphinx.ext.todo',
-]
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# The suffix(es) of source filenames.
-# You can specify multiple suffix as a list of string:
-#
-# source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
-
-# The master toctree document.
-master_doc = 'index'
-
-# The language for content autogenerated by Sphinx. Refer to documentation
-# for a list of supported languages.
-#
-# This is also used if you do content translation via gettext catalogs.
-# Usually you set "language" from the command line for these cases.
-language = 'en'
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This pattern also affects html_static_path and html_extra_path .
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
-
-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
-
-
-# -- Options for HTML output -------------------------------------------------
-
-# The theme to use for HTML and HTML Help pages. See the documentation for
-# a list of builtin themes.
-#
-html_theme = 'alabaster'
-
-# Theme options are theme-specific and customize the look and feel of a theme
-# further. For a list of options available for each theme, see the
-# documentation.
-#
-# html_theme_options = {}
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
-
-# Custom sidebar templates, must be a dictionary that maps document names
-# to template names.
-#
-# The default sidebars (for documents that don't match any pattern) are
-# defined by theme itself. Builtin themes are using these templates by
-# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
-# 'searchbox.html']``.
-#
-# html_sidebars = {}
-
-
-# -- Options for HTMLHelp output ---------------------------------------------
-
-# Output file base name for HTML help builder.
-htmlhelp_basename = 'uptasticsearchdoc'
-
-
-# -- Options for LaTeX output ------------------------------------------------
-
-latex_elements = {
-    # The paper size ('letterpaper' or 'a4paper').
-    #
-    # 'papersize': 'letterpaper',
-
-    # The font size ('10pt', '11pt' or '12pt').
-    #
-    # 'pointsize': '10pt',
-
-    # Additional stuff for the LaTeX preamble.
-    #
-    # 'preamble': '',
-
-    # Latex figure (float) alignment
-    #
-    # 'figure_align': 'htbp',
-}
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title,
-#  author, documentclass [howto, manual, or own class]).
-latex_documents = [
-    (master_doc, 'uptasticsearch.tex', 'uptasticsearch Documentation',
-     'Author', 'manual'),
-]
-
-
-# -- Options for manual page output ------------------------------------------
-
-# One entry per manual page. List of tuples
-# (source start file, name, description, authors, manual section).
-man_pages = [
-    (master_doc, 'uptasticsearch', 'uptasticsearch Documentation',
-     [author], 1)
-]
-
-
-# -- Options for Texinfo output ----------------------------------------------
-
-# Grouping the document tree into Texinfo files. List of tuples
-# (source start file, target name, title, author,
-#  dir menu entry, description, category)
-texinfo_documents = [
-    (master_doc, 'uptasticsearch', 'uptasticsearch Documentation',
-     author, 'uptasticsearch', 'One line description of project.',
-     'Miscellaneous'),
-]
-
-
-# -- Options for Epub output -------------------------------------------------
-
-# Bibliographic Dublin Core info.
-epub_title = project
-epub_author = author
-epub_publisher = author
-epub_copyright = copyright
-
-# The unique identifier of the text. This can be a ISBN number
-# or the project homepage.
-#
-# epub_identifier = ''
-
-# A unique identification for the text.
-#
-# epub_uid = ''
-
-# A list of files that should not be packed into the epub file.
-epub_exclude_files = ['search.html']
-
-
-# -- Extension configuration -------------------------------------------------
-
-# -- Options for todo extension ----------------------------------------------
-
-# If true, `todo` and `todoList` produce output, else they produce nothing.
-todo_include_todos = True
diff --git a/py-pkg/docs/index.rst b/py-pkg/docs/index.rst
deleted file mode 100644
index c06a245..0000000
--- a/py-pkg/docs/index.rst
+++ /dev/null
@@ -1,21 +0,0 @@
-.. uptasticsearch documentation master file, created by
-   sphinx-quickstart on Sat Oct 27 11:51:13 2018.
-   You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
-
-Welcome to uptasticsearch's documentation!
-==========================================
-
-.. toctree::
-   :maxdepth: 4
-   :caption: Contents:
-
-   uptasticsearch
-
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
diff --git a/py-pkg/docs/make.bat b/py-pkg/docs/make.bat
deleted file mode 100644
index da0c9ef..0000000
--- a/py-pkg/docs/make.bat
+++ /dev/null
@@ -1,36 +0,0 @@
-@ECHO OFF
-
-pushd %~dp0
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
-	set SPHINXBUILD=sphinx-build
-)
-set SOURCEDIR=.
-set BUILDDIR=_build
-set SPHINXPROJ=uptasticsearch
-
-if "%1" == "" goto help
-
-%SPHINXBUILD% >NUL 2>NUL
-if errorlevel 9009 (
-	echo.
-	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
-	echo.installed, then set the SPHINXBUILD environment variable to point
-	echo.to the full path of the 'sphinx-build' executable. Alternatively you
-	echo.may add the Sphinx directory to PATH.
-	echo.
-	echo.If you don't have Sphinx installed, grab it from
-	echo.http://sphinx-doc.org/
-	exit /b 1
-)
-
-%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
-goto end
-
-:help
-%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
-
-:end
-popd
diff --git a/py-pkg/docs/uptasticsearch.aggs_parsers.rst b/py-pkg/docs/uptasticsearch.aggs_parsers.rst
deleted file mode 100644
index 6c06c73..0000000
--- a/py-pkg/docs/uptasticsearch.aggs_parsers.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-uptasticsearch.aggs\_parsers module
-===================================
-
-.. automodule:: uptasticsearch.aggs_parsers
-    :members:
-    :undoc-members:
-    :show-inheritance:
diff --git a/py-pkg/docs/uptasticsearch.clients.rst b/py-pkg/docs/uptasticsearch.clients.rst
deleted file mode 100644
index 12f3bd9..0000000
--- a/py-pkg/docs/uptasticsearch.clients.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-uptasticsearch.clients module
-=============================
-
-.. automodule:: uptasticsearch.clients
-    :members:
-    :undoc-members:
-    :show-inheritance:
diff --git a/py-pkg/docs/uptasticsearch.fetch_all.rst b/py-pkg/docs/uptasticsearch.fetch_all.rst
deleted file mode 100644
index e34fd1f..0000000
--- a/py-pkg/docs/uptasticsearch.fetch_all.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-uptasticsearch.fetch\_all module
-================================
-
-.. automodule:: uptasticsearch.fetch_all
-    :members:
-    :undoc-members:
-    :show-inheritance:
diff --git a/py-pkg/docs/uptasticsearch.rst b/py-pkg/docs/uptasticsearch.rst
deleted file mode 100644
index 87f263f..0000000
--- a/py-pkg/docs/uptasticsearch.rst
+++ /dev/null
@@ -1,20 +0,0 @@
-uptasticsearch package
-======================
-
-Submodules
-----------
-
-.. toctree::
-
-   uptasticsearch.aggs_parsers
-   uptasticsearch.clients
-   uptasticsearch.fetch_all
-   uptasticsearch.util
-
-Module contents
----------------
-
-.. automodule:: uptasticsearch
-    :members:
-    :undoc-members:
-    :show-inheritance:
diff --git a/py-pkg/docs/uptasticsearch.util.rst b/py-pkg/docs/uptasticsearch.util.rst
deleted file mode 100644
index 8921aad..0000000
--- a/py-pkg/docs/uptasticsearch.util.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-uptasticsearch.util module
-==========================
-
-.. automodule:: uptasticsearch.util
-    :members:
-    :undoc-members:
-    :show-inheritance:
diff --git a/py-pkg/setup.py b/py-pkg/setup.py
deleted file mode 100644
index d794a18..0000000
--- a/py-pkg/setup.py
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import json
-import os
-import re
-from setuptools import find_packages
-from setuptools import setup
-import subprocess
-import datetime
-
-CURRENT_VERSION = "0.2.0"
-
-#########################
-
-# Packages used
-documentation_packages = [
-    "sphinx",
-    "sphinx_rtd_theme",
-    "sphinxcontrib-napoleon",
-    "sphinxcontrib-programoutput"
-]
-regular_packages = [
-    'pandas',
-    'requests',
-    'tqdm'
-]
-testing_packages = [
-    'pytest',
-    'mock'
-]
-
-# This is where the magic happens
-setup(name='uptasticsearch',
-      version=CURRENT_VERSION,
-      description="Get Data Frame Representations of 'Elasticsearch' Results",
-      author='Nick Paras',
-      author_email='nickgp@gmail.com',
-      url='https://github.com/uptake/uptasticsearch',
-      packages=find_packages(),
-      install_requires=regular_packages + documentation_packages + testing_packages,
-      include_package_data=True,
-      extras_require={
-          'doc': documentation_packages,
-          'all': regular_packages + documentation_packages,
-      },
-      zip_safe = False
-      )
diff --git a/py-pkg/tests/test_clients.py b/py-pkg/tests/test_clients.py
deleted file mode 100644
index f40f437..0000000
--- a/py-pkg/tests/test_clients.py
+++ /dev/null
@@ -1,33 +0,0 @@
-"""Integration Tests - test against each version of Elasticsearch
-"""
-import pytest
-import json
-import pandas as pd
-
-from uptasticsearch.clients import Uptasticsearch1
-from uptasticsearch.clients import Uptasticsearch2
-from uptasticsearch.clients import Uptasticsearch5
-from uptasticsearch.clients import Uptasticsearch6
-from uptasticsearch.clients import Uptasticsearch7
-from uptasticsearch.clients import uptasticsearch_factory
-
-from uptasticsearch.fetch_all import es_search
-
-
-class TestEsSearch(object):
-    """
-    es_search should work and return a pandas DataFrame
-    """
-    host = "http://127.0.0.1:9200"
-
-    def test_rectangle(self):
-        result = es_search(
-            self.host,
-            "shakespeare",
-            query_body=json.dumps({}),
-            size=10000,
-            max_hits=10,
-            scroll="1m"
-        )
-        print(result)
-        assert isinstance(result, pd.DataFrame)
diff --git a/py-pkg/tests/test_util.py b/py-pkg/tests/test_util.py
deleted file mode 100644
index 2ee567d..0000000
--- a/py-pkg/tests/test_util.py
+++ /dev/null
@@ -1,97 +0,0 @@
-"""Test the util module
-"""
-
-import pytest
-
-from uptasticsearch.util import _format_es_url
-from uptasticsearch.util import convert_to_sec
-
-
-class TestFormatUrl(object):
-    """Tests for uptasticsearch.util._format_es_url
-    """
-    def test_typing(self):
-        """throws TypeErrors w/ bad types, returns correct type
-        """
-        with pytest.raises(TypeError):
-            _format_es_url(3)
-        with pytest.raises(TypeError):
-            _format_es_url(3.14)
-        with pytest.raises(TypeError):
-            _format_es_url(lambda x: 'cat')
-        with pytest.raises(TypeError):
-            _format_es_url([1, 2, 3])
-        with pytest.raises(TypeError):
-            _format_es_url({"key": "value"})
-        assert isinstance(_format_es_url("http://es.com:9200"), str)
-
-    def test_normal(self):
-        """returns a valid url unchanged
-        """
-        valid_url = "http://es.cluster.com:9200"
-        assert valid_url == _format_es_url(valid_url)
-        assert "http://localhost:9200" == _format_es_url("localhost:9200")
-
-    def test_slash_removal(self):
-        """removes extra trailing slashes
-        """
-        assert _format_es_url("http://es.cluster.com:9200/") == "http://es.cluster.com:9200"
-        assert _format_es_url("http://es.cluster.com:9200///") == "http://es.cluster.com:9200"
-
-    def test_protocol(self):
-        """inserts protocol if missing/broken
-        """
-        assert _format_es_url("es.cluster.com:9200/") == "http://es.cluster.com:9200"
-        assert _format_es_url("http:/es.cluster.com:9200/") == "http://es.cluster.com:9200"
-
-    def test_port(self):
-        """raises ValueError w/o proper port
-        """
-        with pytest.raises(ValueError):
-            _format_es_url("es.cluster.com/")
-        with pytest.raises(ValueError):
-            _format_es_url("es.cluster.com")
-        assert isinstance(_format_es_url("http://es.com:9200"), str)
-        assert isinstance(_format_es_url("es.es.com:9200"), str)
-
-    def test_nonsense(self):
-        """test that it fails with nonsense
-        """
-        with pytest.raises(ValueError):
-            _format_es_url("some garbage string")
-        with pytest.raises(ValueError):
-            _format_es_url("some garbage string that has http:// in it")
-        with pytest.raises(ValueError):
-            _format_es_url("some garbage string that has http:// in it:9200")
-        with pytest.raises(ValueError):
-            _format_es_url("s3:// some garbage string that has http:// in it:9200")
-
-
-class TestConvertToSec(object):
-    """Test uptasticsearch.util.convert_to_sec
-    """
-    def test_typing(self):
-        with pytest.raises(TypeError):
-            convert_to_sec(1)
-        with pytest.raises(TypeError):
-            convert_to_sec([1])
-        with pytest.raises(TypeError):
-            convert_to_sec({"cat": "dog"})
-        with pytest.raises(TypeError):
-            convert_to_sec(1.0)
-        assert convert_to_sec("1m") == 60
-
-    def test_garbage(self):
-        with pytest.raises(ValueError):
-            convert_to_sec("cactus")
-        with pytest.raises(ValueError):
-            convert_to_sec("1y")
-
-
-    def test_normal(self):
-        assert convert_to_sec("1s") == 1
-        assert convert_to_sec("1m") == 60
-        assert convert_to_sec("5m") == 300
-        assert convert_to_sec("1h") == 3600
-        assert convert_to_sec("1d") == 86400
-        assert convert_to_sec("2w") == 1209600
diff --git a/py-pkg/uptasticsearch/__init__.py b/py-pkg/uptasticsearch/__init__.py
deleted file mode 100644
index 4e16ac2..0000000
--- a/py-pkg/uptasticsearch/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from uptasticsearch.fetch_all import es_search
diff --git a/py-pkg/uptasticsearch/aggs_parsers.py b/py-pkg/uptasticsearch/aggs_parsers.py
deleted file mode 100644
index 2011a92..0000000
--- a/py-pkg/uptasticsearch/aggs_parsers.py
+++ /dev/null
@@ -1,171 +0,0 @@
-
-import pandas as pd
-
-
-def _terms_agg_to_df(aggs_json):
-    """
-    Given the JSON returned by an Elasticsearch aggs "terms" query,
-    parse that JSON into a Pandas DataFrame. Currently only has
-    support for a one-field aggs.
-
-    The "terms" query is analogous to a COUNT() and GROUPBY in SQL world.
-    It returns counts of unique values for a given attribute. For more, see
-    https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html
-
-    Args:
-        **aggs_json (dict)**: A dictionary representation of an aggs query
-            result. If a str is passed, it will be converted to a dictionary.
-
-    Returns:
-        A pandas DataFrame representation of the aggs query.
-    """
-
-    if not isinstance(aggs_json, str):
-        raise TypeError("aggs_json must be a dictionary, you gave {}".format(type(aggs_json)))
-
-    # Parse the result into a DF
-    key_name = list(aggs_json.keys())[0]
-
-    parsed_obs = [[obs['key'], obs['doc_count']] for obs in aggs_json[key_name]['buckets']]
-    out_df = pd.DataFrame(parsed_obs, columns=[key_name, 'doc_count'])
-
-    return(out_df)
-
-
-def _extended_stats_agg_to_df(aggs_json):
-    """
-    Given the JSON returned by an Elasticsearch "extended_stats" aggregation,
-    parse that JSON into a Pandas DataFrame. Currently only has
-    support for a one-field aggs.
-
-    The "extended_stats" aggregation computes the following summary statistics
-    on a given numerical field: count, min, max, mean, sum, sum of squares,
-    variance, and standard deviation. For more, see
-    https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-extendedstats-aggregation.html
-
-    Args:
-        **aggs_json (dict)**: A dictionary representation of an aggs query
-            result. If a str is passed, it will be converted to a dictionary.
-
-    Returns:
-        A pandas DataFrame representation of the aggs query.
-    """
-
-    if not isinstance(aggs_json, str):
-        raise TypeError("aggs_json must be a dictionary, you gave {}".format(type(aggs_json)))
-
-    # Parse the result into a DF
-    key_name = list(aggs_json.keys())[0]
-
-    out_df = pd.DataFrame({'agg_field': key_name,
-                           'count': aggs_json[key_name]['count'],
-                           'min': aggs_json[key_name]['min'],
-                           'max': aggs_json[key_name]['max'],
-                           'avg': aggs_json[key_name]['avg'],
-                           'sum': aggs_json[key_name]['sum'],
-                           'sum_of_squares': aggs_json[key_name]['sum_of_squares'],
-                           'variance': aggs_json[key_name]['variance'],
-                           'std_deviation': aggs_json[key_name]['std_deviation']
-                           }, index=[0])
-
-    return(out_df)
-
-
-def _stats_agg_to_df(aggs_json):
-    """
-    Given the JSON returned by an Elasticsearch "stats" aggregation,
-    parse that JSON into a Pandas DataFrame. Currently only has
-    support for a one-field aggs.
-
-    The "stats" aggregation computes the following summary statistics
-    on a given numerical field: count, min, max, mean, sum. For more, see
-    https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-stats-aggregation.html
-
-    Args:
-        **aggs_json (dict)**: A dictionary representation of an aggs query
-            result. If a str is passed, it will be converted to a dictionary.
-
-    Returns:
-        A pandas DataFrame representation of the aggs query.
-    """
-
-    if not isinstance(aggs_json, str):
-        raise TypeError("aggs_json must be a dictionary, you gave {}".format(type(aggs_json)))
-
-    # Parse the result into a DF
-    key_name = list(aggs_json.keys())[0]
-
-    out_df = pd.DataFrame({'agg_field': key_name,
-                           'count': aggs_json[key_name]['count'],
-                           'min': aggs_json[key_name]['min'],
-                           'max': aggs_json[key_name]['max'],
-                           'avg': aggs_json[key_name]['avg'],
-                           'sum': aggs_json[key_name]['sum']
-                           }, index=[0])
-
-    return(out_df)
-
-
-def _date_histogram_agg_to_df(aggs_json):
-    """
-    Given the JSON returned by an Elasticsearch aggs "date_histogram" query,
-    parse that JSON into a Pandas DataFrame. Currently only has
-    support for a one-field aggs.
-
-    The "date_histogram" aggregation is used to bucket records into discrete,
-    equal-sized time windows. The plain-vanilla date_histogram aggregation
-    returns counts of documents within each window, but the most common use
-    case involves nested other aggregations within a date_histogram to
-    create time-series features. For more, see:
-    https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-datehistogram-aggregation.html
-
-    Args:
-        **aggs_json (dict)**: A dictionary representation of an aggs query
-            result. If a str is passed, it will be converted to a dictionary.
-
-    Returns:
-        A pandas DataFrame representation of the aggs query.
-    """
-
-    if not isinstance(aggs_json, str):
-        raise TypeError("aggs_json must be a dictionary, you gave {}".format(type(aggs_json)))
-
-    # Parse the result into a DF
-    key_name = list(aggs_json.keys())[0]
-
-    parsed_obs = [[obs['key_as_string'], obs['doc_count']] for obs in aggs_json[key_name]['buckets']]
-    out_df = pd.DataFrame(parsed_obs, columns=[key_name, 'doc_count'])
-
-    return(out_df)
-
-
-def _percentiles_agg_to_df(aggs_json):
-    """
-    Given the JSON returned by an Elasticsearch aggs "percentiles" query,
-    parse that JSON into a Pandas DataFrame. Currently only has
-    support for a one-field aggs.
-
-    The "percentiles" aggregation takes in a vector of desired percentiles
-    and returns the corresponding percentiles from the distribution of a
-    numeric field. For more, see
-    https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-percentile-aggregation.html
-
-    Args:
-        **aggs_json (dict)**: A dictionary representation of an aggs query
-            result. If a str is passed, it will be converted to a dictionary.
-
-    Returns:
-        A pandas DataFrame representation of the aggs query.
-    """
-
-    if not isinstance(aggs_json, str):
-        raise TypeError("aggs_json must be a dictionary, you gave {}".format(type(aggs_json)))
-
-    # Parse the result into a DF
-    key_name = list(aggs_json.keys())[0]
-
-    out_dict = {'agg_field': key_name}
-    out_dict.update(aggs_json[key_name]['values'])
-    out_df = pd.DataFrame(out_dict, index=[0])
-
-    return(out_df)
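The R package retains this parsing functionality (the real implementation lives in `chomp_aggs()` in `r-pkg`). For readers mapping the deleted Python over, a rough R analogue of `_terms_agg_to_df()` using `{jsonlite}` and `{data.table}` — a sketch, not the package's actual code:

```r
library(data.table)
library(jsonlite)

# parse a one-level "terms" aggregation result into a data.table
aggs_json <- '{"some_field": {"buckets": [
    {"key": "a", "doc_count": 10},
    {"key": "b", "doc_count": 3}
]}}'

parsed <- jsonlite::fromJSON(aggs_json)
key_name <- names(parsed)[1L]

# the buckets parse directly to a data.frame with "key" and "doc_count" columns
outDT <- data.table::as.data.table(parsed[[key_name]][["buckets"]])
data.table::setnames(outDT, "key", key_name)
print(outDT)
```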
diff --git a/py-pkg/uptasticsearch/clients.py b/py-pkg/uptasticsearch/clients.py
deleted file mode 100644
index fbb871a..0000000
--- a/py-pkg/uptasticsearch/clients.py
+++ /dev/null
@@ -1,151 +0,0 @@
-import requests
-import json
-from requests.packages.urllib3.util.retry import Retry
-from requests.adapters import HTTPAdapter
-
-import os
-
-from uptasticsearch.util import _format_es_url
-from uptasticsearch.util import convert_to_sec
-
-
-class HttpClient(object):
-
-    def __init__(self, retries=5, backoff_factor=0.1):
-        self.retry = Retry(total=retries,
-                           backoff_factor=backoff_factor,
-                           status_forcelist=[500, 502, 503, 504])
-
-    def get(self, url, data=None, headers=None):
-        s = requests.Session()
-        s.mount('http://', HTTPAdapter(max_retries=self.retry))
-        return s.get(url, data=data, headers=headers)
-
-    def post(self, url, data, headers=None):
-        s = requests.Session()
-        s.mount('http://', HTTPAdapter(max_retries=self.retry))
-        return s.post(url, data=data, headers=headers)
-
-
-class Uptasticsearch(object):
-
-    def __init__(self, url, http_client=HttpClient()):
-        self.url = _format_es_url(url)
-        self.client = http_client
-
-    def _get_total_hits(self, response_json):
-        """
-        Given a dictionary representing the content of a
-        respose to a ``POST /_search`` request, return the total
-        number of docs matching the query
-        """
-        return response_json['hits']['total']
-
-    def search(self, body, index="", doc_type="", scroll_context_timer="1m", page_size=10000, max_hits=None):
-        """Execute a Search Query on the Elasticsearch Cluster
-
-        Args:
-            body (json string): The query body
-            index (string): The name of the Index to Query. Default: "" (no index)
-            doc_type (string): The Doc Type to query. Default: "" (no doc_type)
-            scroll_context_timer (string): A string such as "1m" or "5m" that specifies how long to keep the scroll context alive between pages for large queries. Default: "1m"
-            page_size (int): The number of 'hits' per page. Default: 10000
-            max_hits (int): The maximum number of 'hits' to return. Default: None, all hits will be returned
-
-        Return:
-            A List of Dicts. Each Dict is the value of the "_source" key for each of the hits.
-
-        """
-
-        convert_to_sec(scroll_context_timer)  # check context timer input
-
-        response = self.client.post(os.path.join(self.url,
-                                                 index,
-                                                 doc_type,
-                                                 "_search?scroll={}&size={}".format(scroll_context_timer,
-                                                                                    page_size)),
-                                    data=body,
-                                    headers={'Content-Type': 'application/json'})
-
-        page = response.json()
-        total_hits = self._get_total_hits(page)
-        total_hits = min(total_hits, max_hits) if max_hits is not None else total_hits
-
-        results = [d['_source'] for d in page['hits']['hits']]
-        page_size = len(results)
-
-        while page_size > 0 and len(results) < total_hits:
-            page = self._make_scroll_request(scroll_context_timer,
-                                             page.get("_scroll_id")).json()
-            page_size = len(page['hits']['hits'])
-            results += [d['_source'] for d in page['hits']['hits']]
-
-        if total_hits > len(results):
-            raise Exception('Expected {} Results, instead got {}'.format(total_hits, len(results)))
-        else:
-            return results[:total_hits]
-
-    def _make_scroll_request(self, scroll_context_timer, scroll_id):
-        raise NotImplementedError("_make_scroll_request is abstract. Use a subclass instead of Uptasticsearch")
-
-
-class Uptasticsearch1(Uptasticsearch):
-
-    def _make_scroll_request(self, scroll_context_timer, scroll_id):
-        return self.client.post(os.path.join(self.url,
-                                             "_search/scroll?scroll={}".format(scroll_context_timer)),
-                                data=scroll_id)
-
-
-class Uptasticsearch2(Uptasticsearch):
-
-    def _make_scroll_request(self, scroll_context_timer, scroll_id):
-        return self.client.post(os.path.join(self.url,
-                                             "_search/scroll"),
-                                data=json.dumps({"scroll": scroll_context_timer,
-                                                 "scroll_id": scroll_id}),
-                                headers={'Content-Type': 'application/json'})
-
-
-class Uptasticsearch5(Uptasticsearch):
-
-    def _make_scroll_request(self, scroll_context_timer, scroll_id):
-        return self.client.post(os.path.join(self.url,
-                                             "_search/scroll"),
-                                data=json.dumps({"scroll": scroll_context_timer,
-                                                 "scroll_id": scroll_id}),
-                                headers={'Content-Type': 'application/json'})
-
-
-class Uptasticsearch6(Uptasticsearch):
-
-    def _make_scroll_request(self, scroll_context_timer, scroll_id):
-        return self.client.post(os.path.join(self.url,
-                                             "_search/scroll"),
-                                data=json.dumps({"scroll": scroll_context_timer,
-                                                 "scroll_id": scroll_id}),
-                                headers={'Content-Type': 'application/json'})
-
-class Uptasticsearch7(Uptasticsearch6):
-
-    def _get_total_hits(self, response_json):
-        """
-        Given a dictionary representing the content of a
-        respose to a ``POST /_search`` request, return the total
-        number of docs matching the query
-        """
-        return response_json['hits']['total']['value']
-
-
-def uptasticsearch_factory(url, retries=5, backoff_factor=0.1):
-    http_client = HttpClient(retries=retries, backoff_factor=backoff_factor)
-    es_url = _format_es_url(url)
-    cluster_version = http_client.get(es_url).json()['version']['number'].split('.')[0]
-
-    return {
-        "1": Uptasticsearch1,
-        "2": Uptasticsearch2,
-        "5": Uptasticsearch5,
-        "6": Uptasticsearch6,
-        "7": Uptasticsearch7
-    }[cluster_version](es_url, http_client)
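`uptasticsearch_factory()` above picked a client class off the major version reported by `GET /`, which is also why ES 7's move of the total-hit count to `hits.total.value` needed its own subclass; the R package does an equivalent version check internally before choosing a scrolling strategy. A sketch of that check with `{httr}` (the helper name here is illustrative, not the package's internals):

```r
library(httr)
library(jsonlite)

# ask the cluster for its major version, e.g. "7.12.1" -> "7"
.get_major_version <- function(es_host = "http://localhost:9200") {
    response <- httr::GET(es_host)
    httr::stop_for_status(response)
    info <- jsonlite::fromJSON(httr::content(response, as = "text"))
    strsplit(info[["version"]][["number"]], "\\.")[[1L]][1L]
}
```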
diff --git a/py-pkg/uptasticsearch/fetch_all.py b/py-pkg/uptasticsearch/fetch_all.py
deleted file mode 100644
index ce40d94..0000000
--- a/py-pkg/uptasticsearch/fetch_all.py
+++ /dev/null
@@ -1,48 +0,0 @@
-"""Functions for Pulling data from Elasticsearch and unpacking into a table
-"""
-
-import pandas as pd
-import json
-
-from uptasticsearch.clients import uptasticsearch_factory
-
-
-def es_search(es_host, es_index, query_body="{}", size=10000, max_hits=None,
-              scroll="5m"):
-    """
-    Execute a query to elasticsearch and get a DataFrame back
-
-    Args:
-        es_host (string): a url of the elasticsearch cluster e.g. http://localhost:9200
-        es_index (string): the name of the Elasticsearch index
-        query_body (json): json query
-        size (int): the number of hits per page. Note: the size will not affect max_hits, but it will affect the time to return the max_hits.
-        max_hits (int, None): the total number of hits to allow. If None, no limit
-        scroll (str): the time to keep the scroll context alive for each page
-
-    Return:
-        A pandas DataFrame
-
-    """
-
-    client = uptasticsearch_factory(es_host)
-
-    # Figure out if we are scrolling or getting an aggs result
-    if json.loads(query_body).get("aggs") is not None:
-        msg = "es_search detected that this is an aggs request " + \
-              "and will only return aggregation results."
-        print(msg)
-
-        raise NotImplementedError("es_search aggs parser has not been implemented yet!")
-
-    else:
-        docs = client.search(query_body,
-                             index=es_index,
-                             scroll_context_timer=scroll,
-                             page_size=size,
-                             max_hits=max_hits)
-
-    if len(docs) > 0:
-        return pd.io.json.json_normalize(docs)
-    else:
-        return None
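Everything this deleted `es_search()` did is covered by the R function of the same name, which also implements the aggs branch left as a `NotImplementedError` above. A sketch of the equivalent document-fetching call (the index name is hypothetical):

```r
library(uptasticsearch)

# pull at most 100 matching documents into a data.table;
# "{}" matches everything, and "sales" is an invented index name
sampleDT <- es_search(
    es_host = "http://localhost:9200"
    , es_index = "sales"
    , query_body = "{}"
    , max_hits = 100
    , scroll = "1m"
)
```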
diff --git a/py-pkg/uptasticsearch/util.py b/py-pkg/uptasticsearch/util.py
deleted file mode 100644
index 550b3a6..0000000
--- a/py-pkg/uptasticsearch/util.py
+++ /dev/null
@@ -1,104 +0,0 @@
-"""
-Small function to validate and format an Elasticsearch hostname string.
-"""
-
-import re
-
-
-def _format_es_url(es_host):
-    """
-    Given a string with an Elasticsearch hostname, confirm its validity and,
-    in some cases, fix issues.
-
-    A valid Elasticsearch hostname has the following components:
-        * Begins with a transfer protocol, e.g. "http://"
-        * transfer protocol is followed by DNS name or IP to a running
-          Elasticsearch cluster. e.g. "myindex.mybusiness.com"
-        * Cluster name is following by a port number, e.g. ":9200"
-
-    Args:
-        es_host (str): A string containing an Elasticsearch hostname
-
-    Returns:
-        A string with a cleaned-up Elasticsearch hostname
-    """
-
-    if not isinstance(es_host, str):
-        raise TypeError('es_host should be of type "str", you provided {}'.format(type(es_host)))
-
-    if " " in es_host:
-        raise ValueError("urls must not contain literal spaces")
-
-    # es_host does not end in a slash
-    trailing_slash_pattern = '/+$'
-    if re.search(trailing_slash_pattern, es_host):
-        es_host = re.sub('/+$', '', es_host)
-
-    # es_host has a port number
-    port_pattern = ':[0-9]+$'
-    if not re.search(port_pattern, es_host):
-        msg = 'No port found in es_host. es_host should be a string of the form ' + \
-              '[transfer_protocol][hostname]:[port]. For example: ' + \
-              '"http://myindex.mysite.com:9200"'
-        raise ValueError(msg)
-
-    # es_host has a valid protocol
-    protocol_pattern = '^[A-Za-z]+://'
-    if not re.search(protocol_pattern, es_host):
-        print('You did not provide a protocol with es_host. Assuming http')
-
-        # Doing this to avoid cases where you just missed a slash or something,
-        # e.g. "http:/es.thing.com:9200" --> "es.thing.com:9200"
-        # This will also match IP addresses, e.g. '0.0.0.0:9200
-        host_m = re.search('(\.?[A-Za-z0-9]+)*:[0-9]+$', es_host)
-        host = es_host[host_m.start():host_m.end()]
-
-        es_host = 'http://' + host
-
-    return es_host
-
-
-def convert_to_sec(duration_string):
-    """
-    Given a string that could be passed as a datemath expression to
-    Elasticsearch (e.g. "2m"), parse it and return numerical value
-    in seconds.
-
-    Args:
-        duration_string (str): A string of the form '<number><time_unit>'
-            (e.g. '21d', '15h'). Currently, 's', 'm', 'h', 'd', and 'w'
-            are supported.
-
-    Returns:
-        Numeric value (in seconds) of duration_string.
-    """
-    if not isinstance(duration_string, str):
-        raise TypeError("A string of the form '<number><time_unit>' must be provided")
-
-    # Grab string from the end (e.g. "2d" --> "d")
-    time_unit = re.search('([A-Za-z])+$', duration_string).groups()[0]
-
-    # Grab numeric component
-    time_num = int(re.sub(time_unit, '', duration_string))
-
-    # Build up switch dict on time_unit
-    time_converter = {
-        's': lambda x: x,
-        'm': lambda x: x * 60,
-        'h': lambda x: x * 60 * 60,
-        'd': lambda x: x * 60 * 60 * 24,
-        'w': lambda x: x * 60 * 60 * 24 * 7
-    }
-
-    # Try to convert duration string to numeric value
-    converter = time_converter.get(time_unit)
-    if converter is None:
-        msg = 'Could not figure out units of datemath ' + \
-              'string! Only durations in seconds (s), ' + \
-              'minutes (m), hours (h), days (d), or weeks (w) ' + \
-              'are supported. You provided: ' + str(duration_string)
-        raise ValueError(msg)
-
-    time_in_seconds = converter(time_num)
-
-    return time_in_seconds
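The R package keeps an internal equivalent of `convert_to_sec()` for validating scroll-timeout strings like `"1m"` or `"5m"`. For reference, a standalone R translation — illustrative only, not the package's actual code:

```r
# convert an Elasticsearch datemath duration like "5m" to seconds
convert_to_sec <- function(duration_string) {
    stopifnot(is.character(duration_string), length(duration_string) == 1L)
    time_unit <- gsub("[0-9]", "", duration_string)
    time_num <- as.numeric(gsub("[A-Za-z]", "", duration_string))
    # switch() returns NULL for unsupported units
    multiplier <- switch(
        time_unit
        , "s" = 1
        , "m" = 60
        , "h" = 60 * 60
        , "d" = 60 * 60 * 24
        , "w" = 60 * 60 * 24 * 7
    )
    if (is.null(multiplier)) {
        stop("Only durations in s, m, h, d, or w are supported. You provided: ", duration_string)
    }
    time_num * multiplier
}

convert_to_sec("5m")  # 300
```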