From 2c00567aac6d9c79087cfff0d054f64922b1473d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 3 Nov 2020 14:01:14 -0800 Subject: [PATCH] sqlite-utils search WIP, refs #192 --- docs/cli.rst | 44 ++++++++++++++++++ sqlite_utils/cli.py | 111 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 154 insertions(+), 1 deletion(-) diff --git a/docs/cli.rst b/docs/cli.rst index 54591cc00..5d2011641 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -885,6 +885,50 @@ You can rebuild every FTS table by running ``rebuild-fts`` without passing any t $ sqlite-utils rebuild-fts mydb.db +.. _cli_search: + +Executing searches +================== + +Once you have configured full-text search for a table, you can search it using ``sqlite-utils search``:: + + $ sqlite-utils search mydb.db documents searchterm + +This command accepts the same output options as ``sqlite-utils query``: ``--table``, ``--csv``, ``--nl`` etc. + +By default it shows the most relevant matches first. You can specify a different sort order using the ``-o`` option, which can take a column or a column followed by ``desc``:: + + # Sort by rowid + $ sqlite-utils search mydb.db documents searchterm -o rowid + # Sort by created in descending order + $ sqlite-utils search mydb.db documents searchterm -o 'created desc' + +You can specify a subset of columns to be returned using the ``-c`` option one or more times:: + + $ sqlite-utils search mydb.db documents searchterm -c title -c created + +Use the ``--sql`` option to output the SQL that would be executed, rather than running the query:: + + $ sqlite-utils search mydb.db documents searchterm --sql + with original as ( + select + rowid, + * + from [documents] + ) + select + original.*, + [documents_fts].rank as rank + from + [original] + join [documents_fts] on [original].rowid = [documents_fts].rowid + where + [documents_fts] match :search + order by + rank desc + limit + 20 + .. _cli_vacuum: Vacuum diff --git a/sqlite_utils/cli.py b/sqlite_utils/cli.py index fe42ec2ea..15fc4b265 100644 --- a/sqlite_utils/cli.py +++ b/sqlite_utils/cli.py @@ -7,6 +7,7 @@ import pathlib import sqlite_utils from sqlite_utils.db import AlterError +import textwrap import itertools import json import os @@ -45,7 +46,7 @@ def output_options(fn): is_flag=True, default=False, ), - click.option("-c", "--csv", is_flag=True, help="Output CSV"), + click.option("--csv", is_flag=True, help="Output CSV"), click.option("--no-headers", is_flag=True, help="Omit CSV headers"), click.option("-t", "--table", is_flag=True, help="Output as a table"), click.option( @@ -968,6 +969,114 @@ def query( click.echo(line) +@cli.command() +@click.argument( + "path", + type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), + required=True, +) +@click.argument("dbtable") +@click.argument("q") +@click.option("-o", "--order", type=str, help="Order by ('column' or 'column desc')") +@click.option("-c", "--column", type=str, multiple=True, help="Columns to return") +@click.option( + "--sql", "show_sql", is_flag=True, help="Show SQL query that would be run" +) +@output_options +@load_extension_option +@click.pass_context +def search( + ctx, + path, + dbtable, + q, + order, + show_sql, + column, + nl, + arrays, + csv, + no_headers, + table, + fmt, + json_cols, + load_extension, +): + "Execute a full-text search against this table" + db = sqlite_utils.Database(path) + _load_extensions(db, load_extension) + # Check table exists + table_obj = db[dbtable] + if not table_obj.exists(): + raise click.ClickException("Table '{}' does not exist".format(dbtable)) + fts_table = table_obj.detect_fts() + if not fts_table: + raise click.ClickException( + "Table '{}' is not configured for full-text search".format(dbtable) + ) + # Pick names for table and rank column that don't clash + original = "original_" if dbtable == "original" else "original" + rank = "rank" + while rank in table_obj.columns_dict: + rank = rank + "_" + columns = "*" + if column: + # Check they all exist + for c in column: + if c not in table_obj.columns_dict: + raise click.ClickException( + "Table '{}' has no column '{}".format(dbtable, c) + ) + columns = ", ".join("[{}]".format(c) for c in column) + sql = textwrap.dedent( + """ + with {original} as ( + select + rowid, + {columns} + from [{dbtable}] + ) + select + {original}.*, + [{fts}].rank as {rank} + from + [{original}] + join [{fts}] on [{original}].rowid = [{fts}].rowid + where + [{fts}] match :search + order by + {order} + limit + {limit} + """.format( + dbtable=dbtable, + original=original, + columns=columns, + rank=rank, + fts=fts_table, + order=order if order else "{} desc".format(rank), + limit=20, + ) + ).strip() + if show_sql: + click.echo(sql) + return + ctx.invoke( + query, + path=path, + sql=sql, + nl=nl, + arrays=arrays, + csv=csv, + no_headers=no_headers, + table=table, + fmt=fmt, + json_cols=json_cols, + param=[("search", q)], + load_extension=load_extension, + ) + + @cli.command() @click.argument( "path",