-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathspeech-tagger.el
571 lines (513 loc) · 24.4 KB
/
speech-tagger.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
;;; speech-tagger.el --- tag parts of speech using coreNLP
;; This file is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;; This file is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <http://www.gnu.org/licenses/>.
;; Author: Danny McClanahan <[email protected]>
;; Version: 2015.09.03
;; Package-Requires: ((cl-lib "0.5"))
;; Package-Version: 0.1
;; Keywords: speech, tag, nlp, language, corenlp, parsing, natural
;; URL: https://github.com/cosmicexplorer/speech-tagger
;; This file is not part of GNU Emacs.
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;; The interactive functions exported by this extension follow a common
;; protocol: if a region is active, then modify the region; otherwise modify
;; the entire buffer. If a prefix argument is provided, they read in a buffer
;; to modify the entirety of. A given region will be expanded to whitespace
;; boundaries (so if region is around the l characters in he|ll|o, the entirety
;; of |hello| will be selected).
;; Requires a "java" binary on the PATH. Downloads a mildly large jar file on
;; first use (~20.7M), which causes a large pause on the first usage, but none
;; afterwards. You can customize `speech-tagger-jar-path' to determine where it
;; looks for the presence of the jar. You can download the jar and hash manually
;; from `https://cosmicexplorer.github.io/speech-tagger/speech-tagger.jar' and
;; `https://cosmicexplorer.github.io/speech-tagger/speech-tagger.md5sum'.
;;; Usage:
;; M-x `speech-tagger-tag-dwim'
;; - Tag parts of speech in the appropriate region
;; - "dwim" is an abbreviation for "do what I mean;" hopefully what I the
;; developer mean is close enough to what you the user mean.
;; - Tagging, as shown in the image above, colors a part of speech and adds a
;; tooltip to it so that if you mouse over or move point over the part of
;; speech, you get a description of the part of speech and example of that part
;; of speech.
;; M-x `speech-tagger-clear-tags-dwim'
;; - As above, but clears the region of all such tags.
;; ESC-: (`speech-tagger-clear-state')
;; - Useful in the case that something screws up and you wish to debug.
;; - Should revert all lisp code back to the same as when first loaded.
;; - Does NOT delete the jar file, since the file takes an annoyingly long time
;; to download.
;; ESC-: (`speech-tagger-force-refresh-jar')
;; - Re-downloads jar file and md5sum. Useful for debugging jar downloads.
;;; Code:
(require 'json)
(require 'cl-lib)
(require 'url)
;; Customization group that collects the faces declared by this file (the
;; loading face and the per-tag faces created from the JSON table).
(defgroup speech-tagger-faces nil "Faces for speech-tag extension."
:group 'processes)
;; Customization group for file-system locations used by this file
;; (see `speech-tagger-jar-path').
(defgroup speech-tagger-paths nil
"Paths to external files used in this extension."
:group 'processes)
;; tag descriptions json path
;; Directory of this file while it is being loaded; falls back to
;; `default-directory' when `load-file-name' is nil.
(defvar speech-tagger-this-file-dir
(if load-file-name (file-name-directory load-file-name) default-directory))
;; Base name of the JSON file that describes the part-of-speech tags.
(defvar speech-tagger-pos-json-file "penn_treebank_tags.json")
;; Location of the tag description file: the base name appended to
;; `speech-tagger-this-file-dir'.
(defvar speech-tagger-pos-json-path
(concat speech-tagger-this-file-dir speech-tagger-pos-json-file))
;; Tag table produced by `speech-tagger-refresh-table'; nil until it is built.
(defvar speech-tagger-*pos-hash* nil)
;; part-of-speech face specs
;; Character codes that get escaped before a tag name is turned into a face
;; symbol.  The codes correspond, in order, to the characters listed in the
;; docstring: 96 `  39 '  34 "  40 (  41 )  59 ;  35 #  91 [  93 ]
(defconst speech-tagger-+macro-charset+
'(96 39 34 40 41 59 35 91 93)
"` ' \" ( ) ; # [ ]")
(defun speech-tagger-uniquify-list (l)
  "Return new list with only unique elements of L, in same order.
Elements are compared with `eql' and the first occurrence of each one is
kept.  L itself is not modified."
  (let ((seen nil))
    (dolist (el l)
      ;; `memql' returns the matching tail, so a nil element is still
      ;; recognised as present (a found-element test would report nil for it
      ;; and let the duplicate through).
      (unless (memql el seen)
        (push el seen)))
    ;; SEEN was built back to front; it is freshly consed, so reversing it
    ;; destructively is safe.
    (nreverse seen)))
;; assert macro charset's uniqueness
;; Runs when the file is loaded: de-duplicating the charset must give back an
;; `equal' list, otherwise abort with a throw.
(or (equal (speech-tagger-uniquify-list speech-tagger-+macro-charset+)
           speech-tagger-+macro-charset+)
    (throw 'speech-tagger-invalid-charset "macro charset is non-unique"))
;; Regexp matching any single character listed in
;; `speech-tagger-+macro-charset+'; used by
;; `speech-tagger-escape-macro-characters'.
(defconst speech-tagger-+macro-regex+
(regexp-opt-charset speech-tagger-+macro-charset+))
(defun speech-tagger-find-free-char (ch charset)
  "Return the first character code at or above CH that is not in CHARSET.
CH itself is returned when it is already absent from CHARSET."
  (let ((candidate ch))
    ;; Step upwards one code at a time until CHARSET no longer contains it.
    (while (cl-find candidate charset)
      (setq candidate (1+ candidate)))
    candidate))
;; Table mapping each character of `speech-tagger-+macro-charset+' (as a
;; one-character string) to the one-character string that replaces it.
;; NOTE(review): the mapping is not one-to-one -- with the current charset the
;; codes 39, 40 and 41 all resolve to 42 (`*'), and 34 and 35 both resolve to
;; 36 (`$').
(defconst speech-tagger-+macro-charset-escapes+
  (let ((escapes (make-hash-table :test #'equal)))
    (dolist (code speech-tagger-+macro-charset+ escapes)
      (let ((replacement
             (speech-tagger-find-free-char code speech-tagger-+macro-charset+)))
        ;; Keys and values are strings rather than characters.
        (puthash (make-string 1 code) (make-string 1 replacement) escapes)))))
(defun speech-tagger-escape-macro-characters (str)
  "Replace characters not allowed to represent Lisp symbols in STR.
Every character matching `speech-tagger-+macro-regex+' is replaced by the
string stored for it in `speech-tagger-+macro-charset-escapes+'.  Return the
resulting string; STR is not modified."
  (replace-regexp-in-string
   speech-tagger-+macro-regex+
   (lambda (ch-str) (gethash ch-str speech-tagger-+macro-charset-escapes+))
   str
   ;; FIXEDCASE: never adjust the case of the substitute.
   t
   ;; LITERAL: insert the substitute verbatim.  The substitute for `[' is a
   ;; lone backslash, which is an invalid replacement template when it is not
   ;; taken literally.
   t))
(defun speech-tagger-hash-pos-for-color (pos-str)
  "Pseudo-random color based on `md5' hash for POS-STR.
Return a string of the form \"#rrggbb\" built from the first six hex digits
of the hash, so the same POS-STR always gets the same color."
  ;; Use the hex digits directly: converting them to a number and printing it
  ;; back would drop leading zeros and produce a malformed five-digit (or
  ;; shorter) color specification.
  (concat "#" (substring (md5 pos-str) 0 6)))
(defun speech-tagger-destructure-json-table (entry face)
  "Build the plist stored for one part of speech.
ENTRY is an array whose element 0 is the description and element 1 the
examples; FACE is stored unchanged.  The result has the keys :description,
:examples and :face."
  (list :description (aref entry 0)
        :examples (aref entry 1)
        :face face))
(defun speech-tagger-get-json-table (path)
  "Read the JSON file at PATH and return a hash table of parts of speech.
Each key of the JSON object is a tag.  Its value is replaced in the table by
the plist from `speech-tagger-destructure-json-table', and a face named
\"speech-tagger-\" followed by the escaped tag is declared for it."
  (let ((pos-table (let ((json-object-type 'hash-table))
                     (json-read-file path))))
    (maphash
     (lambda (tag entry)
       (let* ((face-name
               (intern (concat "speech-tagger-"
                               (speech-tagger-escape-macro-characters tag))))
              ;; The foreground color is derived from the tag itself.
              (face-spec
               `((default (:foreground
                           ,(speech-tagger-hash-pos-for-color tag)))))
              ;; Element 0 of the entry (the description) is the face docstring.
              (face (custom-declare-face face-name face-spec (aref entry 0)
                                         :group 'speech-tagger-faces)))
         ;; Overwrite the raw JSON value for this key with the plist.
         (puthash tag
                  (speech-tagger-destructure-json-table entry face)
                  pos-table)))
     pos-table)
    pos-table))
(defun speech-tagger-refresh-table ()
  "Rebuild `speech-tagger-*pos-hash*' from `speech-tagger-pos-json-path'.
Return the new table."
  (let ((fresh-table
         (speech-tagger-get-json-table speech-tagger-pos-json-path)))
    (setq speech-tagger-*pos-hash* fresh-table)))
(defcustom speech-tagger-jar-path speech-tagger-this-file-dir
  "Directory holding speech-tagger.jar required for part-of-speech tagging.
The jar and md5sum file names are appended to the expanded value of this
option without inserting a separator, so the value should end in a directory
separator (as the default does)."
  ;; Declaring the type gives the Customize interface a directory widget.
  :type 'directory
  :group 'speech-tagger-paths)
;; Checked by `speech-tagger-refresh-jar', which does nothing when this is
;; non-nil.
(defvar speech-tagger-is-development nil
"If non-nil, don't refresh jar `speech-tagger-refresh-jar'.")
;; File names of the jar and of its md5sum file, both local and remote.
(defconst speech-tagger-jar-filename "speech-tagger.jar")
(defconst speech-tagger-hash-filename "speech-tagger.md5sum")
;; Process object set by `speech-tagger-start-tag-process'; nil before that.
(defvar speech-tagger-*tag-proc* nil)
;; Name of the tagging process and of the buffer associated with it.
(defconst speech-tagger-+tag-proc-name+ "speech-tagger")
(defconst speech-tagger-+tag-proc-buf-name+ "*speech-tagger*")
;; Counter advanced by `speech-tagger-get-job-id' to find an unused job id.
(defvar speech-tagger-*job-id-counter* 0)
;; Hash table from job id to region log for outstanding jobs; created by
;; `speech-tagger-setup' and nil until then.
(defvar speech-tagger-*jobs* nil)
;; Face put on the overlay that `speech-tagger-lock-region' creates over a
;; region while it waits for the tagging process.
(defface speech-tagger-loading-text
`((default (:background "#005540")))
"Face used for loading text being analyzed."
:group 'speech-tagger-faces)
;; Text used as the help-echo and point-hover message of a locked region.
(defconst speech-tagger-+loading-text-msg+
"Loading parts of speech from process...")
(defun speech-tagger-lock-region (beg end)
  "Make the text between BEG and END read-only and mark it as loading.
An overlay carrying the face `speech-tagger-loading-text' and the loading
message is placed over the same range."
  (put-text-property beg end 'read-only t)
  (let ((loading-msg speech-tagger-+loading-text-msg+)
        (loading-overlay (make-overlay beg end nil t)))
    ;; The `speech-tagger' property lets `speech-tagger-clear-overlays' find
    ;; this overlay later.
    (overlay-put loading-overlay 'speech-tagger t)
    (overlay-put loading-overlay 'face 'speech-tagger-loading-text)
    (overlay-put loading-overlay 'help-echo loading-msg)
    (overlay-put loading-overlay 'speech-tagger-point-hover loading-msg)))
(defun speech-tagger-unlock-region (beg end)
  "Undo `speech-tagger-lock-region' for the text between BEG and END.
Remove overlays whose face is `speech-tagger-loading-text' and set the
`read-only' text property back to nil."
  (let ((inhibit-read-only t))
    ;; The two steps are independent: one touches overlays, the other text
    ;; properties.
    (remove-overlays beg end 'face 'speech-tagger-loading-text)
    (put-text-property beg end 'read-only nil)))
(defun speech-tagger-make-region-log (beg end buf)
  "Return the job entry describing the text between BEG and END in BUF.
The entry is a plist with :beg and :end markers (created at point in the
current buffer, both with insertion type t), :buffer BUF and :text, the
substring of BUF between BEG and END.  Point is left at END."
  (let (start-marker stop-marker)
    (goto-char beg)
    (setq start-marker (point-marker))
    (set-marker-insertion-type start-marker t)
    (goto-char end)
    (setq stop-marker (point-marker))
    (set-marker-insertion-type stop-marker t)
    (list :beg start-marker
          :end stop-marker
          :buffer buf
          :text (with-current-buffer buf (buffer-substring beg end)))))
(defun speech-tagger-lock-region-and-log (beg end id)
  "Lock the text between BEG and END and register it as job ID.
The region log for the current buffer is stored in `speech-tagger-*jobs*'
under the key ID."
  ;; Lock first: the log is built from the region after it has been locked.
  (speech-tagger-lock-region beg end)
  (let ((entry (speech-tagger-make-region-log beg end (current-buffer))))
    (puthash id entry speech-tagger-*jobs*)))
(defun speech-tagger-make-tag-proc-json (beg end id)
  "Return the request plist for job ID covering the text from BEG to END.
The plist has the keys :job-id and :string; the string is the buffer text
without text properties."
  (let ((payload (buffer-substring-no-properties beg end)))
    (list :job-id id :string payload)))
(defun speech-tagger-search-for-whitespace (direction)
  "Move point to frontier of whitespace in given DIRECTION.
DIRECTION is the symbol `backward' or `forward'.  Point ends up just after
the nearest whitespace character before it, or just before the nearest one
after it.  Point does not move when it already touches whitespace on that
side.  When there is no whitespace in that direction, point moves to the
corresponding end of the accessible portion of the buffer, so that a word at
the very start or end of the buffer is covered completely.  Any other
DIRECTION throws `speech-tagger-bad-search-direction'."
  (let ((space-regex "[[:space:]\r\n]"))
    (cond ((eq direction 'backward)
           (unless (let ((ch (char-before)))
                     (and ch (string-match-p space-regex (make-string 1 ch))))
             (if (re-search-backward space-regex nil t)
                 ;; The search leaves point before the whitespace; step over it.
                 (forward-char)
               (goto-char (point-min)))))
          ((eq direction 'forward)
           (unless (let ((ch (char-after)))
                     (and ch (string-match-p space-regex (make-string 1 ch))))
             (if (re-search-forward space-regex nil t)
                 ;; The search leaves point after the whitespace; step back.
                 (backward-char)
               (goto-char (point-max)))))
          (t (throw 'speech-tagger-bad-search-direction
                    "whitespace search direction not recognized")))))
(defun speech-tagger-widen-region-to-word-bounds (beg end)
  "Widen region between BEG and END with `speech-tagger-search-for-whitespace'.
Return a list (NEW-BEG NEW-END) holding the widened bounds.  Point is moved
as a side effect and is left at NEW-END."
  (goto-char beg)
  (speech-tagger-search-for-whitespace 'backward)
  (let ((new-beg (point)))
    (goto-char end)
    (speech-tagger-search-for-whitespace 'forward)
    ;; Return the positions found by the two searches, not the original
    ;; arguments.
    (list new-beg (point))))
(defun speech-tagger-get-job-id ()
  "Return a job id that is not currently a key of `speech-tagger-*jobs*'.
`speech-tagger-*job-id-counter*' is advanced until it names a free id and is
left at the returned value.  Throws `no-available-jobs' if the counter
reaches the value just below where it started."
  (let ((stop-id (1- speech-tagger-*job-id-counter*)))
    (while (gethash speech-tagger-*job-id-counter* speech-tagger-*jobs*)
      ;; should never happen unless rest of code is awful
      (when (= speech-tagger-*job-id-counter* stop-id)
        (throw 'no-available-jobs "no free job ids found"))
      (setq speech-tagger-*job-id-counter*
            (1+ speech-tagger-*job-id-counter*)))
    speech-tagger-*job-id-counter*))
;; Process output received so far that has not yet been terminated by a
;; newline; see `speech-tagger-receive-tag-proc-string'.
(defvar speech-tagger-*tag-proc-cur-line* "")
(defun speech-tagger-mark-parts-of-speech (beg tagged-string)
  "Mark parts of speech in the text starting at BEG according to TAGGED-STRING.
TAGGED-STRING is an array of plists.  Each plist has :start and :end offsets
relative to BEG, the :text expected between them and the :tag assigned to
it.  For every tag found in `speech-tagger-*pos-hash*' an overlay with the
tag's face and a descriptive tooltip is created; sections with an unknown
tag get no overlay.  Throws `speech-tagger-different-text' when the buffer
text of a section differs from its :text."
  (cl-loop
   for tagged-section across tagged-string
   do (let* ((inhibit-read-only t)
             (text (plist-get tagged-section :text))
             (tag (plist-get tagged-section :tag))
             (beg-ind (+ beg (plist-get tagged-section :start)))
             (end-ind (+ beg (plist-get tagged-section :end)))
             (new-txt (buffer-substring beg-ind end-ind))
             (tag-hash (gethash tag speech-tagger-*pos-hash*)))
        (unless (equal text new-txt)
          (throw 'speech-tagger-different-text
                 (format "%s \"%s\" %s \"%s\"" "previous text" text
                         "is different than current text" new-txt)))
        ;; Only create the overlay when the tag is known.  An overlay without
        ;; the `speech-tagger' property could never be found again by
        ;; `speech-tagger-clear-overlays'.
        (when tag-hash
          (let ((olay (make-overlay beg-ind end-ind nil t))
                (help-info
                 (format "%s: e.g %s"
                         (propertize (plist-get tag-hash :description)
                                     'face 'font-lock-keyword-face)
                         (plist-get tag-hash :examples))))
            (overlay-put olay 'face (plist-get tag-hash :face))
            (overlay-put olay 'speech-tagger t)
            (overlay-put olay 'help-echo help-info)
            (overlay-put olay 'speech-tagger-point-hover help-info)
            (overlay-put olay 'mouse-face 'mode-line-highlight))))))
(defun speech-tagger-process-tag-proc-json (plist)
  "Apply the tagging result PLIST received from the external process.
PLIST carries :job-id and :tagged-string.  The job is looked up in
`speech-tagger-*jobs*'; its region is tagged, the job is removed from the
table and the region is unlocked.  Throws `speech-tagger-no-such-job' when
the id is unknown and `speech-tagger-different-text' when the region's text
changed since the job was logged."
  (let* ((job-id (plist-get plist :job-id))
         (tagged-string (plist-get plist :tagged-string))
         (job (gethash job-id speech-tagger-*jobs*)))
    (unless job
      (throw 'speech-tagger-no-such-job
             (format "%s %d %s" "no job with id" job-id "found")))
    (let ((start (plist-get job :beg))
          (stop (plist-get job :end))
          (job-buffer (plist-get job :buffer))
          (logged-text (plist-get job :text)))
      (with-current-buffer job-buffer
        ;; Refuse to tag if the region no longer holds the text that was sent.
        (let ((cur-text (buffer-substring start stop)))
          (unless (equal logged-text cur-text)
            (throw 'speech-tagger-different-text
                   (format "%s \"%s\" %s \"%s\"" "previous text" logged-text
                           "is different than current text" cur-text))))
        (speech-tagger-mark-parts-of-speech start tagged-string)
        (remhash job-id speech-tagger-*jobs*)
        (speech-tagger-unlock-region start stop)))))
;; all json is received as a single line of text, making stream parsing easier
(defun speech-tagger-receive-tag-proc-string (str)
  "Receive string STR from process filter and line-buffer.
Every complete line (text up to a newline, prefixed by whatever was buffered
from earlier calls) is parsed as JSON into a plist and passed to
`speech-tagger-process-tag-proc-json'.  Text after the last newline is kept
in `speech-tagger-*tag-proc-cur-line*' until the rest of its line arrives."
  (let ((pending str)
        newline-pos)
    ;; Loop over the lines instead of recursing once per line, so a chunk
    ;; holding many messages cannot exhaust the Lisp evaluation depth.
    (while (setq newline-pos (string-match-p "\n" pending))
      (let ((line (concat speech-tagger-*tag-proc-cur-line*
                          (substring pending 0 newline-pos))))
        ;; Drop the buffered prefix before dispatching: if parsing or handling
        ;; exits non-locally, a stale partial line must not be glued onto the
        ;; next message.
        (setq speech-tagger-*tag-proc-cur-line* ""
              pending (substring pending (1+ newline-pos)))
        (speech-tagger-process-tag-proc-json
         (let ((json-object-type 'plist))
           (json-read-from-string line)))))
    ;; in case json message is larger than emacs's process output buffer
    ;; (unlikely if we don't send in massive strings to tagging process)
    (setq speech-tagger-*tag-proc-cur-line*
          (concat speech-tagger-*tag-proc-cur-line* pending))))
(defun speech-tagger-message-process-buffer (proc msg)
  "Insert MSG into PROC's process buffer.
MSG is appended at the end of the buffer.  Nothing happens when PROC has no
buffer or its buffer has been killed, so the process filter and sentinel
that call this do not signal an error in that situation."
  (let ((buf (process-buffer proc)))
    (when (buffer-live-p buf)
      (with-current-buffer buf
        (goto-char (point-max))
        (insert msg)))))
(defun speech-tagger-start-tag-process ()
  "Start the external tagging process and store it in `speech-tagger-*tag-proc*'.
Runs \"java -jar\" on speech-tagger.jar under `speech-tagger-jar-path'.  All
output is copied to the process buffer and also line-buffered into
`speech-tagger-receive-tag-proc-string'.  Return the new process."
  (let* ((jar-dir (expand-file-name speech-tagger-jar-path))
         (jar-file (expand-file-name (concat jar-dir "speech-tagger.jar")))
         (tagger (start-process speech-tagger-+tag-proc-name+
                                speech-tagger-+tag-proc-buf-name+
                                "java" "-jar" jar-file)))
    (set-process-filter
     tagger
     (lambda (process output)
       (speech-tagger-message-process-buffer process output)
       (speech-tagger-receive-tag-proc-string output)))
    (set-process-sentinel
     tagger
     (lambda (process event)
       (speech-tagger-message-process-buffer process event)
       (message "%s exited with message \"%s\""
                (process-name process) event)))
    (setq speech-tagger-*tag-proc* tagger)))
(defun speech-tagger-post-command-fn ()
  "Echo the part-of-speech description found at point, if there is one.
The text is read from the `speech-tagger-point-hover' property at point and
shown with `message'.  Meant for `post-command-hook'."
  (let ((hover-text (get-char-property (point) 'speech-tagger-point-hover)))
    (and hover-text (message "%s" hover-text))))
(defun speech-tagger-send-region-to-tag-proc (beg end proc)
  "Send the text between BEG and END to the tagging process PROC.
A job id is allocated, the bounds are passed through
`speech-tagger-widen-region-to-word-bounds', the resulting region is locked
and logged, and the request is sent as one line of JSON."
  (let* ((id (speech-tagger-get-job-id))
         (bounds (speech-tagger-widen-region-to-word-bounds beg end))
         (wide-beg (nth 0 bounds))
         (wide-end (nth 1 bounds)))
    (speech-tagger-lock-region-and-log wide-beg wide-end id)
    (let ((request
           (json-encode
            (speech-tagger-make-tag-proc-json wide-beg wide-end id))))
      ;; The trailing newline terminates the request line.
      (process-send-string proc (concat request "\n")))))
(defun speech-tagger-clear-state ()
  "Utility function used to reset Lisp code to initial state.
Empty and drop the job and tag tables, reset the job counter and the
line buffer, delete every process attached to the tagger's process buffer
and kill that buffer.  `speech-tagger-setup' must be run again afterwards."
  (when speech-tagger-*jobs* (clrhash speech-tagger-*jobs*))
  (when speech-tagger-*pos-hash* (clrhash speech-tagger-*pos-hash*))
  (setq speech-tagger-*job-id-counter* 0
        speech-tagger-*tag-proc* nil
        speech-tagger-*tag-proc-cur-line* ""
        speech-tagger-*jobs* nil
        speech-tagger-*pos-hash* nil)
  (dolist (proc (process-list))
    (let ((buf (process-buffer proc)))
      ;; Skip processes without a buffer: `buffer-name' of nil would report
      ;; the current buffer's name and could match by accident.
      (when (and buf
                 (equal (buffer-name buf) speech-tagger-+tag-proc-buf-name+))
        (delete-process proc))))
  ;; The constant holds a buffer NAME, so look the buffer up with
  ;; `get-buffer'; `bufferp' is never true for a string.
  (let ((buf (get-buffer speech-tagger-+tag-proc-buf-name+)))
    (when buf (kill-buffer buf))))
(defun speech-tagger-clear-overlays (&optional beg end)
  "Remove this package's overlays and read-only marking between BEG and END.
BEG defaults to `point-min' and END to `point-max'.  Overlays whose
`speech-tagger' property is t are removed and the `read-only' text property
is set to nil."
  (let ((inhibit-read-only t)
        (from (or beg (point-min)))
        (to (or end (point-max))))
    (put-text-property from to 'read-only nil)
    (remove-overlays from to 'speech-tagger t)))
;;;###autoload
(defun speech-tagger-clear-tags-dwim (pfx)
  "Clear tag overlays from the active region, or else from the whole buffer.
With prefix argument PFX, read a buffer name and clear that entire buffer
instead."
  (interactive "P")
  (cond
   (pfx
    (let ((bufname (read-buffer "buffer to clear tags from: " nil t)))
      (with-current-buffer bufname (speech-tagger-clear-overlays))))
   ((use-region-p)
    (let ((beg (min (region-beginning) (region-end)))
          (end (max (region-beginning) (region-end))))
      (speech-tagger-clear-overlays beg end)))
   (t
    (speech-tagger-clear-overlays))))
;; Base URL under which the jar and its md5sum file are published.
(defconst speech-tagger-resources-base-url
"https://cosmicexplorer.github.io/speech-tagger/")
(defconst speech-tagger-jar-url
(concat speech-tagger-resources-base-url speech-tagger-jar-filename)
"Url to download the speech tagging jar from.")
;; URL of the md5sum file that belongs to the jar.
(defconst speech-tagger-jar-hash-url
(concat speech-tagger-resources-base-url speech-tagger-hash-filename))
;; Local path of the jar.  Computed once, when this form is evaluated, from
;; the value `speech-tagger-jar-path' has at that moment.
(defconst speech-tagger-jar-file-path
(expand-file-name
(concat (expand-file-name speech-tagger-jar-path)
speech-tagger-jar-filename)))
;; Local path of the md5sum file, computed the same way as the jar path.
(defconst speech-tagger-jar-hash-path
(expand-file-name
(concat (expand-file-name speech-tagger-jar-path)
speech-tagger-hash-filename)))
;; Initialised from the local md5sum file if that file exists, otherwise nil.
;; `speech-tagger-force-refresh-jar' updates it after a verified download.
(defvar speech-tagger-jar-hash
(and (file-exists-p speech-tagger-jar-hash-path)
(with-temp-buffer
(insert-file-contents speech-tagger-jar-hash-path)
(buffer-string)))
"Hash string from md5sum to detect jar versioning.")
(defun speech-tagger-retrieve-url-no-headers (url)
  "Retrieve URL and remove HTTP headers.
Return the buffer holding the response body; the caller owns that buffer.
Messages emitted during the retrieval are suppressed, and a start and a
completion message are shown instead.  Signal an error when nothing could
be retrieved or when the response has no blank line ending its headers."
  (message "Retrieving from URL: %s" url)
  (prog1
      (cl-letf (((symbol-function #'message) (symbol-function #'ignore)))
        (let ((buf (url-retrieve-synchronously url)))
          ;; A failed retrieval yields no buffer; report that clearly instead
          ;; of failing with a type error further down.
          (unless (buffer-live-p buf)
            (error "Could not retrieve %s" url))
          (with-current-buffer buf
            (goto-char (point-min))
            ;; The first empty line separates the headers from the body.
            (unless (re-search-forward "^$" nil t)
              (error "No end of HTTP headers found in response from %s" url))
            ;; Step over the newline of the empty line, unless the response
            ;; ends right there (empty body).
            (unless (eobp) (forward-char))
            (delete-region (point-min) (point))
            (current-buffer))))
    (message "%s" "Download complete")))
;; Plist of the files to download.  Each value is a two-element list
;; (LOCAL-PATH URL); read by `speech-tagger-force-refresh-jar'.
(defconst speech-tagger-downloads-list
(list
:hash (list speech-tagger-jar-hash-path speech-tagger-jar-hash-url)
:jar (list speech-tagger-jar-file-path speech-tagger-jar-url)))
(defun speech-tagger-save-buf (file url)
"Download URL and save the response body to FILE.
Return the buffer holding the downloaded contents, which now visits FILE.
The buffer is left alive; callers in this file kill it themselves."
(let ((buf (speech-tagger-retrieve-url-no-headers url))
;; Do not run any `before-save-hook' functions while saving the download.
(before-save-hook nil))
(with-current-buffer buf
(fundamental-mode)
(set-visited-file-name file)
;; Set the coding system used for writing the file to `raw-text'.
(set-buffer-file-coding-system 'raw-text)
(save-buffer))
buf))
(defun speech-tagger-force-refresh-jar ()
"Force re-downloading of speech-tagger.jar and its md5sum file.
Both files are downloaded and saved to the paths listed in
`speech-tagger-downloads-list'.  The md5 of the downloaded jar is then
compared with the downloaded hash file.  On a match
`speech-tagger-jar-hash' is set to the hash file's contents; otherwise
`speech-tagger-hash-unequal' is thrown."
(let* ((hash (plist-get speech-tagger-downloads-list :hash))
(jar (plist-get speech-tagger-downloads-list :jar))
;; Download and save the hash file, keeping its contents as a string.
(hash-str
(let ((buf (speech-tagger-save-buf
(cl-first hash) (cl-second hash))) str)
(setq str (with-current-buffer buf (buffer-string)))
(kill-buffer buf)
str))
;; Download and save the jar; its buffer is needed below for hashing.
(jar-buf (speech-tagger-save-buf (cl-first jar) (cl-second jar)))
;; Build the line the hash file is expected to contain: the md5 of the jar
;; contents followed by the literal suffix below.
(md5-jar-str (concat (let ((coding-system-for-read 'raw-text))
(secure-hash 'md5 (with-current-buffer jar-buf
(fundamental-mode)
(buffer-string))))
" speech-tagger.jar\n")))
(kill-buffer jar-buf)
(if (equal md5-jar-str hash-str)
(setq speech-tagger-jar-hash hash-str)
;; The files stay on disk; only the in-memory hash is left unchanged.
(throw 'speech-tagger-hash-unequal
(format
"%s (%s,%s) %s"
"speech-tagger.jar hashes were not equal" md5-jar-str hash-str
"Try reloading this extension.")))))
(defun speech-tagger-refresh-jar ()
  "Download the jar again when it is missing or out of date.
Does nothing when `speech-tagger-is-development' is non-nil.  Otherwise the
published hash is fetched, and `speech-tagger-force-refresh-jar' runs when
the jar or hash file is missing locally or when the published hash differs
from `speech-tagger-jar-hash'."
  (unless speech-tagger-is-development
    (let* ((remote-buf (speech-tagger-retrieve-url-no-headers
                        speech-tagger-jar-hash-url))
           (hash-contents (prog1 (with-current-buffer remote-buf
                                   (buffer-string))
                            (kill-buffer remote-buf))))
      (cond
       ;; One of the two local files is missing.
       ((not (and (file-exists-p speech-tagger-jar-file-path)
                  (file-exists-p speech-tagger-jar-hash-path)))
        (message "%s %s, %s %s" "speech-tagger.jar or hash not found at"
                 speech-tagger-jar-file-path "downloading from"
                 speech-tagger-jar-url)
        (speech-tagger-force-refresh-jar))
       ;; No local hash is known, or it differs from the published one.
       ((not (and speech-tagger-jar-hash
                  (equal hash-contents speech-tagger-jar-hash)))
        (message
         "%s %s" "speech-tagger.jar is out of date, downloading from"
         speech-tagger-jar-url)
        (speech-tagger-force-refresh-jar))))))
(defun speech-tagger-setup ()
  "Create whatever global state is still missing.
Builds the tag table and the job table if they are nil, installs the
post-command hook and starts the tagging process unless a live one exists.
Needs to be run again after `speech-tagger-clear-state'."
  (or speech-tagger-*pos-hash* (speech-tagger-refresh-table))
  (or speech-tagger-*jobs* (setq speech-tagger-*jobs* (make-hash-table)))
  ;; pretty harmless to add this, even if permanent, since it won't affect
  ;; other overlays unless they use the 'speech-tagger-point-hover property
  (add-hook 'post-command-hook #'speech-tagger-post-command-fn)
  (let ((existing (get-process speech-tagger-+tag-proc-name+)))
    (unless (process-live-p existing)
      (speech-tagger-start-tag-process))))
;;;###autoload
(defun speech-tagger-tag-dwim (pfx)
  "Tag parts of speech in the active region, or else in the whole buffer.
With prefix argument PFX, read a buffer name and tag that entire buffer
instead.  `speech-tagger-setup' is run first.  Existing tag overlays in the
affected range are cleared and the text is sent to the external process for
analysis.  Be warned that this may take some time on large selections or
buffers."
  (interactive "P")
  (speech-tagger-setup)
  (cond
   (pfx
    (let ((bufname (read-buffer "buffer to tag: " nil t)))
      (with-current-buffer bufname
        (speech-tagger-clear-overlays (point-min) (point-max))
        (speech-tagger-send-region-to-tag-proc
         (point-min) (point-max) speech-tagger-*tag-proc*))))
   ((use-region-p)
    (let* ((beg (min (region-beginning) (region-end)))
           (end (max (region-beginning) (region-end)))
           (wide-range
            (speech-tagger-widen-region-to-word-bounds beg end)))
      ;; Clear over the widened range; the send function widens BEG and END
      ;; again on its own.
      (speech-tagger-clear-overlays (car wide-range) (cadr wide-range))
      (speech-tagger-send-region-to-tag-proc
       beg end speech-tagger-*tag-proc*)))
   (t
    (speech-tagger-clear-overlays (point-min) (point-max))
    (speech-tagger-send-region-to-tag-proc
     (point-min) (point-max) speech-tagger-*tag-proc*))))
;; Check the published hash when this file is loaded and (re-)download the
;; jar and hash if required.  Unless `speech-tagger-is-development' is
;; non-nil, this fetches the hash URL on every load.
(speech-tagger-refresh-jar)
(provide 'speech-tagger)
;;; speech-tagger.el ends here