#!/bin/sh # aside from this initial boilerplate, this is actually -*- scheme -*- code main='(module-ref (resolve-module '\''(scripts read-text-outline)) '\'main')' exec ${GUILE-guile} -l $0 -c "(apply $main (cdr (command-line)))" "$@" !# ;;; read-text-outline --- Read a text outline and display it as a sexp ;; Copyright (C) 2002, 2006 Free Software Foundation, Inc. ;; ;; This program is free software; you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as ;; published by the Free Software Foundation; either version 2, or ;; (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with this software; see the file COPYING. If not, write to ;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ;; Boston, MA 02110-1301 USA ;;; Author: Thien-Thi Nguyen ;;; Commentary: ;; Usage: read-text-outline OUTLINE ;; ;; Scan OUTLINE file and display a list of trees, the structure of ;; each reflecting the "levels" in OUTLINE. The recognized outline ;; format (used to indicate outline headings) is zero or more pairs of ;; leading spaces followed by "-". Something like: ;; ;; - a 0 ;; - b 1 ;; - c 2 ;; - d 1 ;; - e 0 ;; - f 1 ;; - g 2 ;; - h 1 ;; ;; In this example the levels are shown to the right. The output for ;; such a file would be the single line: ;; ;; (("a" ("b" "c") "d") ("e" ("f" "g") "h")) ;; ;; Basically, anything at the beginning of a list is a parent, and the ;; remaining elements of that list are its children. ;; ;; ;; Usage from a Scheme program: These two procs are exported: ;; ;; (read-text-outline . args) ; only first arg is used ;; (read-text-outline-silently port) ;; (make-text-outline-reader re specs) ;; ;; `make-text-outline-reader' returns a proc that reads from PORT and ;; returns a list of trees (similar to `read-text-outline-silently'). ;; ;; RE is a regular expression (string) that is used to identify a header ;; line of the outline (as opposed to a whitespace line or intervening ;; text). RE must begin w/ a sub-expression to match the "level prefix" ;; of the line. You can use `level-submatch-number' in SPECS (explained ;; below) to specify a number other than 1, the default. ;; ;; Normally, the level of the line is taken directly as the length of ;; its level prefix. This often results in adjacent levels not mapping ;; to adjacent numbers, which confuses the tree-building portion of the ;; program, which expects top-level to be 0, first sub-level to be 1, ;; etc. You can use `level-substring-divisor' or `compute-level' in ;; SPECS to specify a constant scaling factor or specify a completely ;; alternative procedure, respectively. ;; ;; SPECS is an alist which may contain the following key/value pairs: ;; ;; - level-submatch-number NUMBER ;; - level-substring-divisor NUMBER ;; - compute-level PROC ;; - body-submatch-number NUMBER ;; - extra-fields ((FIELD-1 . SUBMATCH-1) (FIELD-2 . SUBMATCH-2) ...) ;; ;; The PROC value associated with key `compute-level' should take a ;; Scheme match structure (as returned by `regexp-exec') and return a ;; number, the normalized level for that line. If this is specified, ;; it takes precedence over other level-computation methods. ;; ;; Use `body-submatch-number' if RE specifies the whole body, or if you ;; want to make use of the extra fields parsing. The `extra-fields' ;; value is a sub-alist, whose keys name additional fields that are to ;; be recognized. These fields along with `level' are set as object ;; properties of the final string ("body") that is consed into the tree. ;; If a field name ends in "?" the field value is set to be #t if there ;; is a match and the result is not an empty string, and #f otherwise. ;; ;; ;; Bugs and caveats: ;; ;; (1) Only the first file specified on the command line is scanned. ;; (2) TAB characters at the beginnings of lines are not recognized. ;; (3) Outlines that "skip" levels signal an error. In other words, ;; this will fail: ;; ;; - a 0 ;; - b 1 ;; - c 3 <-- skipped 2 -- error! ;; - d 1 ;; ;; ;; TODO: Determine what's the right thing to do for skips. ;; Handle TABs. ;; Make line format customizable via longopts. ;;; Code: (define-module (scripts read-text-outline) :export (read-text-outline read-text-outline-silently make-text-outline-reader) :use-module (ice-9 regex) :autoload (ice-9 rdelim) (read-line) :autoload (ice-9 getopt-long) (getopt-long)) (define (?? symbol) (let ((name (symbol->string symbol))) (string=? "?" (substring name (1- (string-length name)))))) (define (msub n) (lambda (m) (match:substring m n))) (define (??-predicates pair) (cons (car pair) (if (?? (car pair)) (lambda (m) (not (string=? "" (match:substring m (cdr pair))))) (msub (cdr pair))))) (define (make-line-parser re specs) (let* ((rx (let ((fc (substring re 0 1))) (make-regexp (if (string=? "^" fc) re (string-append "^" re))))) (check (lambda (key) (assq-ref specs key))) (level-substring (msub (or (check 'level-submatch-number) 1))) (extract-level (cond ((check 'compute-level) => (lambda (proc) (lambda (m) (proc m)))) ((check 'level-substring-divisor) => (lambda (n) (lambda (m) (/ (string-length (level-substring m)) n)))) (else (lambda (m) (string-length (level-substring m)))))) (extract-body (cond ((check 'body-submatch-number) => msub) (else (lambda (m) (match:suffix m))))) (misc-props! (cond ((check 'extra-fields) => (lambda (alist) (let ((new (map ??-predicates alist))) (lambda (obj m) (for-each (lambda (pair) (set-object-property! obj (car pair) ((cdr pair) m))) new))))) (else (lambda (obj m) #t))))) ;; retval (lambda (line) (cond ((regexp-exec rx line) => (lambda (m) (let ((level (extract-level m)) (body (extract-body m))) (set-object-property! body 'level level) (misc-props! body m) body))) (else #f))))) (define (make-text-outline-reader re specs) (let ((parse-line (make-line-parser re specs))) ;; retval (lambda (port) (let* ((all '(start)) (pchain (list))) ; parents chain (let loop ((line (read-line port)) (prev-level -1) ; how this relates to the first input ; level determines whether or not we ; start in "sibling" or "child" mode. ; in the end, `start' is ignored and ; it's much easier to ignore parents ; than siblings (sometimes). this is ; not to encourage ignorance, however. (tp all)) ; tail pointer (or (eof-object? line) (cond ((parse-line line) => (lambda (w) (let* ((words (list w)) (level (object-property w 'level)) (diff (- level prev-level))) (cond ;; sibling ((zero? diff) ;; just extend the chain (set-cdr! tp words)) ;; child ((positive? diff) (or (= 1 diff) (error "unhandled diff not 1:" diff line)) ;; parent may be contacted by uncle later (kids ;; these days!) so save its level (set-object-property! tp 'level prev-level) (set! pchain (cons tp pchain)) ;; "push down" car into hierarchy (set-car! tp (cons (car tp) words))) ;; uncle ((negative? diff) ;; prune back to where levels match (do ((p pchain (cdr p))) ((= level (object-property (car p) 'level)) (set! pchain p))) ;; resume at this level (set-cdr! (car pchain) words) (set! pchain (cdr pchain)))) (loop (read-line port) level words)))) (else (loop (read-line port) prev-level tp))))) (set! all (car all)) (if (eq? 'start all) '() ; wasteland (cdr all)))))) (define read-text-outline-silently (make-text-outline-reader "(([ ][ ])*)- *" '((level-substring-divisor . 2)))) (define (read-text-outline . args) (write (read-text-outline-silently (open-file (car args) "r"))) (newline) #t) ; exit val (define main read-text-outline) ;;; read-text-outline ends here