#!/usr/bin/scheme --script
;;; Predict and Follow sets calculator.
;;; SI 413 Fall 2021
;;;
;;; To use this program, run it as
;;; scheme --script predfol.scm
;;; then type in your grammar rules like this:
;;; nt -> nt other TOKEN something ELSE
;;; followed by Ctrl-D when you are done.
;;; (Or, you can redirect in from a file.)
;;; To specify an epsilon production, you can write
;;; "epsilon" or "ε" or just leave the right-hand side blank.
; splits a list according to the given match function.
; matcher should take a list and return 0 for no match,
; or else the length of the match at the beginning of the list.
; Returned is a list of lists for what appears between matches.
; maxmatch is the maximum list size to return (0 for no max)
; include-empties? indicates whether empty lists should be returned or omitted.
(define (split lst matcher maxlen include-empties?)
(define (add-token token splits-accum)
(if (or include-empties?
(pair? token))
(cons token splits-accum)
splits-accum))
(define (split-helper lst token-accum splits-accum max-remaining)
(cond [(= 1 max-remaining)
(reverse (add-token lst splits-accum))]
[(null? lst)
(reverse (add-token (reverse token-accum) splits-accum))]
[else (let ([match-len (matcher lst)])
(if (zero? match-len)
(split-helper (cdr lst)
(cons (car lst) token-accum)
splits-accum
max-remaining)
(split-helper (list-tail lst match-len)
'()
(add-token (reverse token-accum) splits-accum)
(1- max-remaining))))]))
(split-helper lst '() '() maxlen))
; matcher for split based on whitespace characters
(define (whitespace-matcher lst)
(whitespace-matcher-helper lst 0))
(define (whitespace-matcher-helper lst count)
(if (or (null? lst)
(not (char-whitespace? (car lst))))
count
(whitespace-matcher-helper (cdr lst) (1+ count))))
; matcher for -> or →
(define (arrow-matcher lst)
(cond [(null? lst) 0]
[(char=? (car lst) #\→) 1]
[(null? (cdr lst)) 0]
[(and (char=? (car lst) #\-)
(char=? (cadr lst) #\>))
2]
[else 0]))
; compares the length of the given list to n based on the comparison
; function comp.
; Example: (compare-len '(1 2 3 4) <= 5) produces #t
; The point is to avoid calling length, faster for very large lists.
(define (compare-len lst comp n)
(cond [(null? lst)
(comp 0 n)]
[(zero? n)
(comp 1 0)]
[else
(compare-len (cdr lst) comp (1- n))]))
; creates a matcher for split based on string pattern
(define (make-matcher pattern)
(let ([patlist (string->list pattern)]
[patlen (string-length pattern)])
(lambda (lst)
(if (and (compare-len lst >= patlen)
(andmap char=?
patlist
(list-head lst patlen)))
patlen
0))))
(define comment-matcher (make-matcher "#"))
(define pipe-matcher (make-matcher "|"))
; takes a list of characters and returns a list of symbols, split according to whitespace.
; any symbol like 'eps or 'epsilon or 'ε is removed.
(define (to-symbols aloc)
(filter
(lambda (sym)
(not (memv sym '(eps epsilon Eps Epsilon EPS EPSILON ε))))
(map string->symbol
(map list->string
(split aloc whitespace-matcher 0 #f)))))
; adds production rules with the given nonterminal symbol to the accumulating list
(define (add-prods nt prods rules accum)
(cond [(null? rules)
(reverse (cons (cons nt prods)
accum))]
[(eqv? nt (caar rules))
(append (reverse accum)
(cons (cons nt (append (cdar rules) prods))
(cdr rules)))]
[else
(add-prods nt
prods
(cdr rules)
(cons (car rules) accum))]))
; reads lines from the given port to parse a grammer in EBNF form.
; returns a list of (nonterminal (production rule rhs) (production rule ...)) sub-lists
(define (parse-grammar port)
(parse-grammar-helper port '()))
(define (parse-grammar-helper port sofar)
(let ([line (get-line port)])
(if (eof-object? line)
sofar
(let ([production (split (car (split (string->list line) comment-matcher 2 #t))
arrow-matcher
2
#t)])
(if (null? (cdr production))
(begin (assert (andmap char-whitespace? (car production)))
(parse-grammar-helper port sofar))
(let ([lhs (to-symbols (car production))]
[rhs (map to-symbols
(split (cadr production) pipe-matcher 0 #t))])
(assert (compare-len lhs = 1))
(parse-grammar-helper
port
(add-prods (car lhs) rhs sofar '()))))))))
; proc must be a function of one argument.
; keeps calling (proc arg) until the result is the same as arg (a fixed point),
; and then this arg is returned.
(define (fixed-point proc initial)
(let ([next (proc initial)])
(if (equal? initial next)
initial
(fixed-point proc next))))
; adds each element of new to lst at the end if not already present
(define (incorporate new lst)
(if (null? new)
lst
(incorporate (cdr new)
(incorporate-helper (car new) lst '()))))
(define (incorporate-helper elem lst sofar)
(cond [(null? lst)
(reverse! (cons elem sofar))]
[(eqv? elem (car lst))
(append! (reverse! sofar) lst)]
[else (incorporate-helper elem (cdr lst) (cons (car lst) sofar))]))
; sorts one partial list and removes duplicates, according to a given sorted list
(define (sort-according unsorted sorted)
(filter (lambda (x) (memv x unsorted))
sorted))
; extracts all token names from the grammar, in the order they appear
; nts should be a list of nonterminals
(define (get-tokens grammar nts)
(incorporate (append (filter (lambda (sym) (not (memv sym nts)))
(apply append (apply append (map cdr grammar))))
(list '$))
'()))
; computes the EPS set for this grammar, the list of
; nonterminals which can produce empty strings.
(define (get-eps grammar)
(fixed-point
(lambda (eps)
(map car
(filter
(lambda (rules)
(ormap (lambda (rhs)
(andmap (lambda (sym) (memv sym eps))
rhs))
(cdr rules)))
grammar)))
'()))
; Gets the relations between first and follow sets of nonterminals.
; These are basically the edges in the relations graph.
; e.g. a relation '((follow B) (first A)) means any token in the
; first set of A should also be in the follow set of B.
(define (get-rels grammar eps nts)
(define (get-rels-helper rules rels-sofar)
(if (null? rules)
rels-sofar
(get-rels-helper (cdr rules)
(add-rhses (caar rules) (cdar rules) rels-sofar))))
(define (add-rhses nt rhses rels-sofar)
(if (null? rhses)
rels-sofar
(add-rhses nt
(cdr rhses)
(add-rhs nt
(car rhses)
(list (list 'first nt))
rels-sofar))))
(define (add-rhs nt rhs add-to rels-sofar)
(if (null? rhs)
(append (map (lambda (x) (list x (list 'follow nt)))
(filter (lambda (x) (symbol=? (car x) 'follow))
add-to))
rels-sofar)
(add-rhs nt
(cdr rhs)
(let ([folnode (list 'follow (car rhs))])
(cond [(memv (car rhs) eps)
(cons folnode add-to)]
[(memv (car rhs) nts)
(list folnode)]
[else '()]))
(append (map (lambda (node)
(list node (list 'first (car rhs))))
add-to)
rels-sofar))))
(get-rels-helper
grammar
(if (memv '$ (cadar grammar))
'()
(list (list (list 'follow (caar grammar))
(list 'first '$))))))
; removes duplicates from a list
(define (uniqueify lst)
(uniqueify-helper lst '()))
(define (uniqueify-helper lst accum)
(cond [(null? lst)
(reverse accum)]
[(member (car lst) accum)
(uniqueify-helper (cdr lst) accum)]
[else (uniqueify-helper (cdr lst)
(cons (car lst) accum))]))
; organizes edges into an adjacency list graph
(define (edges->graph edges)
(eg-helper edges '()))
(define (eg-helper edges sofar)
(if (null? edges)
sofar
(let ([test-first
(lambda (true-false)
(lambda (edge)
(eq? (equal? (caar edges) (car edge))
true-false)))])
(eg-helper
(filter (test-first #f)
edges)
(cons (uniqueify (cons (caar edges)
(map cadr (filter (test-first #t)
edges))))
sofar)))))
; topological sort of a directed graph.
; returned is a list of lists of equivalent nodes, in topo sort order.
(define (topo-sort graph)
(define (dfs-from-all nodes open lclosed)
(if (null? nodes)
lclosed
(dfs-from-all (cdr nodes)
open
(dfs (car nodes) open lclosed))))
(define (dfs node open lclosed)
(cond [(ormap (lambda (lst) (member node lst))
(cdr lclosed))
lclosed]
[(member node open)
(let ([loop (member node (reverse open))])
(if (> (length loop) (car lclosed))
(cons loop (cdr lclosed))
lclosed))]
[else
(let ([newlc (let ([nbrs (assoc node graph)])
(if nbrs
(dfs-from-all (cdr nbrs) (cons node open) lclosed)
lclosed))])
(if (and (pair? (car newlc))
(equal? node (caar newlc)))
(cons '() newlc)
(cons (car newlc)
(cons (list node)
(cdr newlc)))))]))
(reverse (cdr (dfs-from-all (map car graph) '() '(())))))
; computes a list of all first and follow sets, unsorted.
(define (get-first-follow grammar nts tokens eps)
(let ([graph (edges->graph (get-rels grammar eps nts))])
(define (process node-groups sofar)
(cond [(null? node-groups)
sofar]
[(and (symbol=? (caaar node-groups) 'first)
(memv (cadaar node-groups) tokens))
(assert (null? (cdar node-groups)))
(process (cdr node-groups)
(cons (list (caar node-groups) (cadaar node-groups))
sofar))]
[else
(let ([toks (sort-according
(apply append
(map (lambda (nbr)
(cdr (assoc nbr sofar)))
(uniqueify (apply append
(map (lambda (node)
(let ([x (assoc node graph)])
(if x (cdr x) '())))
(car node-groups))))))
tokens)])
(process (cdr node-groups)
(append (map (lambda (node)
(cons node toks))
(car node-groups))
sofar)))]))
(process (topo-sort graph) '())))
; ff should be output from get-first-follow
; type should be 'first or 'follow
(define (extract-ff ff type nts)
(define (helper syms sofar)
(if (null? syms)
sofar
(helper
(cdr syms)
(cons (let ([found (assoc (list type (car syms))
ff)])
(if found
(cons (car syms)
(cdr found))
(list (car syms))))
sofar))))
(reverse (helper nts '())))
(define (get-predict grammar tokens eps first follow)
(define (first-for sym)
(if (memv sym tokens)
(list sym)
(cdr (assv sym first))))
(define (predict-for nt rhs sofar)
(if (null? rhs)
(append (cdr (assv nt follow))
sofar)
(let ([sofar (append (first-for (car rhs))
sofar)])
(if (memv (car rhs) eps)
(predict-for nt (cdr rhs) sofar)
sofar))))
(map (lambda (rule)
(list rule
(sort-according (predict-for (car rule) (cdr rule) '())
tokens)))
(apply append (map (lambda (grule)
(map (lambda (rhs) (cons (car grule) rhs))
(cdr grule)))
grammar))))
; makes a big string from a list of strings by inserting the glue between everything
; alos must not be an empty string
(define (join alos glue)
(cond [(null? alos) ""]
[(null? (cdr alos))
(car alos)]
[else (string-append (car alos)
glue
(join (cdr alos) glue))]))
; like map, but will go according to the longer of the two argument lists
(define (map-longest fun args1 args2)
(cond [(null? args1) args2]
[(null? args2) args1]
[else (cons (fun (car args1) (car args2))
(map-longest fun (cdr args1) (cdr args2)))]))
(define (max-widths lolos)
(max-widths-helper lolos '()))
(define (max-widths-helper lolos sofar)
(if (null? lolos)
sofar
(max-widths-helper (cdr lolos)
(map-longest max
sofar
(map string-length (car lolos))))))
(define (print-table alol)
(let* ([widths (max-widths alol)])
(header-line widths)
(body-lines alol widths)
(header-line widths)))
(define (header-line widths)
(display ":-")
(display (join (map (lambda (w) (make-string w #\-))
widths)
"-:-"))
(display "-:")
(newline))
(define (body-lines alol widths)
(unless (null? alol)
(apply printf
(cons (string-append
": "
(join (map (lambda (w) (format "~~~aa" w))
widths)
" : ")
" :\n")
(car alol)))
(body-lines (cdr alol) widths)))
(define (print-ff ffsets)
(print-table
(map (lambda (lst)
(let ([los (map symbol->string lst)])
(list (car los)
(join (cdr los) " "))))
ffsets)))
; makes string from list of symbols with spaces in between
(define (rhs->string rhs)
(if (null? rhs)
"ε"
(join (map symbol->string rhs) " ")))
(define (rule->string rule)
(string-append
(symbol->string (car rule))
" → "
(join (map rhs->string (cdr rule)) " | ")))
(define (print-predict alop)
(print-table
(map (lambda (pair)
(list
(rule->string (cons (caar pair)
(list (cdar pair))))
(join (map symbol->string (cadr pair)) " ")))
alop)))
(define (process port)
(let* ([grammar (parse-grammar port)]
[nts (map car grammar)]
[tokens (get-tokens grammar nts)]
[eps (get-eps grammar)]
[fflist (get-first-follow grammar nts tokens eps)]
[first (extract-ff fflist 'first nts)]
[follow (extract-ff fflist 'follow nts)]
[predict (get-predict grammar tokens eps first follow)])
(printf " TOKENS: ~a\n" (join (map symbol->string tokens) " "))
(printf "NONTERMINALS: ~a\n" (join (map symbol->string nts) " "))
(printf " EPS: ~a\n" (join (map symbol->string eps) " "))
(newline)
(printf "FIRST:\n")
(print-ff first)
(newline)
(printf "FOLLOW:\n")
(print-ff follow)
(newline)
(printf "PREDICT:\n")
(print-predict predict)
))
(define (usage)
(printf "Usage: ~a [INPUT_FILE]\n" (car (command-line)))
(printf "\n")
(printf "Reads grammar rules from INPUT_FILE (if given), or standard in,\n")
(printf "and shows the resulting FIRST, FOLLOW, and PREDICT sets for that grammar.\n")
(printf "Each rule should have the form:\n")
(printf " nonterminal -> soment someothernt SOMETOKEN SOMEOTHERTOKEN\n")
(printf "Also supported: alternation symbol |, \"epsilon\" or \"ε\" for epsilon productions, comments with #\n")
(exit 1))
(define (process-args args)
(when (not (null? args))
(if (string=? (substring (car args) 0 1) "-")
(usage)
(begin (call-with-input-file (car args) process)
(process-args (cdr args))))))
(let ([cl (command-line)])
(unless (string=? (car cl) "")
(if (null? (cdr cl))
(begin
(printf "Enter grammar rules below, followed by Ctrl-D\n")
(process (current-input-port)))
(process-args (cdr cl)))))