import java.nio.file.Path;
import java.nio.file.Files;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
/** Utility to read a .dot "graphviz" file into a Graph.
* Sample usage:
* Graph g = DotReader.readFrom("mygraph.dot");
* */
public class DotReader {
/** Reads the filename into a new graph object.
* @param filename the name of a dot-formatted file.
*/
public static Graph readFrom(String filename) {
Graph g = new MyGraph();
// read entire file into a string
String fileString = null;
try {
fileString = Files.readString(Path.of(filename));
}
catch (java.io.IOException e) {
throw new RuntimeException(e);
}
Matcher outer = OUTER.matcher(fileString);
if (!outer.lookingAt())
throw new RuntimeException("file does not match dot syntax");
Matcher lines = LINE.matcher(outer.group("tail"));
boolean endLines = false;
while (!endLines && lines.find()) {
endLines = lines.group("finish") != null;
Matcher firstNode = NODE.matcher(lines.group());
if (!firstNode.lookingAt())
continue;
String src = extractLabel(firstNode);
g.addVertex(src);
Matcher edgeNode = EDGE_NODE.matcher(firstNode.group("tail"));
while (edgeNode.lookingAt()) {
String dest = extractLabel(edgeNode);
g.addVertex(dest);
g.putEdge(src, dest, true);
if (edgeNode.group("edge").equals("--")) {
// undirected edge - add both ways
g.putEdge(dest, src, true);
}
// look for more edge connections
src = dest;
edgeNode = EDGE_NODE.matcher(edgeNode.group("tail"));
}
}
return g;
}
private static String extractLabel(Matcher mat) {
String label = mat.group("label1");
return (label == null) ? mat.group("label2") : label;
}
private static Pattern OUTER = Pattern.compile(
"\\s*(strict\\s*)?(di)?graph\\s*" // file header
+ "\\w*\\s*" // optional graph name
+ "\\{" // opening curly brace
+ "(?<tail>.*)"
, Pattern.CASE_INSENSITIVE | Pattern.DOTALL
);
private static Pattern LINE = Pattern.compile(
"(\\\\." // ignore any backslash-escaped character
+ "|\\'(\\\\.|[^'])*\\'" // single-quoted strings
+ "|\\\"(\\\\.|[^\"])*\\\"" // double-quoted strings
+ "|[^;\\}\\n]" // otherwise take anything except semicolon or newline
+ ")*(;|$|(?<finish>\\}))"
, Pattern.DOTALL | Pattern.MULTILINE
);
private static Pattern NODE = Pattern.compile(
"\\s*"
+ "((?<label1>\\w+)|\\\"(?<label2>(\\\\.|[^\"])*)\\\")" // word or double-quoted
+ "(?<tail>.*)"
, Pattern.DOTALL
);
private static Pattern EDGE_NODE = Pattern.compile(
"\\s*(?<edge>--|->)" // edge indicator
+ "\\s*"
+ "((?<label1>\\w+)|\\\"(?<label2>(\\\\.|[^\"])*)\\\")" // word or double-quoted
+ "(?<tail>.*)"
, Pattern.DOTALL
);
}