#!/usr/bin/env bash
#
# Convert a space-aligned programming-language timeline into a Graphviz
# digraph wrapped in PlantUML @startuml/@enduml markers.
#
# Usage: <script> [timeline_file [max_year]]
#   timeline_file  text table to read            (default: timeline.txt)
#   max_year       rows dated after this year are dropped (default: 2005)
#
# The graph is written to stdout; diagnostics go to stderr.
#
# Expected input (assumption, confirm against the real data): one row per
# line, indented by at least one space, starting with a four digit year,
# with columns separated by runs of two or more spaces:
#   year  language  (unused column)  comma separated predecessors

# No `set -e`: failures are reported explicitly through return statuses.
set -uo pipefail

#######################################
# Select the data rows of the timeline and turn them into TSV.
# Arguments: $1 - path of the timeline file
# Outputs:   tab separated rows on stdout
# Returns:   exit status of sed
#######################################
extract_rows() {
  local timeline=$1
  # A literal tab supplied by the shell: "\t" in a sed replacement is a
  # GNU extension.
  local tab=$'\t'

  # Keep only rows that start with spaces followed by a four digit year,
  # drop the indentation, then collapse each run of 2+ spaces into one tab
  # (single spaces are kept because names such as "algol 58" contain them).
  sed -n \
    -e '/^[ ]\{1,\}[[:digit:]]\{4\}/{' \
    -e 's/^[ ]*//' \
    -e "s/[ ]\{2,\}/${tab}/g" \
    -e 'p' \
    -e '}' <"${timeline}"
}

#######################################
# Render TSV rows (year, language, unused, predecessors) as a digraph.
# Arguments: $1 - newest year to include (default: 2005)
# Inputs:    TSV rows on stdin
# Outputs:   PlantUML wrapped Graphviz source on stdout
# Returns:   exit status of awk
#######################################
render_graph() {
  local max_year=${1:-2005}

  # Plain POSIX awk: no arrays of arrays, no sorted traversal extensions.
  awk -v max_year="${max_year}" '
    # Lower-case a name (capitalization is inconsistent in the article),
    # cut off anything from the first "(" onwards and trim spaces.
    function normalize(name,    paren) {
      name = tolower(name)
      paren = index(name, "(")
      if (paren)
        name = substr(name, 1, paren - 1)
      sub(/^[ ]+/, "", name)
      sub(/[ ]+$/, "", name)
      return name
    }

    BEGIN {
      FS = "\t"
      print "@startuml"
      print "digraph lang {"
      print "ranksep=.2"
      print "size=\"90,90\""
      print "node [fontsize=24];"
      edge_count = 0
      have_year = 0
    }

    {
      # Rows without a language column carry nothing to draw.
      if ($2 == "")
        next

      year = substr($1, 1, 4)
      if (year !~ /^[0-9][0-9][0-9][0-9]$/)
        next

      # The amount of data sent to Graphviz has to be limited.
      year_key = year + 0
      if (year_key > max_year)
        next

      lang = normalize($2)
      if (lang == "")
        next

      # Only the first row of each language is used. "seen" is kept apart
      # from the edge score so that a language that already showed up as
      # somebody else predecessor still gets its own row processed.
      if (lang in seen)
        next
      seen[lang] = 1

      # Remember, in input order, which languages belong to which year.
      year_langs[year_key, ++year_size[year_key]] = lang
      if (!have_year || year_key < first_year)
        first_year = year_key
      if (!have_year || year_key > last_year)
        last_year = year_key
      have_year = 1

      # Edges from predecessors to this language. They are buffered
      # because they are printed after the rank subgraphs built in END.
      pred_total = split($4, preds, ",")
      for (i = 1; i <= pred_total; i++) {
        pred = normalize(preds[i])
        if (pred == "" || pred == "none")
          continue
        edges[++edge_count] = "\"" pred "\" -> \"" lang "\";"
        # Rough score used to leave unconnected languages out of the ranks.
        refs[pred]++
        refs[lang]++
      }
    }

    END {
      # Backbone of the graph: the years chained in ascending order.
      print "{"
      print "node [shape=plaintext];"
      if (have_year) {
        have_prev = 0
        for (y = first_year; y <= last_year; y++) {
          if (!(y in year_size))
            continue
          if (have_prev)
            print "\"" prev_year "\" -> \"" y "\";"
          prev_year = y
          have_prev = 1
        }
      }
      print "}"

      # One rank=same subgraph per year, listing its connected languages.
      if (have_year) {
        for (y = first_year; y <= last_year; y++) {
          if (!(y in year_size))
            continue
          row = "{rank=same; \"" y "\"; "
          for (i = 1; i <= year_size[y]; i++) {
            name = year_langs[y, i]
            if (name != "none" && refs[name])
              row = row " \"" name "\"; "
          }
          print row "}"
        }
      }

      # Finally the edges, in the order in which they were discovered.
      for (i = 1; i <= edge_count; i++)
        print edges[i]

      print "}"
      print "@enduml"
    }
  '
}

#######################################
# Entry point: validate the arguments and run the conversion.
# Arguments: $1 - timeline file (default: timeline.txt)
#            $2 - newest year to include (default: 2005)
# Outputs:   graph source on stdout, diagnostics on stderr
# Returns:   0 on success, 1 if the file is unreadable, 2 on a bad year,
#            otherwise the status of the failing pipeline stage
#######################################
main() {
  local timeline=${1:-timeline.txt}
  local max_year=${2:-2005}

  if [[ ! -r "${timeline}" ]]; then
    printf '%s: cannot read timeline file: %s\n' "${0##*/}" "${timeline}" >&2
    return 1
  fi
  if [[ ! "${max_year}" =~ ^[0-9]+$ ]]; then
    printf '%s: max_year must be a non-negative integer: %s\n' \
      "${0##*/}" "${max_year}" >&2
    return 2
  fi

  extract_rows "${timeline}" | render_graph "${max_year}"
}

main "$@"