CSV syntax implementation

This commit is contained in:
Nikolay Rozhkov 2023-06-21 01:28:59 +03:00
parent 6077ba5405
commit a2c055ba5d
6 changed files with 260 additions and 160 deletions

View File

@ -15,92 +15,78 @@
<body> <body>
<h1>Sankey diagram demos</h1> <h1>Sankey diagram demos</h1>
<h2>Simple example</h2>
<pre class="mermaid">
sankey
node[title="hello, how are you?"]
node[title="hello, mister Sankey"]
First -> 30 -> Second
First -> 10 -> Third
Second -> 20 -> Third
</pre
>
<!-- node[title="hello, mister "sankey", backslash for you "] -->
<h2>Energy flow</h2> <h2>Energy flow</h2>
<pre class="mermaid"> <pre class="mermaid">
sankey sankey
"Agricultural 'waste'" -> 124.729 -> "Bio-conversion" Agricultural 'waste',Bio-conversion,124.729
"Bio-conversion" -> 0.597 -> "Liquid" Bio-conversion,Liquid,0.597
"Bio-conversion" -> 26.862 -> "Losses" Bio-conversion,Losses,26.862
"Bio-conversion" -> 280.322 -> "Solid" Bio-conversion,Solid,280.322
"Bio-conversion" -> 81.144 -> "Gas" Bio-conversion,Gas,81.144
"Biofuel imports" -> 35 -> "Liquid" Biofuel imports,Liquid,35
"Biomass imports" -> 35 -> "Solid" Biomass imports,Solid,35
"Coal imports" -> 11.606 -> "Coal" Coal imports,Coal,11.606
"Coal reserves" -> 63.965 -> "Coal" Coal reserves,Coal,63.965
"Coal" -> 75.571 -> "Solid" Coal,Solid,75.571
"District heating" -> 10.639 -> "Industry" District heating,Industry,10.639
"District heating" -> 22.505 -> "Heating and cooling - commercial" District heating,Heating and cooling - commercial,22.505
"District heating" -> 46.184 -> "Heating and cooling - homes" District heating,Heating and cooling - homes,46.184
"Electricity grid" -> 104.453 -> "Over generation / exports" Electricity grid,Over generation / exports,104.453
"Electricity grid" -> 113.726 -> "Heating and cooling - homes" Electricity grid,Heating and cooling - homes,113.726
"Electricity grid" -> 27.14 -> "H2 conversion" Electricity grid,H2 conversion,27.14
"Electricity grid" -> 342.165 -> "Industry" Electricity grid,Industry,342.165
"Electricity grid" -> 37.797 -> "Road transport" Electricity grid,Road transport,37.797
"Electricity grid" -> 4.412 -> "Agriculture" Electricity grid,Agriculture,4.412
"Electricity grid" -> 40.858 -> "Heating and cooling - commercial" Electricity grid,Heating and cooling - commercial,40.858
"Electricity grid" -> 56.691 -> "Losses" Electricity grid,Losses,56.691
"Electricity grid" -> 7.863 -> "Rail transport" Electricity grid,Rail transport,7.863
"Electricity grid" -> 90.008 -> "Lighting & appliances - commercial" Electricity grid,Lighting & appliances - commercial,90.008
"Electricity grid" -> 93.494 -> "Lighting & appliances - homes" Electricity grid,Lighting & appliances - homes,93.494
"Gas imports" -> 40.719 -> "Ngas" Gas imports,Ngas,40.719
"Gas reserves" -> 82.233 -> "Ngas" Gas reserves,Ngas,82.233
"Gas" -> 0.129 -> "Heating and cooling - commercial" Gas,Heating and cooling - commercial,0.129
"Gas" -> 1.401 -> "Losses" Gas,Losses,1.401
"Gas" -> 151.891 -> "Thermal generation" Gas,Thermal generation,151.891
"Gas" -> 2.096 -> "Agriculture" Gas,Agriculture,2.096
"Gas" -> 48.58 -> "Industry" Gas,Industry,48.58
"Geothermal" -> 7.013 -> "Electricity grid" Geothermal,Electricity grid,7.013
"H2 conversion" -> 20.897 -> "H2" H2 conversion,H2,20.897
"H2 conversion" -> 6.242 -> "Losses" H2 conversion,Losses,6.242
"H2" -> 20.897 -> "Road transport" H2,Road transport,20.897
"Hydro" -> 6.995 -> "Electricity grid" Hydro,Electricity grid,6.995
"Liquid" -> 121.066 -> "Industry" Liquid,Industry,121.066
"Liquid" -> 128.69 -> "International shipping" Liquid,International shipping,128.69
"Liquid" -> 135.835 -> "Road transport" Liquid,Road transport,135.835
"Liquid" -> 14.458 -> "Domestic aviation" Liquid,Domestic aviation,14.458
"Liquid" -> 206.267 -> "International aviation" Liquid,International aviation,206.267
"Liquid" -> 3.64 -> "Agriculture" Liquid,Agriculture,3.64
"Liquid" -> 33.218 -> "National navigation" Liquid,National navigation,33.218
"Liquid" -> 4.413 -> "Rail transport" Liquid,Rail transport,4.413
"Marine algae" -> 4.375 -> "Bio-conversion" Marine algae,Bio-conversion,4.375
"Ngas" -> 122.952 -> "Gas" Ngas,Gas,122.952
"Nuclear" -> 839.978 -> "Thermal generation" Nuclear,Thermal generation,839.978
"Oil imports" -> 504.287 -> "Oil" Oil imports,Oil,504.287
"Oil reserves" -> 107.703 -> "Oil" Oil reserves,Oil,107.703
"Oil" -> 611.99 -> "Liquid" Oil,Liquid,611.99
"Other waste" -> 56.587 -> "Solid" Other waste,Solid,56.587
"Other waste" -> 77.81 -> "Bio-conversion" Other waste,Bio-conversion,77.81
"Pumped heat" -> 193.026 -> "Heating and cooling - homes" Pumped heat,Heating and cooling - homes,193.026
"Pumped heat" -> 70.672 -> "Heating and cooling - commercial" Pumped heat,Heating and cooling - commercial,70.672
"Solar PV" -> 59.901 -> "Electricity grid" Solar PV,Electricity grid,59.901
"Solar Thermal" -> 19.263 -> "Heating and cooling - homes" Solar Thermal,Heating and cooling - homes,19.263
"Solar" -> 19.263 -> "Solar Thermal" Solar,Solar Thermal,19.263
"Solar" -> 59.901 -> "Solar PV" Solar,Solar PV,59.901
"Solid" -> 0.882 -> "Agriculture" Solid,Agriculture,0.882
"Solid" -> 400.12 -> "Thermal generation" Solid,Thermal generation,400.12
"Solid" -> 46.477 -> "Industry" Solid,Industry,46.477
"Thermal generation" -> 525.531 -> "Electricity grid" Thermal generation,Electricity grid,525.531
"Thermal generation" -> 787.129 -> "Losses" Thermal generation,Losses,787.129
"Thermal generation" -> 79.329 -> "District heating" Thermal generation,District heating,79.329
"Tidal" -> 9.452 -> "Electricity grid" Tidal,Electricity grid,9.452
"UK land based bioenergy" -> 182.01 -> "Bio-conversion" UK land based bioenergy,Bio-conversion,182.01
"Wave" -> 19.013 -> "Electricity grid" Wave,Electricity grid,19.013
"Wind" -> 289.366 -> "Electricity grid" Wind,Electricity grid,289.366
</pre </pre
> >

View File

@ -1,4 +1,3 @@
source,target,value
Agricultural 'waste',Bio-conversion,124.729 Agricultural 'waste',Bio-conversion,124.729
Bio-conversion,Liquid,0.597 Bio-conversion,Liquid,0.597
Bio-conversion,Losses,26.862 Bio-conversion,Losses,26.862
1 source Agricultural 'waste' target Bio-conversion value 124.729
source target value
1 Agricultural 'waste' Agricultural 'waste' Bio-conversion Bio-conversion 124.729 124.729
2 Bio-conversion Bio-conversion Liquid Liquid 0.597 0.597
3 Bio-conversion Bio-conversion Losses Losses 26.862 26.862

View File

@ -0,0 +1,105 @@
/** mermaid */
%lex
// Lexer for the arrow-based sankey syntax, e.g. `First -> 30 -> Second`.
// TOKEN is a bare identifier made of word characters.
TOKEN \w+
// NUM is an integer with an optional fractional part. The decimal point is
// escaped so that only a literal dot is accepted between the digit groups
// (an unescaped dot would match any character).
NUM \d+(\.\d+)?
%options case-insensitive
// NOTE(review): this option is spelled `easy_keword_rules`; the name used by
// jison is presumably `easy_keyword_rules`. Left untouched because correcting
// it may change how keyword rules match - confirm before renaming.
%options easy_keword_rules
// link_value is an inclusive start condition, the other three are exclusive.
%s link_value
%x attributes
%x attr_value
%x string
%%
//--------------------------------------------------------------
// skip all whitespace EXCEPT newlines, but not within a string
//--------------------------------------------------------------
<INITIAL,link_value,attributes,attr_value>[^\S\r\n]+ {}
//--------------
// basic tokens
//--------------
(<<EOF>>|[\n;])+ { return 'EOS'; } // end of statement is semicolon ; new line \n or end of file
"sankey" { return 'SANKEY'; }
<INITIAL>{TOKEN} { return 'NODE_ID'; }
<link_value>{NUM} { return 'AMOUNT'; }
// Every arrow toggles the link_value condition: the first arrow of a link
// enters it (so the following number is lexed as AMOUNT), the second leaves it.
"->" {
  if(this.topState()!=='link_value') this.pushState('link_value');
  else this.popState();
  return 'ARROW';
}
//------------
// attributes
//------------
"[" { this.pushState('attributes'); return 'OPEN_ATTRIBUTES'; }
<attributes>"]" { this.popState(); return 'CLOSE_ATTRIBUTES'; }
<attributes>{TOKEN} { return 'ATTRIBUTE'; }
<attributes>\= { this.pushState('attr_value'); return 'EQUAL'; }
<attr_value>{TOKEN} { this.popState(); return 'VALUE'; }
//------------
// strings
//------------
<INITIAL,attributes,attr_value>\" { this.pushState('string'); return 'OPEN_STRING'; }
// Closing quote: leave the string condition and, when the string was an
// attribute value, leave attr_value as well.
<string>(?!\\)\" {
  if(this.topState()==='string') this.popState();
  if(this.topState()==='attr_value') this.popState();
  return 'CLOSE_STRING';
}
// String body: any run of characters other than quote and backslash, plus the
// escape sequences \" and \\.
<string>([^"\\]|\\\"|\\\\)+ { return 'STRING'; }
/lex
// Grammar for the arrow-based sankey syntax. Parsing begins at `start`.
%start start
// ARROW is declared left-associative.
%left ARROW
%% // language grammar
// A diagram is the SANKEY keyword, optionally preceded by an EOS token,
// followed by the document body.
start
: EOS SANKEY document
| SANKEY document
;
// A document is a possibly empty sequence of lines (right-recursive).
document
: line document
|
;
// A line is a node or a stream, each with an optional attribute list and
// terminated by EOS, or a bare EOS.
line
: node optional_attributes EOS
| stream optional_attributes EOS
| EOS
;
// Attribute lists are recognised syntactically only: none of the four rules
// below carries a semantic action, so nothing here calls into yy.
optional_attributes: OPEN_ATTRIBUTES attributes CLOSE_ATTRIBUTES | ;
attributes: attribute attributes | ;
attribute: ATTRIBUTE EQUAL value | ATTRIBUTE;
value: VALUE | OPEN_STRING STRING CLOSE_STRING;
// source -> amount -> target. Registers the link through yy.addLink and
// yields the source node; because `tail` may itself be a stream (whose value
// is that stream's source), links can be chained: A -> 1 -> B -> 2 -> C.
stream
: node\[source] ARROW amount ARROW tail\[target] {
$$=$source;
yy.addLink($source, $target, $amount);
}
;
// Right-hand end of a link: either a further stream or a single node.
tail
: stream { $$ = $stream }
| node { $$ = $node; }
;
// The link amount, converted from the AMOUNT token text with parseFloat.
amount: AMOUNT { $$=parseFloat($AMOUNT); };
// A node is named by a bare identifier or by a quoted string; both forms go
// through yy.findOrCreateNode.
node
: NODE_ID { $$ = yy.findOrCreateNode($NODE_ID); }
| OPEN_STRING STRING\[node_label] CLOSE_STRING { $$ = yy.findOrCreateNode($node_label); }
;

View File

@ -1,105 +1,82 @@
/** mermaid */ /** mermaid */
//----------------------------------------------------
// We support csv format as defined there
// CSV format // https://www.ietf.org/rfc/rfc4180.txt
//----------------------------------------------------
%lex %lex
TOKEN \w+
NUM \d+(.\d+)?
%options case-insensitive %options case-insensitive
%options easy_keword_rules %options easy_keword_rules
%s link_value // as per section 6.1 of RFC 2234 [2]
COMMA \u002C
%x attributes CR \u000D
%x attr_value LF \u000A
%x string CRLF \u000D\u000A
DQUOTE \u0022
TEXTDATA [\u0020-\u0021\u0023-\u002B\u002D-\u007E]
%% %%
//--------------------------------------------------------------
// skip all whitespace EXCEPT newlines, but not within a string
//--------------------------------------------------------------
<INITIAL,link_value,attributes,attr_value>[^\S\r\n]+ {} <<EOF>> { return 'EOF' }
//-------------- "sankey" { return 'SANKEY' }
// basic tokens {COMMA} { return 'COMMA' }
//-------------- {DQUOTE} { return 'DQUOTE' }
({CRLF}|{LF}) { return 'NEWLINE' }
(<<EOF>>|[\n;])+ { return 'EOS'; } // end of statement is semicolon ; new line \n or end of file {TEXTDATA}* { return 'NON_ESCAPED_TEXT' }
"sankey" { return 'SANKEY'; } ({TEXTDATA}|{COMMA}|{CR}|{LF}|{DQUOTE}{DQUOTE})* { return 'ESCAPED_TEXT' }
<INITIAL>{TOKEN} { return 'NODE_ID'; }
<link_value>{NUM} { return 'AMOUNT'; }
"->" {
if(this.topState()!=='link_value') this.pushState('link_value');
else this.popState();
return 'ARROW';
}
//------------
// attributes
//------------
"[" { this.pushState('attributes'); return 'OPEN_ATTRIBUTES'; }
<attributes>"]" { this.popState(); return 'CLOSE_ATTRIBUTES'; }
<attributes>{TOKEN} { return 'ATTRIBUTE'; }
<attributes>\= { this.pushState('attr_value'); return 'EQUAL'; }
<attr_value>{TOKEN} { this.popState(); return 'VALUE'; }
//------------
// strings
//------------
<INITIAL,attributes,attr_value>\" { this.pushState('string'); return 'OPEN_STRING'; }
<string>(?!\\)\" {
if(this.topState()==='string') this.popState();
if(this.topState()==='attr_value') this.popState();
return 'CLOSE_STRING';
}
<string>([^"\\]|\\\"|\\\\)+ { return 'STRING'; }
/lex /lex
%start start %start start
%left ARROW
%% // language grammar %% // language grammar
start start
: EOS SANKEY document : SANKEY file opt_eof
| SANKEY document
; ;
document file: csv opt_newline;
: line document
| csv
: record csv_tail
; ;
line csv_tail
: node optional_attributes EOS : NEWLINE csv
| stream optional_attributes EOS | // empty
| EOS
; ;
optional_attributes: OPEN_ATTRIBUTES attributes CLOSE_ATTRIBUTES | ; opt_newline
: NEWLINE
attributes: attribute attributes | ; | // empty
attribute: ATTRIBUTE EQUAL value | ATTRIBUTE;
value: VALUE | OPEN_STRING STRING CLOSE_STRING;
stream
: node\[source] ARROW amount ARROW tail\[target] {
$$=$source;
yy.addLink($source, $target, $amount);
}
; ;
tail opt_eof
: stream { $$ = $stream } : EOF
| node { $$ = $node; } | // empty
;
amount: AMOUNT { $$=parseFloat($AMOUNT); };
node
: NODE_ID { $$ = yy.findOrCreateNode($NODE_ID); }
| OPEN_STRING STRING\[node_label] CLOSE_STRING { $$ = yy.findOrCreateNode($node_label); }
; ;
record
: field\[source] COMMA field\[target] COMMA field\[value] {
const source = yy.findOrCreateNode($source);
const target = yy.findOrCreateNode($target);
const value = parseFloat($value);
$$ = yy.addLink(source,target,value);
} // parse only 3 fields, this is not part of standard
| // allow empty record to handle empty lines, this is not part of csv standard either
;
field
: escaped { $$=$escaped; }
| non_escaped { $$=$non_escaped; }
;
escaped: DQUOTE ESCAPED_TEXT DQUOTE { $$=$ESCAPED_TEXT; };
non_escaped: NON_ESCAPED_TEXT { $$=$NON_ESCAPED_TEXT; };

View File

@ -0,0 +1,33 @@
// @ts-ignore: jison doesn't export types
import diagram from './sankey.jison';
// @ts-ignore: jison doesn't export types
import { parser } from './sankey.jison';
import db from '../sankeyDB.js';
// import { fail } from 'assert';
/**
 * Parser smoke test for the sankey grammar: feeds the bundled `energy.csv`
 * fixture to the generated jison parser and expects it to parse without throwing.
 */
describe('Sankey diagram', function () {
  // TODO - these examples should be put into ./parser/stateDiagram.spec.js
  // NOTE(review): the path above and the "info graph" wording below look
  // copy-pasted from another diagram's spec - confirm the intended names.
  describe('when parsing an info graph it', function () {
    // Attach the sankey DB to both parser handles and reset it before each
    // test so that state does not leak between cases.
    beforeEach(function () {
      parser.yy = db;
      diagram.parser.yy = db;
      diagram.parser.yy.clear();
    });

    it('parses csv', async () => {
      const fs = require('fs');
      const csvPath = require('path').resolve(__dirname, './energy.csv');
      // Use the promise-based API: the callback form of readFile returns
      // undefined, so awaiting it let the test finish before parsing ran and
      // any parse error surfaced outside the test.
      const data: string = await fs.promises.readFile(csvPath, 'utf8');
      // A real line break must separate the keyword from the CSV body;
      // `\\n` in a template literal is a literal backslash followed by "n".
      const str = `sankey\n${data}`;
      // parse() throws on a syntax error, which fails the test.
      parser.parse(str);
    });
  });
});