CSV syntax implementation

This commit is contained in:
Nikolay Rozhkov 2023-06-21 01:28:59 +03:00
parent 6077ba5405
commit a2c055ba5d
6 changed files with 260 additions and 160 deletions

View File

@ -15,92 +15,78 @@
<body>
<h1>Sankey diagram demos</h1>
<h2>Simple example</h2>
<pre class="mermaid">
sankey
node[title="hello, how are you?"]
node[title="hello, mister Sankey"]
First -> 30 -> Second
First -> 10 -> Third
Second -> 20 -> Third
</pre
>
<!-- node[title="hello, mister "sankey", backslash for you "] -->
<h2>Energy flow</h2>
<pre class="mermaid">
sankey
"Agricultural 'waste'" -> 124.729 -> "Bio-conversion"
"Bio-conversion" -> 0.597 -> "Liquid"
"Bio-conversion" -> 26.862 -> "Losses"
"Bio-conversion" -> 280.322 -> "Solid"
"Bio-conversion" -> 81.144 -> "Gas"
"Biofuel imports" -> 35 -> "Liquid"
"Biomass imports" -> 35 -> "Solid"
"Coal imports" -> 11.606 -> "Coal"
"Coal reserves" -> 63.965 -> "Coal"
"Coal" -> 75.571 -> "Solid"
"District heating" -> 10.639 -> "Industry"
"District heating" -> 22.505 -> "Heating and cooling - commercial"
"District heating" -> 46.184 -> "Heating and cooling - homes"
"Electricity grid" -> 104.453 -> "Over generation / exports"
"Electricity grid" -> 113.726 -> "Heating and cooling - homes"
"Electricity grid" -> 27.14 -> "H2 conversion"
"Electricity grid" -> 342.165 -> "Industry"
"Electricity grid" -> 37.797 -> "Road transport"
"Electricity grid" -> 4.412 -> "Agriculture"
"Electricity grid" -> 40.858 -> "Heating and cooling - commercial"
"Electricity grid" -> 56.691 -> "Losses"
"Electricity grid" -> 7.863 -> "Rail transport"
"Electricity grid" -> 90.008 -> "Lighting & appliances - commercial"
"Electricity grid" -> 93.494 -> "Lighting & appliances - homes"
"Gas imports" -> 40.719 -> "Ngas"
"Gas reserves" -> 82.233 -> "Ngas"
"Gas" -> 0.129 -> "Heating and cooling - commercial"
"Gas" -> 1.401 -> "Losses"
"Gas" -> 151.891 -> "Thermal generation"
"Gas" -> 2.096 -> "Agriculture"
"Gas" -> 48.58 -> "Industry"
"Geothermal" -> 7.013 -> "Electricity grid"
"H2 conversion" -> 20.897 -> "H2"
"H2 conversion" -> 6.242 -> "Losses"
"H2" -> 20.897 -> "Road transport"
"Hydro" -> 6.995 -> "Electricity grid"
"Liquid" -> 121.066 -> "Industry"
"Liquid" -> 128.69 -> "International shipping"
"Liquid" -> 135.835 -> "Road transport"
"Liquid" -> 14.458 -> "Domestic aviation"
"Liquid" -> 206.267 -> "International aviation"
"Liquid" -> 3.64 -> "Agriculture"
"Liquid" -> 33.218 -> "National navigation"
"Liquid" -> 4.413 -> "Rail transport"
"Marine algae" -> 4.375 -> "Bio-conversion"
"Ngas" -> 122.952 -> "Gas"
"Nuclear" -> 839.978 -> "Thermal generation"
"Oil imports" -> 504.287 -> "Oil"
"Oil reserves" -> 107.703 -> "Oil"
"Oil" -> 611.99 -> "Liquid"
"Other waste" -> 56.587 -> "Solid"
"Other waste" -> 77.81 -> "Bio-conversion"
"Pumped heat" -> 193.026 -> "Heating and cooling - homes"
"Pumped heat" -> 70.672 -> "Heating and cooling - commercial"
"Solar PV" -> 59.901 -> "Electricity grid"
"Solar Thermal" -> 19.263 -> "Heating and cooling - homes"
"Solar" -> 19.263 -> "Solar Thermal"
"Solar" -> 59.901 -> "Solar PV"
"Solid" -> 0.882 -> "Agriculture"
"Solid" -> 400.12 -> "Thermal generation"
"Solid" -> 46.477 -> "Industry"
"Thermal generation" -> 525.531 -> "Electricity grid"
"Thermal generation" -> 787.129 -> "Losses"
"Thermal generation" -> 79.329 -> "District heating"
"Tidal" -> 9.452 -> "Electricity grid"
"UK land based bioenergy" -> 182.01 -> "Bio-conversion"
"Wave" -> 19.013 -> "Electricity grid"
"Wind" -> 289.366 -> "Electricity grid"
Agricultural 'waste',Bio-conversion,124.729
Bio-conversion,Liquid,0.597
Bio-conversion,Losses,26.862
Bio-conversion,Solid,280.322
Bio-conversion,Gas,81.144
Biofuel imports,Liquid,35
Biomass imports,Solid,35
Coal imports,Coal,11.606
Coal reserves,Coal,63.965
Coal,Solid,75.571
District heating,Industry,10.639
District heating,Heating and cooling - commercial,22.505
District heating,Heating and cooling - homes,46.184
Electricity grid,Over generation / exports,104.453
Electricity grid,Heating and cooling - homes,113.726
Electricity grid,H2 conversion,27.14
Electricity grid,Industry,342.165
Electricity grid,Road transport,37.797
Electricity grid,Agriculture,4.412
Electricity grid,Heating and cooling - commercial,40.858
Electricity grid,Losses,56.691
Electricity grid,Rail transport,7.863
Electricity grid,Lighting & appliances - commercial,90.008
Electricity grid,Lighting & appliances - homes,93.494
Gas imports,Ngas,40.719
Gas reserves,Ngas,82.233
Gas,Heating and cooling - commercial,0.129
Gas,Losses,1.401
Gas,Thermal generation,151.891
Gas,Agriculture,2.096
Gas,Industry,48.58
Geothermal,Electricity grid,7.013
H2 conversion,H2,20.897
H2 conversion,Losses,6.242
H2,Road transport,20.897
Hydro,Electricity grid,6.995
Liquid,Industry,121.066
Liquid,International shipping,128.69
Liquid,Road transport,135.835
Liquid,Domestic aviation,14.458
Liquid,International aviation,206.267
Liquid,Agriculture,3.64
Liquid,National navigation,33.218
Liquid,Rail transport,4.413
Marine algae,Bio-conversion,4.375
Ngas,Gas,122.952
Nuclear,Thermal generation,839.978
Oil imports,Oil,504.287
Oil reserves,Oil,107.703
Oil,Liquid,611.99
Other waste,Solid,56.587
Other waste,Bio-conversion,77.81
Pumped heat,Heating and cooling - homes,193.026
Pumped heat,Heating and cooling - commercial,70.672
Solar PV,Electricity grid,59.901
Solar Thermal,Heating and cooling - homes,19.263
Solar,Solar Thermal,19.263
Solar,Solar PV,59.901
Solid,Agriculture,0.882
Solid,Thermal generation,400.12
Solid,Industry,46.477
Thermal generation,Electricity grid,525.531
Thermal generation,Losses,787.129
Thermal generation,District heating,79.329
Tidal,Electricity grid,9.452
UK land based bioenergy,Bio-conversion,182.01
Wave,Electricity grid,19.013
Wind,Electricity grid,289.366
</pre
>

View File

@ -1,4 +1,3 @@
source,target,value
Agricultural 'waste',Bio-conversion,124.729
Bio-conversion,Liquid,0.597
Bio-conversion,Losses,26.862
1 source Agricultural 'waste' target Bio-conversion value 124.729
source target value
1 Agricultural 'waste' Agricultural 'waste' Bio-conversion Bio-conversion 124.729 124.729
2 Bio-conversion Bio-conversion Liquid Liquid 0.597 0.597
3 Bio-conversion Bio-conversion Losses Losses 26.862 26.862

View File

@ -0,0 +1,105 @@
/** mermaid */
%lex
// Lexer for the arrow syntax:  Source -> 12.5 -> Target  with optional [attr=value] lists.
// TOKEN: a run of word characters, used for bare node ids, attribute names and attribute values.
TOKEN \w+
// NUM: an integer or decimal number. The dot is escaped so that only a literal "."
// can separate the integer and fractional parts (an unescaped dot matches any character).
NUM \d+(\.\d+)?
%options case-insensitive
// NOTE(review): "easy_keword_rules" looks like a misspelling of "easy_keyword_rules".
// It is left unchanged because renaming it may alter how rules are generated - confirm first.
%options easy_keword_rules
// link_value is inclusive (%s); attributes, attr_value and string are exclusive (%x).
%s link_value
%x attributes
%x attr_value
%x string
%%
//--------------------------------------------------------------
// skip all whitespace EXCEPT newlines, but not within a string
//--------------------------------------------------------------
<INITIAL,link_value,attributes,attr_value>[^\S\r\n]+ {}
//--------------
// basic tokens
//--------------
(<<EOF>>|[\n;])+ { return 'EOS'; } // end of statement is semicolon ; new line \n or end of file
"sankey" { return 'SANKEY'; }
<INITIAL>{TOKEN} { return 'NODE_ID'; }
<link_value>{NUM} { return 'AMOUNT'; }
"->" {
// The first arrow of a link enters link_value so the number is lexed as AMOUNT;
// the second arrow leaves it again so the target is lexed as a node.
if(this.topState()!=='link_value') this.pushState('link_value');
else this.popState();
return 'ARROW';
}
//------------
// attributes
//------------
"[" { this.pushState('attributes'); return 'OPEN_ATTRIBUTES'; }
<attributes>"]" { this.popState(); return 'CLOSE_ATTRIBUTES'; }
<attributes>{TOKEN} { return 'ATTRIBUTE'; }
// After "=" exactly one value is expected: either a bare token or a quoted string.
<attributes>\= { this.pushState('attr_value'); return 'EQUAL'; }
<attr_value>{TOKEN} { this.popState(); return 'VALUE'; }
//------------
// strings
//------------
<INITIAL,attributes,attr_value>\" { this.pushState('string'); return 'OPEN_STRING'; }
// The (?!\\) lookahead inspects the quote itself, so it never rejects a match;
// escaped quotes are consumed by the STRING rule below instead.
<string>(?!\\)\" {
// Leave the string state, and also leave attr_value when the string was an attribute value.
if(this.topState()==='string') this.popState();
if(this.topState()==='attr_value') this.popState();
return 'CLOSE_STRING';
}
// String body: any run of characters other than quote and backslash, plus the escapes \" and \\.
<string>([^"\\]|\\\"|\\\\)+ { return 'STRING'; }
/lex
// Parser rules for the arrow syntax. Semantic actions talk to the diagram DB through yy.
%start start
%left ARROW
%% // language grammar
// Entry point: the SANKEY keyword, optionally preceded by an EOS token, then the document body.
start
: EOS SANKEY document
| SANKEY document
;
// A document is a possibly empty sequence of lines.
document
: line document
|
;
// A line is a node or a stream, each with an optional attribute list and terminated by EOS,
// or just an EOS token on its own.
line
: node optional_attributes EOS
| stream optional_attributes EOS
| EOS
;
// Attribute lists are recognised syntactically; these rules carry no semantic actions.
optional_attributes: OPEN_ATTRIBUTES attributes CLOSE_ATTRIBUTES | ;
attributes: attribute attributes | ;
attribute: ATTRIBUTE EQUAL value | ATTRIBUTE;
value: VALUE | OPEN_STRING STRING CLOSE_STRING;
// A stream is  source ARROW amount ARROW tail  where the tail may itself be a stream,
// which allows chains such as  A -> 1 -> B -> 2 -> C.
stream
: node\[source] ARROW amount ARROW tail\[target] {
// Yield the source node so an enclosing stream links to the head of this one.
$$=$source;
yy.addLink($source, $target, $amount);
}
;
tail
: stream { $$ = $stream }
| node { $$ = $node; }
;
// The amount text is converted to a number with parseFloat.
amount: AMOUNT { $$=parseFloat($AMOUNT); };
// A node is referenced by a bare id or by a quoted label; both go through yy.findOrCreateNode.
node
: NODE_ID { $$ = yy.findOrCreateNode($NODE_ID); }
| OPEN_STRING STRING\[node_label] CLOSE_STRING { $$ = yy.findOrCreateNode($node_label); }
;

View File

@ -1,105 +1,82 @@
/** mermaid */
//----------------------------------------------------
// Sankey data is written in CSV format, as defined by RFC 4180:
// https://www.ietf.org/rfc/rfc4180.txt
//----------------------------------------------------
%lex
TOKEN \w+
NUM \d+(.\d+)?
%options case-insensitive
%options easy_keword_rules
%s link_value
%x attributes
%x attr_value
%x string
// as per section 6.1 of RFC 2234 [2]
COMMA \u002C
CR \u000D
LF \u000A
CRLF \u000D\u000A
DQUOTE \u0022
TEXTDATA [\u0020-\u0021\u0023-\u002B\u002D-\u007E]
%%
//--------------------------------------------------------------
// skip all whitespace EXCEPT newlines, but not within a string
//--------------------------------------------------------------
<INITIAL,link_value,attributes,attr_value>[^\S\r\n]+ {}
<<EOF>> { return 'EOF' }
//--------------
// basic tokens
//--------------
(<<EOF>>|[\n;])+ { return 'EOS'; } // end of statement is semicolon ; new line \n or end of file
"sankey" { return 'SANKEY'; }
<INITIAL>{TOKEN} { return 'NODE_ID'; }
<link_value>{NUM} { return 'AMOUNT'; }
"->" {
if(this.topState()!=='link_value') this.pushState('link_value');
else this.popState();
return 'ARROW';
}
//------------
// attributes
//------------
"[" { this.pushState('attributes'); return 'OPEN_ATTRIBUTES'; }
<attributes>"]" { this.popState(); return 'CLOSE_ATTRIBUTES'; }
<attributes>{TOKEN} { return 'ATTRIBUTE'; }
<attributes>\= { this.pushState('attr_value'); return 'EQUAL'; }
<attr_value>{TOKEN} { this.popState(); return 'VALUE'; }
//------------
// strings
//------------
<INITIAL,attributes,attr_value>\" { this.pushState('string'); return 'OPEN_STRING'; }
<string>(?!\\)\" {
if(this.topState()==='string') this.popState();
if(this.topState()==='attr_value') this.popState();
return 'CLOSE_STRING';
}
<string>([^"\\]|\\\"|\\\\)+ { return 'STRING'; }
"sankey" { return 'SANKEY' }
{COMMA} { return 'COMMA' }
{DQUOTE} { return 'DQUOTE' }
({CRLF}|{LF}) { return 'NEWLINE' }
{TEXTDATA}* { return 'NON_ESCAPED_TEXT' }
({TEXTDATA}|{COMMA}|{CR}|{LF}|{DQUOTE}{DQUOTE})* { return 'ESCAPED_TEXT' }
/lex
%start start
%left ARROW
%% // language grammar
start
: EOS SANKEY document
| SANKEY document
: SANKEY file opt_eof
;
document
: line document
|
file: csv opt_newline;
csv
: record csv_tail
;
line
: node optional_attributes EOS
| stream optional_attributes EOS
| EOS
csv_tail
: NEWLINE csv
| // empty
;
optional_attributes: OPEN_ATTRIBUTES attributes CLOSE_ATTRIBUTES | ;
attributes: attribute attributes | ;
attribute: ATTRIBUTE EQUAL value | ATTRIBUTE;
value: VALUE | OPEN_STRING STRING CLOSE_STRING;
stream
: node\[source] ARROW amount ARROW tail\[target] {
$$=$source;
yy.addLink($source, $target, $amount);
}
opt_newline
: NEWLINE
| // empty
;
tail
: stream { $$ = $stream }
| node { $$ = $node; }
;
amount: AMOUNT { $$=parseFloat($AMOUNT); };
node
: NODE_ID { $$ = yy.findOrCreateNode($NODE_ID); }
| OPEN_STRING STRING\[node_label] CLOSE_STRING { $$ = yy.findOrCreateNode($node_label); }
opt_eof
: EOF
| // empty
;
// One CSV row: three comma-separated fields - source node, target node and link value.
// Limiting a row to three fields is a restriction of this grammar, not of the CSV standard.
record
: field\[source] COMMA field\[target] COMMA field\[value] {
// Resolve both endpoints first, then register the link with its numeric value.
$$ = yy.addLink(yy.findOrCreateNode($source), yy.findOrCreateNode($target), parseFloat($value));
}
| // an empty row is accepted so that blank lines parse; this is also outside the CSV standard
;
// A field is either a double-quoted (escaped) value or a bare (non-escaped) value.
field
: escaped { $$=$escaped; }
| non_escaped { $$=$non_escaped; }
;
// RFC 4180 section 2.7: inside a quoted field a literal double quote is written as two
// double quotes, so each doubled quote is collapsed back to a single one.
escaped: DQUOTE ESCAPED_TEXT DQUOTE { $$=$ESCAPED_TEXT.split('""').join('"'); };
// Bare fields are passed through unchanged.
non_escaped: NON_ESCAPED_TEXT { $$=$NON_ESCAPED_TEXT; };

View File

@ -0,0 +1,33 @@
// @ts-ignore: jison doesn't export types
import diagram from './sankey.jison';
// @ts-ignore: jison doesn't export types
import { parser } from './sankey.jison';
import db from '../sankeyDB.js';
// import { fail } from 'assert';
/**
 * Parser tests for the Sankey grammar: feeds the bundled `energy.csv` fixture
 * through the generated parser.
 */
describe('Sankey diagram', function () {
// TODO - these examples should be put into ./parser/stateDiagram.spec.js
describe('when parsing an info graph it', function () {
beforeEach(function () {
// Point both the named `parser` export and the default export's parser at the
// Sankey DB, then call the DB's clear() before each test.
parser.yy = db;
diagram.parser.yy = db;
diagram.parser.yy.clear();
});
it('parses csv', async () => {
const fs = require('fs');
const path = require('path').resolve(__dirname, "./energy.csv");
// Use the promise API: the callback form of readFile returns undefined, so awaiting it
// let the test finish before parsing ran and hid any error thrown inside the callback.
const data: string = await fs.promises.readFile(path, 'utf8');
// A real newline must separate the `sankey` keyword from the CSV rows
// (`\\n` in a template literal is a literal backslash followed by `n`).
const str = `sankey\n${data}`;
parser.parse(str);
});
});
});