farolanf's solution to "Add additional MERGE functionality to existing Node script"

## Changes

* Added `-r` option to traverse the merge dir recursively to find JSON files
* Added `-f pattern` option to filter filenames (the pattern is a case-insensitive regular expression)
* Added `-g` option to show basic progress

## Example

    spmer -m CA -o sample.json -rf OSLER

Merge the files in the CA folder into sample.json, recursively (`-r`), including only filenames that contain OSLER (`-f pattern`).

## spmer.js
    #!/usr/bin/env node
    /**
     * spmer - split a json-line file into per-record JSON files, or merge the
     * json-line files found in a directory into one output file.
     *
     *   node spmer.js -s FILE [options]   split FILE
     *   node spmer.js -m DIR  [options]   merge the *.json files in DIR
     *
     * A json-line file holds one JSON document per line. The "bracketed"
     * variant wraps every line as `[<json>],` (the trailing comma is optional).
     */
    'use strict';

    const fs = require('fs');
    const path = require('path');
    const clu = require('command-line-usage');
    const cla = require('command-line-args');

    // Exit status used on every failure path (process.exit(-1) shows up as 255).
    const EXIT_FAILURE = 1;

    const splitOptions = [
      { name: 'split', alias: 's', type: String, arg: 'file', desc: 'a json-line file to split' },
      { name: 'name-key', alias: 'n', type: String, arg: 'key', desc: 'key for the name of file, will groups objects with the same file' },
      { name: 'path-key', alias: 'p', type: String, arg: 'key', desc: 'key for the output path' },
      { name: 'omit-name', alias: 't', type: Boolean, desc: 'omit the name key' },
      { name: 'omit-path', alias: 'u', type: Boolean, desc: 'omit the path key' },
      { name: 'out-key', alias: 'k', type: String, arg: 'key', desc: 'output the groups as array value of this key' },
      { name: 'append', alias: 'a', type: Boolean, desc: 'append to existing files' },
    ];

    const mergeOptions = [
      { name: 'merge', alias: 'm', type: String, arg: 'dir', desc: 'dir with json files to merge' },
      { name: 'filter', alias: 'f', type: String, arg: 'pattern', desc: 'filter files according to pattern' },
      { name: 'recursive', alias: 'r', type: Boolean, desc: 'traverse dir recursively' },
      { name: 'merge-output', alias: 'o', type: String, arg: 'file', desc: 'merge output file' },
      { name: 'index', alias: 'x', type: String, arg: 'ESJSON index key', desc: 'specify index key for ESJSON' },
    ];

    const generalOptions = [
      { name: 'progress', alias: 'g', type: Boolean, desc: 'show progress' },
      { name: 'output-dir', alias: 'd', type: String, defaultValue: '.', arg: 'dir', desc: 'root output dir, defaults to current dir' },
      { name: 'help', alias: 'h', type: Boolean, desc: 'show this help' },
    ];

    const optionDefs = splitOptions.concat(mergeOptions).concat(generalOptions);

    const help = [
      {
        header: 'spmer',
        content: [
          'Split a json-line file or merge json files.',
          '',
          'A json-line file is a file containing valid json on each line.',
        ],
      },
      {
        header: 'Usage',
        content: [
          'node spmer.js -s FILE [options]',
          'node spmer.js -m DIR [options]',
          '',
          'The first form to split FILE.',
          'The second form to merge files in DIR.',
        ],
      },
      getSectionOption('Split options', splitOptions),
      getSectionOption('Merge options', mergeOptions),
      getSectionOption('General options', generalOptions),
    ];

    // parse options
    const opts = cla(optionDefs);

    // Help is handled before any validation so that a bare `-h` succeeds.
    if (opts.help) {
      console.log(clu(help));
      process.exit(0);
    }

    if (!opts.split && !opts.merge) {
      exitErr('Please specify an action: -s (split) or -m (merge).');
    }

    // Compiled once here instead of once per visited file; null means "no filter".
    const filterRegExp = compileFilter(opts.filter);

    if (opts.split) {
      splitFile(opts.split);
    } else {
      mergeFiles(opts.merge);
    }

    /**
     * Build one command-line-usage section listing a group of options.
     *
     * @param {string} title - section header
     * @param {Array<object>} optionDef - option definitions (name, alias, arg, desc)
     * @returns {{header: string, content: Array<{a: string, b: string, c: string}>}}
     */
    function getSectionOption(title, optionDef) {
      return {
        header: title,
        content: optionDef.map(o => ({
          a: '-' + o.alias,
          b: '--' + o.name + ' ' + (o.arg || ''),
          c: o.desc,
        })),
      };
    }

    /**
     * Print the usage text followed by an "Error" section, then exit non-zero.
     *
     * @param {string} str - error message shown to the user
     */
    function exitErr(str) {
      const errorSection = { header: 'Error', content: str };
      console.log(clu(help.concat([errorSection])));
      process.exit(EXIT_FAILURE);
    }

    /**
     * Compile the -f pattern as a case-insensitive regular expression.
     *
     * @param {string|null|undefined} pattern - value of --filter
     * @returns {RegExp|null} null when no filter was requested
     */
    function compileFilter(pattern) {
      if (!pattern) {
        return null;
      }
      try {
        return new RegExp(pattern, 'i');
      } catch (err) {
        exitErr('Invalid filter pattern: ' + err.message);
        return null; // not reached: exitErr terminates the process
      }
    }

    /**
     * Parse one non-blank input line into the records it carries.
     *
     * @param {string} line - raw line from the input file
     * @param {boolean} bracketed - true when lines look like `[<json>],`
     * @returns {Array<*>} the parsed record(s)
     * @throws {SyntaxError} when the line is not valid JSON
     */
    function parseRecords(line, bracketed) {
      const text = line.trim();
      if (!bracketed) {
        return [JSON.parse(text)];
      }
      // Parse the whole `[...]` wrapper rather than regex-matching up to the
      // first `]`, so records that contain arrays themselves still work.
      const parsed = JSON.parse(text.replace(/,$/, ''));
      return Array.isArray(parsed) ? parsed : [parsed];
    }

    /**
     * Load an existing --out-key output file so new records can be appended.
     * Exits the process with a message when the file is not a JSON object.
     *
     * @param {string} file - path of the existing output file
     * @returns {object} the parsed file content
     */
    function readGroupFile(file) {
      try {
        const parsed = JSON.parse(fs.readFileSync(file, 'utf8'));
        if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
          throw new SyntaxError('not a JSON object');
        }
        return parsed;
      } catch (x) {
        if (x instanceof SyntaxError) {
          console.log([
            '',
            'Error:',
            '  A file exists with the same name but not in a valid JSON format.',
            "  Perhaps it's the result of previous operation?",
            '  Please delete the file or specify another output-dir.',
            '',
          ].join('\n'));
        } else {
          console.log(x);
        }
        process.exit(EXIT_FAILURE);
        return null; // not reached
      }
    }

    /**
     * Split a json-line file into one output file per distinct name-key value.
     *
     * Without --out-key each record is written as one line of
     * `<output-dir>/<path-key value>/<name-key value>.json`. With --out-key the
     * records of a file are collected in an array stored under that key and
     * the file is written once, after the whole input has been read.
     *
     * @param {string} inputPath - the json-line file to split
     */
    function splitFile(inputPath) {
      const nameKey = opts['name-key'];
      const pathKey = opts['path-key'];
      const outKey = opts['out-key'];

      if (!nameKey) {
        exitErr('Please specify the name-key.');
      }

      let text;
      try {
        text = fs.readFileSync(inputPath, 'utf8');
      } catch (err) {
        console.error(err);
        process.exit(EXIT_FAILURE);
      }

      const lines = text.split('\n');

      // The format is decided by the first non-blank line: a leading `[`
      // means the bracketed `[<json>],` style, anything else is NDJSON.
      const firstLine = lines.find(line => line.trim() !== '');
      const bracketed = firstLine !== undefined && firstLine.trim().startsWith('[');

      const started = new Set(); // output files already truncated in this run
      const groups = new Map();  // --out-key mode: output file -> pending object

      lines.forEach((line, lineIndex) => {
        if (line.trim() === '') {
          return;
        }

        let records;
        try {
          records = parseRecords(line, bracketed);
        } catch (err) {
          console.error(inputPath + ':' + (lineIndex + 1) + ': ' + err.message);
          process.exit(EXIT_FAILURE);
        }

        records.forEach(record => {
          const isObject = record !== null && typeof record === 'object';
          const filename = isObject ? record[nameKey] : undefined;
          if (filename === undefined || filename === null) {
            // Without a name there is no file to write to; previously this
            // produced a file literally called "undefined.json".
            console.error('Skipping record on line ' + (lineIndex + 1) +
              ': missing name key "' + nameKey + '"');
            return;
          }
          const subdir = pathKey ? (record[pathKey] || '') : '';

          if (opts['omit-name']) {
            delete record[nameKey];
          }
          if (pathKey && opts['omit-path']) {
            delete record[pathKey];
          }

          const outfile = path.join(getOutputPath(String(subdir)), filename + '.json');

          if (outKey) {
            let group = groups.get(outfile);
            if (!group) {
              // Only --append keeps what an earlier run left in the file.
              group = opts.append && fs.existsSync(outfile) ? readGroupFile(outfile) : {};
              if (!Array.isArray(group[outKey])) {
                group[outKey] = [];
              }
              groups.set(outfile, group);
            }
            group[outKey].push(record);
            return;
          }

          const data = JSON.stringify(record) + '\n';
          if (!opts.append && !started.has(outfile)) {
            // First write to this file in a non-append run: start it afresh.
            started.add(outfile);
            fs.writeFileSync(outfile, data);
          } else {
            fs.appendFileSync(outfile, data);
          }
        });
      });

      // One write per output file instead of a read-modify-write per record.
      groups.forEach((group, outfile) => {
        fs.writeFileSync(outfile, JSON.stringify(group));
      });
    }

    /**
     * Ask for the output format, then merge every matching file under
     * `mergeDir` into the --merge-output file.
     *
     * @param {string} mergeDir - directory holding the files to merge
     */
    function mergeFiles(mergeDir) {
      if (!opts['merge-output']) {
        exitErr('Please specify merge-output file.');
      }
      const root = path.resolve(mergeDir);
      if (!fs.existsSync(root) || !fs.statSync(root).isDirectory()) {
        exitErr('Merge dir not found: ' + mergeDir);
      }

      // get the desired output format from the user
      getFormat(function (format) {
        if (Number(format) === 3 && !opts.index) {
          console.log('You forgot to declare an index (e.g.- pid) at EOL, run script again.');
          process.exit(EXIT_FAILURE);
        }
        const outStream = getOutStream();
        walkDir(root, writeJSON.bind(null, format, outStream));
      });
    }

    /**
     * walkDir callback: append one input file to the merged output.
     *
     * Files are read synchronously so that output follows traversal order and
     * the stream is only ended after every file has been written.
     *
     * @param {number} format - 1 minified JSON, 2 NDJSON, 3 ESJSON
     * @param {fs.WriteStream} outStream - merged output stream
     * @param {string|null} filepath - file to append; null marks the end of the walk
     */
    function writeJSON(format, outStream, filepath) {
      if (!filepath) {
        outStream.end();
        return;
      }
      if (!filepath.endsWith('.json')) {
        return;
      }
      // Never merge the output file into itself when it sits inside the merge dir.
      if (path.resolve(filepath) === path.resolve(String(outStream.path))) {
        return;
      }

      let data;
      try {
        data = fs.readFileSync(filepath, 'utf8');
      } catch (err) {
        console.error('Skipping ' + filepath + ': ' + err.message);
        return;
      }

      let mergedString = '';
      for (const line of data.split('\n')) {
        const item = line.trim(); // also drops the \r of CRLF files
        if (item === '') {
          continue;
        }
        try {
          mergedString += getJSON(format, item, opts.index);
        } catch (err) {
          console.error(filepath + ': ' + err.message);
          process.exit(EXIT_FAILURE);
        }
      }
      outStream.write(mergedString);
    }

    /**
     * Open the merge output file inside the output dir.
     *
     * @returns {fs.WriteStream} stream for the --merge-output file
     */
    function getOutStream() {
      const filename = opts['merge-output'];
      if (!filename) {
        exitErr('Please specify merge-output file.');
      }
      const filepath = path.join(getOutputPath(), filename);
      return fs.createWriteStream(filepath);
    }

    /**
     * Visit the files in `dir`, descending into sub-directories only with -r.
     *
     * `fn` is called with the path of every file whose name passes the -f
     * filter and, once the top-level walk is complete, with `null`.
     *
     * @param {string} dir - directory to list
     * @param {function(?string): void} fn - per-file callback
     * @param {number} [depth=0] - recursion depth; 0 for the initial call
     */
    function walkDir(dir, fn, depth = 0) {
      for (const file of fs.readdirSync(dir)) {
        const filepath = path.join(dir, file);

        let stats;
        try {
          stats = fs.statSync(filepath);
        } catch (err) {
          // e.g. a dangling symlink: report it and keep going.
          console.error('Skipping ' + filepath + ': ' + err.message);
          continue;
        }

        if (stats.isDirectory()) {
          if (opts.recursive) {
            walkDir(filepath, fn, depth + 1);
          }
          continue;
        }

        if (opts.progress) {
          process.stdout.write('.');
        }
        if (!filterRegExp || filterRegExp.test(file)) {
          fn(filepath);
        }
      }

      if (depth === 0) {
        if (opts.progress) {
          process.stdout.write('\n');
        }
        fn(null);
      }
    }

    /**
     * Render one input line in the requested output format.
     *
     * @param {number|string} format - 1 minified JSON, 2 NDJSON, 3 ESJSON
     * @param {string} item - one JSON document as text
     * @param {string} index - key whose value becomes the ESJSON `_id`
     * @returns {string} text to append to the output, newline-terminated
     * @throws {SyntaxError} format 3 only, when `item` is not valid JSON
     * @throws {RangeError} when `format` is not 1, 2 or 3
     */
    function getJSON(format, item, index) {
      switch (Number(format)) {
        case 1: // minified JSON
          return '[' + item + '],\n';
        case 2: // NDJSON
          return item + '\n';
        case 3: { // ESJSON
          const id = JSON.parse(item)[index];
          // JSON.stringify escapes quotes/backslashes in the id, which plain
          // string concatenation did not.
          return JSON.stringify({ index: { _id: String(id) } }) + '\n' + item + '\n';
        }
        default:
          throw new RangeError('Unknown output format: ' + format);
      }
    }

    /**
     * Prompt on stdin until the user enters 1, 2 or 3, then pass the choice
     * (as a number) to `callback`. Re-invokes itself after invalid input.
     *
     * @param {function(number): void} callback - receives the chosen format
     */
    function getFormat(callback) {
      process.stdout.write(
        'Select output format: 1:minified JSON, 2: NDJSON, 3:ESJSON: '
      );
      process.stdin.setEncoding('utf8');
      process.stdin.once('data', function (val) {
        const choice = Number(String(val).trim());
        // Only whole numbers are formats; 1.5 used to pass the range check.
        if (Number.isInteger(choice) && choice >= 1 && choice <= 3) {
          process.stdin.pause();
          callback(choice);
          return;
        }
        // if input is invalid, ask again
        getFormat(callback);
      });
    }

    /**
     * Create `dir` and any missing parents.
     *
     * @param {string} dir - directory path
     * @returns {string} the same path, for chaining
     */
    function mkDir(dir) {
      fs.mkdirSync(dir, { recursive: true });
      return dir;
    }

    /**
     * Replace every run of whitespace in a name with a single underscore.
     *
     * @param {string} name
     * @returns {string}
     */
    function fixName(name) {
      return name.replace(/\s+/g, '_');
    }

    /**
     * Resolve `dir` below the --output-dir root and make sure it exists.
     *
     * Only `dir` (which comes from the data) is sanitised with fixName; the
     * user's own --output-dir and working directory are used exactly as given.
     *
     * @param {string} [dir=''] - sub-directory relative to the output root
     * @returns {string} absolute path of the existing directory
     */
    function getOutputPath(dir = '') {
      return mkDir(path.resolve(path.join(opts['output-dir'], fixName(String(dir)))));
    }
    

    /*
     * ## Changes
     *
     * - Added `-r` option to traverse merge dir recursively to find json files
     * - Added `-f pattern` option to filter filenames (pattern is case insensitive)
     * - Added `-g` to show basic progress
     *
     * ## Example
     *
     *     spmer -m CA -o sample.json -rf OSLER
     *
     * Merge files in CA folder, output to sample.json, recursively (`-r`),
     * for filenames containing OSLER only (`-f pattern`)
     *
     * ## spmer.js
     *
     * When saved as a standalone executable, the first line of the file must be
     * the shebang: #!/usr/bin/env node
     */
    const fs = require("fs");
    const path = require("path");
    const clu = require('command-line-usage');
    const cla = require('command-line-args');

    // Option tables. `arg` and `desc` are only used to render the help screen;
    // `name`, `alias`, `type` and `defaultValue` are consumed by command-line-args.
    const splitOptions = [
      { name: 'split', alias: 's', type: String, arg: 'file', desc: 'a json-line file to split' },
      { name: 'name-key', alias: 'n', type: String, arg: 'key', desc: 'key for the name of file, will groups objects with the same file' },
      { name: 'path-key', alias: 'p', type: String, arg: 'key', desc: 'key for the output path' },
      { name: 'omit-name', alias: 't', type: Boolean, desc: 'omit the name key' },
      { name: 'omit-path', alias: 'u', type: Boolean, desc: 'omit the path key' },
      { name: 'out-key', alias: 'k', type: String, arg: 'key', desc: 'output the groups as array value of this key' },
      { name: 'append', alias: 'a', type: Boolean, desc: 'append to existing files' },
    ];

    const mergeOptions = [
      { name: 'merge', alias: 'm', type: String, arg: 'dir', desc: 'dir with json files to merge' },
      { name: 'filter', alias: 'f', type: String, arg: 'pattern', desc: 'filter files according to pattern' },
      { name: 'recursive', alias: 'r', type: Boolean, desc: 'traverse dir recursively' },
      { name: 'merge-output', alias: 'o', type: String, arg: 'file', desc: 'merge output file' },
      { name: 'index', alias: 'x', type: String, arg: 'ESJSON index key', desc: 'specify index key for ESJSON' },
    ];

    const generalOptions = [
      { name: 'progress', alias: 'g', type: Boolean, desc: 'show progress' },
      { name: 'output-dir', alias: 'd', type: String, defaultValue: '.', arg: 'dir', desc: 'root output dir, defaults to current dir' },
      { name: 'help', alias: 'h', type: Boolean, desc: 'show this help' },
    ];

    const optionDefs = splitOptions.concat(mergeOptions).concat(generalOptions);

    const help = [
      {
        header: 'spmer',
        content: [
          'Split a json-line file or merge json files.',
          '',
          'A json-line file is a file containing valid json on each line.',
        ],
      },
      {
        header: 'Usage',
        content: [
          'node spmer.js -s FILE [options]',
          'node spmer.js -m DIR [options]',
          '',
          'The first form to split FILE.',
          'The second form to merge files in DIR.',
        ],
      },
      getSectionOption('Split options', splitOptions),
      getSectionOption('Merge options', mergeOptions),
      getSectionOption('General options', generalOptions),
    ];

    // One line of the bracketed format written by merge format 1: "[{object}],"
    // (the trailing comma is optional). Captures the JSON between the outer
    // brackets. The regexes in the original listing were garbled (one of them
    // was not even a valid pattern), so this is a reconstruction from the
    // format described in the accompanying comment.
    const BRACKETED_LINE = /^\s*\[(.*)\]\s*,?\s*$/;

    /**
     * Build one help-screen section from an option table.
     * @param {string} title - section header
     * @param {Array<object>} optionDef - entries of one of the option tables above
     * @returns {{header: string, content: Array<object>}} section for command-line-usage
     */
    function getSectionOption(title, optionDef) {
      return {
        header: title,
        content: optionDef.map(o => ({
          a: '-' + o.alias,
          b: '--' + o.name + ' ' + (o.arg || ''),
          c: o.desc,
        })),
      };
    }

    /**
     * Print the help screen followed by an error section, then exit with -1.
     * @param {string} str - error message to show
     */
    function exitErr(str) {
      help.push({ header: 'Error', content: str });
      console.log(clu(help));
      process.exit(-1);
    }

    // parse options
    const opts = cla(optionDefs);

    // show help (checked before the action check so that a bare `-h` is not
    // reported as a "missing action" error)
    if (opts.help) {
      console.log(clu(help));
      process.exit(0);
    }

    // handle errors
    if (!opts.split && !opts.merge) {
      exitErr('Please specify an action: -s (split) or -m (merge).');
    }

    if (opts.split) {
      const nameKey = opts['name-key'];
      const pathKey = opts['path-key'];
      const outKey = opts['out-key'];
      if (!nameKey) {
        exitErr("Please specify the name-key.");
      }

      // output files already written during this run
      const written = new Set();

      fs.readFile(opts.split, function (err, data) {
        if (err) {
          return console.error(err);
        }
        const lines = data.toString().split("\n");

        // determine the input type from the first line
        const type = BRACKETED_LINE.test(lines[0]) ? "json" : "ndjson";

        for (const line of lines) {
          if (line.trim() === "") {
            continue;
          }
          const json = parseLine(line, type);

          const filename = json[nameKey];
          const filepath = json[pathKey] || '';
          if (opts['omit-name']) {
            delete json[nameKey];
          }
          if (opts['omit-path']) {
            delete json[pathKey];
          }
          const outfile = getOutputPath(filepath) + "/" + filename + ".json";

          // truncate if this is the first time writing to this file
          // and not appending
          let truncate = false;
          if (!opts.append && !written.has(outfile)) {
            truncate = true;
            written.add(outfile);
          }

          if (outKey) {
            // add it to the array on out-key
            let obj;
            if (!truncate && fs.existsSync(outfile)) {
              try {
                obj = JSON.parse(fs.readFileSync(outfile));
              } catch (x) {
                if (x instanceof SyntaxError) {
                  console.log("\nError:\n A file exists with the same name but not in a valid JSON format.\n" +
                    " Perhaps it's the result of previous operation?\n" +
                    " Please delete the file or specify another output-dir.\n");
                } else {
                  console.log(x);
                }
                process.exit(-1);
              }
            } else {
              obj = { [outKey]: [] };
            }
            obj[outKey].push(json);
            fs.writeFileSync(outfile, JSON.stringify(obj));
          } else {
            const text = JSON.stringify(json) + "\n";
            if (truncate) {
              fs.writeFileSync(outfile, text);
            } else {
              fs.appendFileSync(outfile, text);
            }
          }
        }
      });
    } else if (opts.merge) {
      // get the desired output format from the user
      getFormat(function (format) {
        if (Number(format) === 3 && !opts.index) {
          console.log("You forgot to declare an index (e.g.- pid) at EOL, run script again.");
          process.exit();
        }
        const outStream = getOutStream();
        walkDir(path.resolve(opts.merge), writeJSON.bind(null, format, outStream));
      });
    } else {
      console.log("Please provide a correct action");
    }

    /**
     * Parse one non-empty input line.
     * @param {string} line - raw line from the split input file
     * @param {string} type - "json" for the bracketed "[{object}]," format, otherwise "ndjson"
     * @returns {*} the parsed JSON value
     * @throws {SyntaxError} when the line is not valid JSON
     */
    function parseLine(line, type) {
      if (type === "json") {
        const m = line.match(BRACKETED_LINE);
        if (m) {
          return JSON.parse(m[1]);
        }
      }
      return JSON.parse(line);
    }

    /**
     * walkDir callback for merging: append one input file to the output stream.
     *
     * The file is read synchronously so that output follows directory order and
     * the stream can be ended safely when walkDir sends its final `null` call.
     * The original read asynchronously and ended the stream on the first 'drain'
     * event, which could either never fire or fire while files were still pending.
     *
     * @param {number|string} format - 1: minified JSON, 2: NDJSON, 3: ESJSON
     * @param {fs.WriteStream} outStream - merge output stream
     * @param {?string} filepath - file to merge, or null once the walk is complete
     */
    function writeJSON(format, outStream, filepath) {
      if (!filepath) {
        outStream.end();
        return;
      }
      if (!filepath.endsWith(".json")) {
        return;
      }
      let merged = '';
      for (const line of fs.readFileSync(filepath, "utf8").split("\n")) {
        if (line !== "") {
          merged += getJSON(format, line, opts.index);
        }
      }
      outStream.write(merged);
    }

    /**
     * Open the merge output file below the output dir.
     * Exits through exitErr when --merge-output was not given.
     * @returns {fs.WriteStream}
     */
    function getOutStream() {
      const filename = opts['merge-output'];
      if (!filename) {
        exitErr('Please specify merge-output file.');
      }
      return fs.createWriteStream(path.join(getOutputPath(), filename));
    }

    /**
     * Call `fn(filepath)` for every file in `dir` whose name matches the
     * case-insensitive --filter pattern (all files when no filter is set),
     * descending into sub-directories only with --recursive. After the top-level
     * walk has finished, `fn(null)` is called once as an end marker.
     * @param {string} dir - directory to scan
     * @param {function(?string)} fn - callback
     * @param {number} [depth=0] - recursion depth; callers leave this at 0
     */
    function walkDir(dir, fn, depth = 0) {
      const filter = opts.filter ? new RegExp(opts.filter, 'i') : null;
      for (const file of fs.readdirSync(dir)) {
        const filepath = path.join(dir, file);
        if (fs.statSync(filepath).isDirectory()) {
          if (opts.recursive) {
            walkDir(filepath, fn, depth + 1);
          }
          continue;
        }
        if (opts.progress) {
          process.stdout.write('.');
        }
        if (!filter || filter.test(file)) {
          fn(filepath);
        }
      }
      if (depth === 0) {
        if (opts.progress) {
          process.stdout.write('\n');
        }
        fn(null);
      }
    }

    /**
     * Render one input line in the requested output format.
     * @param {number|string} format - 1: minified JSON, 2: NDJSON, 3: ESJSON
     * @param {string} item - one line of JSON text
     * @param {string} index - property path inside the item used as the ESJSON _id
     * @returns {string} text to append to the output ('' for an unknown format)
     */
    function getJSON(format, item, index) {
      switch (Number(format)) {
        case 1: // minified JSON
          return "[" + item + "],\n";
        case 2: // NDJSON
          return item + "\n";
        case 3: { // ESJSON
          const obj = JSON.parse(item);
          // FIXME(security): `index` comes straight from the command line and is
          // evaluated as code so that nested paths like `a.b` work. Replace with
          // a property-path lookup before using this on untrusted input.
          const id = eval('obj.' + index);
          return '{"index":{"_id":"' + id + '"}}\n' + item + "\n";
        }
        default:
          return '';
      }
    }

    /**
     * Ask on stdin for the output format, repeating the prompt until the answer
     * is 1, 2 or 3, then pass the number to `callback`.
     * @param {function(number)} callback
     */
    function getFormat(callback) {
      process.stdout.write("Select output format: 1:minified JSON, 2: NDJSON, 3:ESJSON: ");
      process.stdin.setEncoding('utf8');
      process.stdin.once('data', function (val) {
        // check validity of input
        if (!isNaN(val)) {
          const choice = +val;
          if (1 <= choice && choice <= 3) {
            process.stdin.pause();
            callback(choice);
            return;
          }
        }
        // if input is invalid, ask again
        getFormat(callback);
      });
    }

    /**
     * Create every missing folder along a '/'-separated path, replacing
     * whitespace in each folder name with '_'.
     * @param {string} dir - path to create
     * @returns {string} the path that was created (or already existed)
     */
    function mkDir(dir) {
      return dir.split('/').reduce((built, folder) => {
        const next = built + '/' + fixName(folder);
        if (!fs.existsSync(next)) {
          fs.mkdirSync(next);
        }
        return next;
      }, '');
    }

    /** Replace each run of whitespace in a folder name with a single '_'. */
    function fixName(name) {
      return name.replace(/\s+/g, '_');
    }

    /**
     * Resolve `dir` below the --output-dir root and make sure it exists.
     * @param {string} [dir=''] - sub-directory relative to the output root
     * @returns {string} the directory path as built by mkDir
     */
    function getOutputPath(dir = '') {
      return mkDir(path.resolve(path.join(opts['output-dir'], dir)));
    }
    /*
     * ## Changes
     *
     * - Support input in pretty format
     * - Added `-y` option for pretty output
     * - Added `-r` option to traverse merge dir recursively to find json files
     * - Added `-f pattern` option to filter filenames (pattern is case insensitive)
     * - Added `-g` to show basic progress
     *
     * ## Example
     *
     *     spmer -m CA -o sample.json -rf OSLER
     *
     * Merge files in CA folder, output to sample.json, recursively (`-r`),
     * for filenames containing OSLER only (`-f pattern`)
     *
     * ## spmer.js
     *
     * When saved as a standalone executable, the first line of the file must be
     * the shebang: #!/usr/bin/env node
     */
    const fs = require("fs");
    const path = require("path");
    const clu = require('command-line-usage');
    const cla = require('command-line-args');
    const Transform = require('stream').Transform;
    const Writable = require('stream').Writable;

    // Option tables. `arg` and `desc` are only used to render the help screen;
    // `name`, `alias`, `type` and `defaultValue` are consumed by command-line-args.
    const splitOptions = [
      { name: 'split', alias: 's', type: String, arg: 'file', desc: 'a json-line file to split' },
      { name: 'name-key', alias: 'n', type: String, arg: 'key', desc: 'key for the name of file, will groups objects with the same file' },
      { name: 'path-key', alias: 'p', type: String, arg: 'key', desc: 'key for the output path' },
      { name: 'omit-name', alias: 't', type: Boolean, desc: 'omit the name key' },
      { name: 'omit-path', alias: 'u', type: Boolean, desc: 'omit the path key' },
      { name: 'out-key', alias: 'k', type: String, arg: 'key', desc: 'output the groups as array value of this key' },
      { name: 'append', alias: 'a', type: Boolean, desc: 'append to existing files' },
    ];

    const mergeOptions = [
      { name: 'merge', alias: 'm', type: String, arg: 'dir', desc: 'dir with json files to merge' },
      { name: 'filter', alias: 'f', type: String, arg: 'pattern', desc: 'filter files according to pattern' },
      { name: 'recursive', alias: 'r', type: Boolean, desc: 'traverse dir recursively' },
      { name: 'merge-output', alias: 'o', type: String, arg: 'file', desc: 'merge output file' },
      { name: 'index', alias: 'x', type: String, arg: 'key', desc: 'specify index key for ESJSON' },
      { name: 'mjson', type: Boolean, desc: 'output merged as minified-JSON' },
      { name: 'ndjson', type: Boolean, desc: 'output merged as NDJSON' },
      { name: 'esjson', type: Boolean, desc: 'output merged as ESJSON' },
    ];

    const generalOptions = [
      { name: 'pretty', alias: 'y', type: Boolean, desc: 'pretty json output' },
      { name: 'progress', alias: 'g', type: Boolean, desc: 'show progress' },
      { name: 'output-dir', alias: 'd', type: String, defaultValue: '.', arg: 'dir', desc: 'root output dir, defaults to current dir' },
      { name: 'help', alias: 'h', type: Boolean, desc: 'show this help' },
    ];

    const optionDefs = splitOptions.concat(mergeOptions).concat(generalOptions);

    const help = [
      {
        header: 'spmer',
        content: [
          'Split a json-line file or merge json files.',
          '',
          'A json-line file is a file containing valid json on each line.',
        ],
      },
      {
        header: 'Usage',
        content: [
          'node spmer.js -s FILE [options]',
          'node spmer.js -m DIR [options]',
          '',
          'The first form to split FILE.',
          'The second form to merge files in DIR.',
        ],
      },
      getSectionOption('Split options', splitOptions),
      getSectionOption('Merge options', mergeOptions),
      getSectionOption('General options', generalOptions),
    ];

    /**
     * Build one help-screen section from an option table.
     * Options without an alias get `null` in the alias column.
     * @param {string} title - section header
     * @param {Array<object>} optionDef - entries of one of the option tables above
     * @returns {{header: string, content: Array<object>}} section for command-line-usage
     */
    function getSectionOption(title, optionDef) {
      return {
        header: title,
        content: optionDef.map(o => ({
          a: o.alias ? '-' + o.alias : null,
          b: '--' + o.name + ' ' + (o.arg || ''),
          c: o.desc,
        })),
      };
    }

    /**
     * Print the help screen followed by an error section, then exit with -1.
     * @param {string} str - error message to show
     */
    function exitErr(str) {
      help.push({ header: 'Error', content: str });
      console.log(clu(help));
      process.exit(-1);
    }

    // parse options
    const opts = cla(optionDefs);

    // show help (checked before the action check so that a bare `-h` is not
    // reported as a "missing action" error)
    if (opts.help) {
      console.log(clu(help));
      process.exit(0);
    }

    // handle errors
    if (!opts.split && !opts.merge) {
      exitErr('Please specify an action: [-s FILE] to split or [-m DIR] to merge');
    }

    /**
     * Transform stream that cuts a text stream into top-level JSON values.
     * Every balanced `{...}` or `[...]` found in the input is pushed downstream
     * as one string, so the input may be one-value-per-line or pretty-printed.
     */
    class SplitJson extends Transform {
      constructor() {
        super({ encoding: 'utf8', decodeStrings: false });
        // chunks that have not been fully consumed yet
        this._chunks = [];
        // position inside the joined chunks where the next scan starts
        this._offset = 0;
      }

      _save(chunk) {
        this._chunks.push(chunk);
      }

      _chunksStr() {
        return this._chunks.join('');
      }

      /**
       * Drop the leading chunks that lie completely before `offset`.
       * @param {number} offset - scan position within the joined chunks
       * @returns {number} total length of the chunks that were dropped
       */
      _flushChunks(offset) {
        let flushCount = 0;
        let flushLen = 0;
        for (const chunk of this._chunks) {
          if (flushLen + chunk.length > offset) {
            break;
          }
          flushLen += chunk.length;
          flushCount++;
        }
        this._chunks.splice(0, flushCount);
        return flushLen;
      }

      _transform(chunk, enc, cb) {
        this._save(chunk);
        const str = this._chunksStr();
        this._offset = this._split(str, this._offset);
        // keep the offset relative to the chunks that remain buffered
        this._offset -= this._flushChunks(this._offset);
        cb();
      }

      /**
       * Scan `str` from `offset`, pushing every complete top-level value.
       *
       * Only the bracket type that opened the current value is counted, and
       * brackets inside JSON string literals are skipped (the original counted
       * them, which cut values containing e.g. "}" in a string at the wrong
       * place).
       *
       * @param {string} str - all buffered text
       * @param {number} offset - where to start scanning
       * @returns {number} where the next scan must start: the end of `str`, or
       *   the start of a value that is still incomplete
       */
      _split(str, offset) {
        const closer = { '{': '}', '[': ']' };
        let opener = null;
        let startIdx = -1;
        let depth = 0;
        let inString = false;
        let escaped = false;
        let i;
        for (i = offset; i < str.length; i++) {
          const ch = str[i];
          if (depth > 0 && inString) {
            if (escaped) {
              escaped = false;
            } else if (ch === '\\') {
              escaped = true;
            } else if (ch === '"') {
              inString = false;
            }
            continue;
          }
          if (depth > 0 && ch === '"') {
            inString = true;
            continue;
          }
          if (opener ? ch === opener : ch === '{' || ch === '[') {
            if (depth === 0) {
              startIdx = i;
              opener = ch;
            }
            depth++;
          } else if (opener && ch === closer[opener]) {
            depth--;
            if (depth === 0) {
              this.push(str.substr(startIdx, i - startIdx + 1));
              opener = null;
            }
          }
        }
        // an unfinished value is rescanned from its first character next time
        return depth === 0 ? i : startIdx;
      }
    }

    /**
     * Writable stream that receives one JSON value per chunk and files it under
     * `<output-dir>/<path value>/<name value>.json`.
     *
     * Options: nameKey, pathKey, omitName, omitPath, append, outKey (taken from
     * the matching command-line options).
     */
    class WriteJson extends Writable {
      constructor(options) {
        super(Object.assign({}, options, { decodeStrings: false }));
        this.options = options;
        // output files already written during this run
        this.filenames = [];
      }

      _write(chunk, enc, cb) {
        const str = chunk.toString('utf8');
        const { nameKey, pathKey, omitName, omitPath, append, outKey } = this.options;

        // A key may be a dotted path (`a.b`) or start with a bracket (`['a b']`).
        const accessor = key => 'obj' + (key[0] === '[' ? '' : '.') + key;

        const obj = JSON.parse(str);
        // FIXME(security): the keys come straight from the command line and are
        // evaluated as code so that nested paths work. Replace with a
        // property-path lookup before using this on untrusted input.
        const filename = eval(accessor(nameKey));
        // --path-key is optional; without it everything goes to the output root
        const filepath = pathKey ? eval(accessor(pathKey)) || '' : '';
        if (omitName) {
          eval('delete ' + accessor(nameKey));
        }
        if (omitPath && pathKey) {
          eval('delete ' + accessor(pathKey));
        }
        const outfile = getOutputPath(filepath) + "/" + filename + ".json";

        // truncate if this is the first time writing to this file
        // and not appending
        let truncate = false;
        if (!append && !this.filenames.includes(outfile)) {
          truncate = true;
          this.filenames.push(outfile);
        }

        if (outKey) {
          // add it to the array on out-key
          let parent;
          if (!truncate && fs.existsSync(outfile)) {
            try {
              parent = JSON.parse(fs.readFileSync(outfile));
            } catch (x) {
              if (x instanceof SyntaxError) {
                console.log("\nError:\n A file exists with the same name but not in a valid JSON format.\n" +
                  " Perhaps it's the result of previous operation?\n" +
                  " Please delete the file or specify another output-dir.\n");
              } else {
                console.log(x);
              }
              process.exit(1);
            }
          } else {
            parent = { [outKey]: [] };
          }
          parent[outKey].push(obj);
          fs.writeFileSync(outfile, pretty(parent));
        } else {
          const data = pretty(obj) + "\n";
          if (truncate) {
            fs.writeFileSync(outfile, data);
          } else {
            fs.appendFileSync(outfile, data);
          }
        }
        cb();
      }
    }

    /**
     * Single-line textual progress indicator.
     *
     * Options: max (default 100), value (default 0), stream (default
     * process.stdout) and template, in which `:progress`, `:value` and `:max`
     * are replaced on every render.
     */
    class Progress {
      constructor(options) {
        this.max = options.hasOwnProperty('max') ? options.max : 100;
        this.value = options.value || 0;
        this.stream = options.stream || process.stdout;
        this.template = options.template || 'Progress: :progress';
      }

      /** Increase the current value and redraw. */
      add(value) {
        this.value += value;
        this.render();
      }

      // NOTE(review): this method is shadowed by the `value` instance property
      // assigned in the constructor, so `progress.value(x)` is not callable.
      // Nothing in this script calls it.
      value(value) {
        this.value = value;
        this.render();
      }

      /** Finish the progress line. */
      end() {
        this.stream.write('\n');
      }

      render() {
        const progress = Math.round(this.value / this.max * 100) + '%';
        const str = this.template
          .replace(new RegExp(':progress', 'g'), progress)
          .replace(new RegExp(':value', 'g'), this.value)
          .replace(new RegExp(':max', 'g'), this.max);
        // cursorTo only exists on TTY streams; fall back to a carriage return
        // when the output is piped or redirected instead of crashing
        if (typeof this.stream.cursorTo === 'function') {
          this.stream.cursorTo(0);
        } else {
          this.stream.write('\r');
        }
        this.stream.write(str);
      }
    }

    /**
     * Feeds several readable streams into one writable stream, one after the
     * other, in the order they were handed to write().
     */
    class Streams {
      constructor(out) {
        this.out = out;
        this.lastIn = null;
      }

      /**
       * @param {?stream.Readable} ins - next input, or null/undefined to end
       *   the output once the last input has finished
       */
      write(ins) {
        if (!ins) {
          if (this.lastIn) {
            // wrapped in an arrow so that end() runs on the output stream
            // (passing `this.out.end` directly loses its `this`)
            this.lastIn.once('end', () => this.out.end());
          } else {
            this.out.end();
          }
          return;
        }
        const pipe = input => input.pipe(this.out, { end: false });
        if (this.lastIn) {
          this.lastIn.on('end', () => pipe(ins));
        } else {
          pipe(ins);
        }
        this.lastIn = ins;
      }
    }

    if (opts.split) {
      if (!opts['name-key']) {
        exitErr("Please specify the name-key.");
      }
      const writeOpts = {
        nameKey: opts['name-key'],
        pathKey: opts['path-key'],
        omitName: opts['omit-name'],
        omitPath: opts['omit-path'],
        outKey: opts['out-key'],
        append: opts.append,
      };
      const progress = new Progress({ max: fs.statSync(opts.split).size });
      const inStream = fs.createReadStream(opts.split, 'utf8');
      if (opts.progress) {
        inStream.on('data', data => progress.add(data.length));
        inStream.on('end', () => progress.end());
      }
      inStream.pipe(new SplitJson()).pipe(new WriteJson(writeOpts));
    } else if (opts.merge) {
      // get the desired output format from the user
      getFormat(function (format) {
        if (Number(format) === 3 && !opts.index) {
          console.log("You forgot to declare an index (e.g.- pid) at EOL, run script again.");
          process.exit();
        }
        const mergePath = path.resolve(opts.merge);
        const progress = new Progress({
          max: 0,
          template: 'Progress: :progress (:value of :max files)',
        });
        if (opts.progress) {
          // first pass: count the files so that a percentage can be shown
          walkDir(mergePath, filepath => {
            if (filepath) {
              progress.max++;
            }
          });
        }
        const streams = new Streams(getOutStream());
        walkDir(mergePath, filepath => {
          writeJSON(format, streams, filepath);
          if (opts.progress) {
            if (filepath) {
              progress.add(1);
            } else {
              progress.end();
            }
          }
        });
      });
    }

    /**
     * walkDir callback for merging: queue one input file on the output chain.
     * @param {number|string} format - 1: minified JSON, 2: NDJSON, 3: ESJSON
     * @param {Streams} streams - sequential output chain
     * @param {?string} filepath - file to merge, or null once the walk is complete
     */
    function writeJSON(format, streams, filepath) {
      if (!filepath) {
        streams.write(null);
        return;
      }
      if (!filepath.endsWith(".json")) {
        return;
      }

      // converts each JSON value emitted by SplitJson to the output format
      class TransformJson extends Transform {
        constructor() {
          super({ encoding: 'utf8', decodeStrings: false });
        }

        _transform(chunk, enc, cb) {
          this.push(getJSON(format, chunk, opts.index));
          cb();
        }
      }

      const inStream = fs.createReadStream(filepath, 'utf8');
      streams.write(inStream.pipe(new SplitJson()).pipe(new TransformJson()));
    }

    /**
     * Open the merge output file below the output dir.
     * Exits through exitErr when --merge-output was not given.
     * @returns {fs.WriteStream}
     */
    function getOutStream() {
      const filename = opts['merge-output'];
      if (!filename) {
        exitErr('Please specify merge-output file.');
      }
      const filepath = path.join(getOutputPath(), filename);
      return fs.createWriteStream(filepath, 'utf8');
    }

    /**
     * Call `fn(filepath)` for every file in `dir` whose name matches the
     * case-insensitive --filter pattern (all files when no filter is set),
     * descending into sub-directories only with --recursive. After the top-level
     * walk has finished, `fn(null)` is called once as an end marker.
     * @param {string} dir - directory to scan
     * @param {function(?string)} fn - callback
     * @param {number} [depth=0] - recursion depth; callers leave this at 0
     */
    function walkDir(dir, fn, depth = 0) {
      const filter = opts.filter ? new RegExp(opts.filter, 'i') : null;
      for (const file of fs.readdirSync(dir)) {
        const filepath = path.join(dir, file);
        if (fs.statSync(filepath).isDirectory()) {
          if (opts.recursive) {
            walkDir(filepath, fn, depth + 1);
          }
        } else if (!filter || filter.test(file)) {
          fn(filepath);
        }
      }
      if (depth === 0) {
        fn(null);
      }
    }

    /**
     * Render one JSON value in the requested output format.
     * @param {number|string} format - 1: minified JSON, 2: NDJSON, 3: ESJSON
     * @param {string} item - JSON text of one value
     * @param {string} index - property path inside the item used as the ESJSON _id
     * @returns {string|undefined} text to append to the output; undefined for
     *   an unknown format
     */
    function getJSON(format, item, index) {
      const prettyParse = str => pretty(JSON.parse(str));
      switch (Number(format)) {
        case 1: // minified JSON
          return prettyParse("[" + item + "]") + ",\n";
        case 2: // NDJSON
          return prettyParse(item) + "\n";
        case 3: { // ESJSON
          const obj = JSON.parse(item);
          // FIXME(security): `index` comes straight from the command line and is
          // evaluated as code so that nested paths like `a.b` work. Replace with
          // a property-path lookup before using this on untrusted input.
          const id = eval('obj.' + index);
          return prettyParse('{"index":{"_id":"' + id + '"}}') + '\n' + prettyParse(item) + "\n";
        }
        default:
          return undefined;
      }
    }

    /**
     * Serialise an object, indented by two spaces when --pretty is set.
     * @param {object} obj
     * @returns {string}
     * @throws {Error} when `obj` is not of type 'object'
     */
    function pretty(obj) {
      if (typeof obj !== 'object') {
        throw Error('pretty expects an object');
      }
      return JSON.stringify(obj, null, opts.pretty ? 2 : null);
    }

    /**
     * Determine the merge output format and pass it to `callback`:
     * from --mjson / --ndjson / --esjson when given, otherwise by prompting on
     * stdin until the answer is 1, 2 or 3.
     * @param {function(number)} callback
     */
    function getFormat(callback) {
      if (opts.mjson) return callback(1);
      if (opts.ndjson) return callback(2);
      if (opts.esjson) return callback(3);

      process.stdout.write("Select output format: 1:minified JSON, 2: NDJSON, 3:ESJSON: ");
      process.stdin.setEncoding('utf8');
      process.stdin.once('data', function (val) {
        // check validity of input
        if (!isNaN(val)) {
          const choice = +val;
          if (1 <= choice && choice <= 3) {
            process.stdin.pause();
            callback(choice);
            return;
          }
        }
        // if input is invalid, ask again
        getFormat(callback);
      });
    }

    /**
     * Create every missing folder along a '/'-separated path, replacing
     * whitespace in each folder name with '_'.
     * @param {string} dir - path to create
     * @returns {string} the path that was created (or already existed)
     */
    function mkDir(dir) {
      return dir.split('/').reduce((built, folder) => {
        const next = built + '/' + fixName(folder);
        if (!fs.existsSync(next)) {
          fs.mkdirSync(next);
        }
        return next;
      }, '');
    }

    /** Replace each run of whitespace in a folder name with a single '_'. */
    function fixName(name) {
      return name.replace(/\s+/g, '_');
    }

    /**
     * Resolve `dir` below the --output-dir root and make sure it exists.
     * @param {string} [dir=''] - sub-directory relative to the output root
     * @returns {string} the directory path as built by mkDir
     */
    function getOutputPath(dir = '') {
      return mkDir(path.resolve(path.join(opts['output-dir'], dir)));
    }

    User: farolanf

    Question: Add additional MERGE functionality to existing Node script

    Back to question