Modify nodejs solution for new file type - Audio transcript to Chord D3js file
New here? Learn about Bountify and follow @bountify to get notified of new bounties! x

We have this solution posted here -

https://bountify.co/nodejs-circular-chord-diagram-from-audio-transcript

We have been working to see how to put this solution into practice and found a better file format that we should use instead. AWS reliably outputs this JSON file format, which will replace the simpler txt file that the other bounty used. This bounty is to fork the previous solution and make it work with the JSON file.

https://crts-assets.s3.amazonaws.com/transcribe/asrOutput%20%282%29.json

awarded to Twann
Tags
nodejs

Crowdsource coding tasks.

1 Solution

Winning solution

The following should work.
The script reads 'asrOutput.json' and renders it as HTML in the file 'chorddiagram.html'.

let fs = require('fs')

/**
 * Reduce one speaker-label segment to the three fields used downstream.
 *
 * @param {object} segment - Object exposing `speaker_label`, `start_time` and an `items` collection.
 * @returns {{name: *, time: *, wordCount: number}} The speaker label, the segment start time
 *   (passed through untouched) and the number of entries in `items`.
 */
let getSpeakerData = ({speaker_label, start_time, items}) => ({
  name: speaker_label,
  time: start_time,
  wordCount: items.length,
})


/**
 * List every distinct speaker name, in order of first appearance.
 *
 * @param {Iterable<{name: *}>} speakerDatas - Entries carrying a `name` property.
 * @returns {[number, Array]} A pair: how many distinct names were found, and the names themselves.
 */
let countSpeakers = speakerDatas => {
  // A Set remembers insertion order, so spreading it yields first-seen order.
  let seen = new Set()
  for (let entry of speakerDatas) {
    seen.add(entry.name)
  }

  let uniqueNames = [...seen]
  return [uniqueNames.length, uniqueNames]
}


/**
 * Build the speaker-to-speaker word-count matrix that feeds the chord diagram.
 *
 * A new turn starts whenever the speaker changes. Every word of a turn is
 * credited as spoken by the current speaker to whoever held the floor just
 * before the turn began, so `matrix[a][b]` is the number of words speaker `a`
 * said in reply to speaker `b`.
 *
 * Consecutive segments from the same speaker are one continuous speech: their
 * words are added to the same cell as the first segment of that turn. The
 * opening turn has no previous speaker, so its words are not counted.
 *
 * @param {Array<{name: string, wordCount: number}>} speakerDatas - Segments in
 *   spoken order. The array and its entries are left unmodified.
 * @returns {[string[], number[][]]} The distinct speaker names (first-seen
 *   order) and the square interaction matrix indexed by position in that list.
 */
let computeInteractions = speakerDatas => {
  let [speakerCount, names] = countSpeakers(speakerDatas)

  // Resolve name -> row/column once instead of scanning `names` for every segment.
  let indexByName = new Map(names.map((name, index) => [name, index]))

  let interactionMatrix
    = Array.from({length: speakerCount}, () => Array(speakerCount).fill(0))

  // Column of the speaker the current turn is addressed to.
  // Stays undefined for the whole opening turn, which has nobody to reply to.
  let addresseeIndex

  for (let i = 1; i < speakerDatas.length; i++)
  {
    let prevName = speakerDatas[i - 1].name
    let {name: currName, wordCount} = speakerDatas[i]

    // A change of speaker opens a new turn addressed to the previous speaker.
    // We assume that a speaker speaking twice in a row isn't speaking to itself,
    // so a repeated name keeps the addressee of the running turn.
    if (prevName !== currName)
    {
      addresseeIndex = indexByName.get(prevName)
    }

    if (addresseeIndex !== undefined)
    {
      interactionMatrix[indexByName.get(currName)][addresseeIndex] += wordCount
    }
  }

  return [names, interactionMatrix]
}


/**
 * Produce a standalone HTML page that draws the interaction matrix as a D3
 * chord diagram.
 *
 * The page loads D3 from cdnjs, embeds `names` and `interactionMatrix` as a
 * JSON literal inside an inline script, renders the chart into `#diagram`,
 * and re-renders it on every window resize.
 *
 * Details that are easy to miss in the minified client code:
 * - Every `<` in the embedded JSON is written as `\u003c`, so a speaker name
 *   such as `</script>` cannot close the inline script element. The browser
 *   decodes the escape back to `<` when it evaluates the string literal.
 * - The colour palette is cut to one colour per speaker. `d3.scaleOrdinal`
 *   recycles its range, so a shorter palette would give two speakers the same
 *   colour.
 * - The hover handlers take the DOM event as their first argument (D3 v6+
 *   listener signature) and reach the hovered ribbon through
 *   `event.currentTarget`, instead of the non-standard `Event.path`.
 * - The `${'...'}` pieces are plain strings holding client-side template
 *   literals, kept out of reach of this function's own interpolation.
 *
 * NOTE(review): the ribbon tooltip reads "target spoke source.value words to
 * source". If `interactionMatrix[i][j]` means "words i spoke to j", that
 * wording looks reversed - confirm against the matrix producer.
 *
 * @param {string[]} names - Speaker names; index i labels row/column i of the matrix.
 * @param {number[][]} interactionMatrix - Square matrix of word counts.
 * @returns {string} The complete HTML document.
 */
let generateHTML = (names, interactionMatrix) =>
`<!DOCTYPE html>
<html>

<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width">
  <title>Chord Diagram from Audio Transcript</title>
</head>

<body>
  <div id="diagram"></div>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/d3-chord/3.0.0/d3-chord.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/7.3.0/d3.min.js"></script>
  <script>
    let [names, matrix] = ${JSON.stringify([names,interactionMatrix]).replace(/</g,'\\u003c')}

let chart=(speakerNames,interactionMatrix)=>{const width=window.innerWidth;const height=window.innerHeight;const svg=d3.create("svg").attr("viewBox",[-width/2,-height/2,width,height]);const data=Object.assign(interactionMatrix,{names:speakerNames,colors:["#43AA8B","#F94144","#F3722C","#ec1d25","#c8125c","#008fc8","#10218b","#134b24","#737373"].slice(0,speakerNames.length)});const chords=d3.chord().padAngle(10/350).sortSubgroups(d3.descending).sortChords(d3.descending)(data);const names=data.names===undefined?d3.range(data.length):data.names;const colors=data.colors===undefined?d3.quantize(d3.interpolateRainbow,names.length):data.colors;const color=d3.scaleOrdinal(names,colors);const outerRadius=Math.min(width,height)*0.5-60;const innerRadius=outerRadius-10;const arc=d3.arc().innerRadius(innerRadius).outerRadius(outerRadius);const formatValue=d3.format("1");const tickStep=d3.tickStep(0,d3.sum(data.flat()),100);function ticks({startAngle,endAngle,value}){const k=(endAngle-startAngle)/value;return d3.range(0,value,tickStep).map(value=>{return{value,angle:value*k+startAngle}})}const ribbon=d3.ribbon().radius(innerRadius-1).padAngle(1/innerRadius);const group=svg.append("g").attr("font-size",10).attr("font-family","sans-serif").selectAll("g").data(chords.groups).join("g");group.append("path").attr("fill",d=>color(names[d.index])).attr("d",arc);group.append("title").text(d=>${'`${names[d.index]}'}
${'${formatValue(d.value)}`'});const groupTick=group.append("g").selectAll("g").data(ticks).join("g").attr("transform",d=>${'`rotate(${d.angle*180/Math.PI-90}) translate(${ outerRadius },0)`'});groupTick.append("line").attr("stroke","currentColor").attr("x2",6);groupTick.append("text").attr("x",8).attr("dy","0.35em").attr("transform",d=>d.angle>Math.PI?"rotate(180) translate(-16)":null).attr("text-anchor",d=>d.angle>Math.PI?"end":null).text(d=>formatValue(d.value)+' words');group.select("text").attr("font-weight","bold").text(function(d){return this.getAttribute("text-anchor")==="end"?${'`↑ ${names[d.index]}`:`${names[d.index]} ↓`}'});svg.append("g").attr("fill-opacity",0.8).selectAll("path").data(chords).join("path").style("mix-blend-mode","multiply").on("mouseenter",event=>{d3.select(event.currentTarget.parentNode).attr("fill-opacity",0.3);d3.select(event.currentTarget).attr("fill-opacity", 1);}).on("mouseleave",event=>{d3.select(event.currentTarget.parentNode).attr("fill-opacity", 1);d3.select(event.currentTarget).attr("fill-opacity", null);}).attr("fill",d=>color(names[d.source.index])).attr("d",ribbon).append("title").text(d=>${'`${names[d.target.index]} spoke ${formatValue(d.source.value)} words to ${names[d.source.index]}${d.source.index===d.target.index?"":`\n${names[d.source.index]} spoke ${formatValue(d.target.value)} words to ${names[d.target.index]}`}`'});return svg.node()}

d3.select(window)
  .on("resize", function() {
    let diagram = document.getElementById('diagram')
    diagram.innerHTML = ''
    diagram.appendChild(chart(names, matrix))
  });


document.getElementById('diagram').appendChild(chart(names, matrix))
  </script>

</body>

</html>`


/**
 * Read a transcript JSON file and write the matching chord diagram page to
 * `chorddiagram.html` in the current working directory.
 *
 * The file must contain a `results.speaker_labels.segments` array. Each
 * segment is passed through `getSpeakerData`, the results are aggregated by
 * `computeInteractions`, and the page is produced by `generateHTML`.
 *
 * All work is asynchronous and nothing is returned. A failure (unreadable
 * file, invalid JSON, missing segments, write error) is logged to the console
 * and stops processing.
 *
 * @param {string} transcriptFilePath - Path of the transcript JSON file to read.
 */
let renderTranscript = transcriptFilePath => {
  fs.readFile(transcriptFilePath, 'utf8', (err, data) => {
    // Stop on a read failure: `data` is undefined here and parsing it would throw.
    if (err) return console.log(err)

    let json
    try {
      json = JSON.parse(data)
    } catch (parseErr) {
      // An exception thrown inside this callback would crash the process.
      return console.log(parseErr)
    }

    let speakerLabels = json && json.results && json.results.speaker_labels
    let segments = speakerLabels && speakerLabels.segments
    if (!Array.isArray(segments)) {
      return console.log(new Error(
        `No 'results.speaker_labels.segments' array found in '${transcriptFilePath}'`
      ))
    }

    let speakerDatas = segments.map(getSpeakerData)
    let [names, matrix] = computeInteractions(speakerDatas)

    fs.writeFile('chorddiagram.html', generateHTML(names, matrix), 'utf8', err => {
      if (err) return console.log(err);
      console.log('`chorddiagram.html` generated.');
    })
  })
}

// Script entry point: reads 'asrOutput.json' and renders it as HTML in the file
// 'chorddiagram.html'. The call returns immediately; the work happens in callbacks.
renderTranscript('asrOutput.json')