Nodejs Circular Chord Diagram from audio transcript
New here? Learn about Bountify and follow @bountify to get notified of new bounties! x

We are working on a proof of concept that will parse a transcript txt file and output a visualization of it. First, each speaker in the file should be weighted relative to the other speakers by the number of words they said. Second, we will weight the interactions between speakers by assuming that if speaker 1 talks and then speaker 2 talks, there was an interaction between the two of them. Here is a visualization similar to what we are doing:

Using the above, we would say Speaker B talked the most and that Speaker F seems to have spoken most to Speaker W.

I think you can use D3 for the chord diagram, but I am open to anything open source. The final output should be the diagram with interactivity, so that we can hover over a speaker to see its interactions. Not sure if it is possible, but it would be awesome if hovering showed labels with the quantity, as seen on this page:

The code should be Node.js and set up so it's easy to test. Ideally you can upload the txt file and it will parse it and output the HTML page. The number of speakers will be dynamic, but you can assume some logical bound if needed, such as fewer than 25. Here is a sample txt file for you to use.

awarded to Twann

Crowdsource coding tasks.

1 Solution

Winning solution

Update 4th april 2022: made it width and height responsive
Update 4th april 2022 #2: gave each chord a highlight when the mouse is over it.

This renders the contents of 'transcript.txt' as the HTML file 'chorddiagram.html'. Press Run in the linked project to generate the HTML file.
If you would prefer the script to be a command line utility, for example

node diagramTranscript transcript28.04.2022.txt diagram.html // generates an html file
node diagramTranscript transcript28.04.2022.txt diagram.svg  // generates an svg file

I could add it without too much effort.
In general tell me if you need any simple change.

    let fs = require('fs')

/**
 * Parses one transcript entry: a header line such as `Speaker 1 (00:03):`
 * followed by zero or more lines of spoken text.
 *
 * @param {string} line - Raw text of a single entry (header plus speech lines).
 * @returns {{name: string, time: string, wordCount: number}}
 *   `name` is the header text before the first "(", `time` is the text between
 *   that "(" and the next ")" (empty when the header has no parenthesis), and
 *   `wordCount` is the number of whitespace-separated words after the header.
 */
let getSpeakerData = line => {
  // Accept both LF and CRLF so a trailing '\r' never leaks into the fields.
  const [header = '', ...speechLines] = String(line).split(/\r?\n/)

  const openParen = header.indexOf('(')
  const name = (openParen === -1 ? header : header.slice(0, openParen)).trim()

  let time = ''
  if (openParen !== -1) {
    const afterParen = header.slice(openParen + 1)
    const closeParen = afterParen.indexOf(')')
    // Cut at the closing ")" instead of dropping a fixed number of characters.
    time = (closeParen === -1 ? afterParen : afterParen.slice(0, closeParen)).trim()
  }

  // Join with a space so the last word of one line and the first word of the
  // next stay separate, and ignore empty tokens caused by repeated whitespace.
  const wordCount = speechLines.join(' ').split(/\s+/).filter(Boolean).length

  return {name, time, wordCount}
}

/**
 * Collects the distinct speaker names of a transcript.
 *
 * @param {Array<{name: string}>} speakerDatas - Parsed transcript entries.
 * @returns {[number, string[]]} The number of distinct speakers and their
 *   names in order of first appearance.
 */
let countSpeakers = speakerDatas => {
  // A Set keeps insertion order and removes duplicates in a single pass.
  const uniqueNames = new Set()
  for (const {name} of speakerDatas) {
    uniqueNames.add(name)
  }
  const speakers = [...uniqueNames]
  return [speakers.length, speakers]
}

/**
 * Builds the speaker-to-speaker interaction matrix for a transcript.
 *
 * A turn is treated as addressed to the most recent *different* speaker, and
 * its word count is added to `matrix[speaker][listener]`. Consecutive turns by
 * the same speaker count as one speech, so their words go to the same listener.
 * Turns before any second speaker has appeared have no listener and are not
 * counted.
 *
 * @param {Array<{name: string, wordCount: number}>} speakerDatas - Parsed
 *   entries in transcript order. The array and its items are not modified.
 * @returns {[string[], number[][]]} The distinct speaker names (order of first
 *   appearance) and a square matrix indexed by position in that list.
 */
let computeInteractions = speakerDatas => {
  // Position in `names` doubles as the row/column index of the matrix.
  const names = [...new Set(speakerDatas.map(({name}) => name))]
  const indexByName = new Map(names.map((name, index) => [name, index]))

  // Array.from builds real rows; Array(n).map() would skip the empty slots.
  const interactionMatrix = Array.from(names, () => Array(names.length).fill(0))

  let listenerIndex = -1 // index of the last different speaker, -1 until one exists
  for (let i = 1; i < speakerDatas.length; i++) {
    const prevSpeakerIndex = indexByName.get(speakerDatas[i - 1].name)
    const currSpeakerIndex = indexByName.get(speakerDatas[i].name)

    // A change of speaker starts a new speech aimed at the previous speaker;
    // a repeated speaker keeps talking to the same listener.
    if (prevSpeakerIndex !== currSpeakerIndex) {
      listenerIndex = prevSpeakerIndex
    }

    if (listenerIndex !== -1) {
      interactionMatrix[currSpeakerIndex][listenerIndex] += speakerDatas[i].wordCount
    }
  }

  return [names, interactionMatrix]
}

// Returns the source of the HTML page as a single template literal.
//
// `names` and `interactionMatrix` are serialized with JSON.stringify and
// embedded in the page's inline script as `names` / `matrix`. The inline
// `chart(speakerNames, interactionMatrix)` function builds an SVG with D3
// (d3.chord, d3.arc, d3.ribbon) sized from window.innerWidth/innerHeight, and
// the page appends the result to the #diagram element.
//
// The ${'...'} interpolations insert plain strings containing backticks and
// "${...}" so that the generated page receives its own template literals
// verbatim instead of having them evaluated while this string is built.
//
// NOTE(review): this listing appears to be damaged and is unlikely to run as
// shown. Both <script src=""> URLs are empty, several call chains start
// mid-expression (`"text").attr("font-weight"...`, `[1]).attr("fill-opacity"...`,
// `.on("resize"...`), some `names[]` / `formatValue(` expressions are
// truncated, and the closing </script> tag and closing backtick are not
// visible. Restore these from the original solution before use.
let generateHTML = (names, interactionMatrix) =>
`<!DOCTYPE html>

  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width">
  <title>Chord Diagram from Audio Transcript</title>

  <div id="diagram"></div>
  <script src=""></script>
<script src=""></script>
    let [names, matrix] = ${JSON.stringify([names,interactionMatrix])}

let chart=(speakerNames,interactionMatrix)=>{const width=window.innerWidth;const height=window.innerHeight;const svg=d3.create("svg").attr("viewBox",[-width/2,-height/2,width,height]);const data=Object.assign(interactionMatrix,{names:speakerNames,colors:["#43AA8B","#F94144","#F3722C","#ec1d25","#c8125c","#008fc8","#10218b","#134b24","#737373"].slice(0,speakerNames.length-1)});const chords=d3.chord().padAngle(10/350).sortSubgroups(d3.descending).sortChords(d3.descending)(data);const names=data.names===undefined?d3.range(data.length):data.names;const colors=data.colors===undefined?d3.quantize(d3.interpolateRainbow,names.length):data.colors;const color=d3.scaleOrdinal(names,colors);const outerRadius=Math.min(width,height)*0.5-60;const innerRadius=outerRadius-10;const arc=d3.arc().innerRadius(innerRadius).outerRadius(outerRadius);const formatValue=d3.format("1");const tickStep=d3.tickStep(0,d3.sum(data.flat()),100);function ticks({startAngle,endAngle,value}){const k=(endAngle-startAngle)/value;return d3.range(0,value,tickStep).map(value=>{return{value,angle:value*k+startAngle}})}const ribbon=d3.ribbon().radius(innerRadius-1).padAngle(1/innerRadius);const group=svg.append("g").attr("font-size",10).attr("font-family","sans-serif").selectAll("g").data(chords.groups).join("g");group.append("path").attr("fill",d=>color(names[d.index])).attr("d",arc);group.append("title").text(d=>${'`${names[d.index]}'}
${'${formatValue(d.value)}`'});const groupTick=group.append("g").selectAll("g").data(ticks).join("g").attr("transform",d=>${'`rotate(${d.angle*180/Math.PI-90}) translate(${ outerRadius },0)`'});groupTick.append("line").attr("stroke","currentColor").attr("x2",6);groupTick.append("text").attr("x",8).attr("dy","0.35em").attr("transform",d=>d.angle>Math.PI?"rotate(180) translate(-16)":null).attr("text-anchor",d=>d.angle>Math.PI?"end":null).text(d=>formatValue(d.value)+' words');"text").attr("font-weight","bold").text(function(d){return this.getAttribute("text-anchor")==="end"?${'`↑ ${names[d.index]}`:`${names[d.index]} ↓`}'});svg.append("g").attr("fill-opacity",0.8).selectAll("path").data(chords).join("path").style("mix-blend-mode","multiply").on("mouseenter",d=>{[1]).attr("fill-opacity",0.3);[0]).attr("fill-opacity", 1);}).on("mouseleave",d=>{[1]).attr("fill-opacity", 1);[0]).attr("fill-opacity", null);}).attr("fill",d=>color(names[d.source.index])).attr("d",ribbon).append("title").text(d=>${'`${names[]} spoke ${formatValue(d.source.value)} words to ${names[d.source.index]}${"":`\n${names[d.source.index]} spoke ${formatValue(} words to ${names[]}`}`'});return svg.node()}
  .on("resize", function() {
    let diagram = document.getElementById('diagram')
    diagram.innerHTML = ''
    diagram.appendChild(chart(names, matrix))

document.getElementById('diagram').appendChild(chart(names, matrix))



/**
 * Reads a transcript file, computes the speaker interaction matrix and writes
 * the resulting chord-diagram page to 'chorddiagram.html' in the current
 * working directory. Read and write failures are logged, not thrown.
 *
 * @param {string} transcriptFilePath - Path of the transcript text file.
 */
let renderTranscript = transcriptFilePath => {
  fs.readFile(transcriptFilePath, 'utf8', (err, data) => {
    if (err) return console.log(err)

    // Entries are separated by blank lines; the pattern accepts both LF and
    // CRLF files. Whitespace-only chunks (empty file, trailing blank lines)
    // are dropped because they carry no speaker header to parse.
    let speakerDatas = data
      .split(/(?:\r?\n){2,}/)
      .filter(entry => entry.trim() !== '')
      .map(entry => getSpeakerData(entry))
    let [names, matrix] = computeInteractions(speakerDatas)

    fs.writeFile('chorddiagram.html', generateHTML(names, matrix), 'utf8', err => {
      if (err) return console.log(err);
      console.log('`chorddiagram.html` generated.');
    })
  })
}

// Reads 'transcript.txt' and renders it as HTML in the file 'chorddiagram.html'
renderTranscript('transcript.txt')
Hey, this is great. Two things: 1) Can we make it interactive like this version, where hovering over a speaker shows its paths and makes the others less opaque? 2) Can we make the chart responsive to the page? Right now it seems to want to be a specific size, so it's hard to see it all in the window (this is less of a big deal).
Qdev 1 month ago
Ah, it's responsive on width. Can we make it responsive on both width and height? Thanks!
Qdev 1 month ago
working on it!
Twann 1 month ago
finished, should be working as intended now!
Twann 1 month ago