import { useCallback, useMemo, useState } from 'react'

// Sample input loaded into the textarea by the "load an example" button.
// It mixes single accessions with en-dash ranges, including ranges that have
// a space after the dash or are split across a line break.
const example = `GenBank deposition: the sequences of tickborne pathogens obtained in this study
were deposited in GenBank with accession numbers: A. bovis rrs: MW275984–MW275987
and MN148605, and groEL: MW226869; E. ewingii rrs MN148606–MN148617, and gltA:
MW226861–MW226866; C. burnetti icd: MW226857– MW226860, and omp: MW226877–
MW226880. Rickettsia 17-kDa protein gene: MW226870–MW226876, gltA: MW226867 and
MW226868, rrs: MW275981–MW275983, and ompA MW265948`

/**
 * Build the NCBI search URL for a single accession (or `start:end` range).
 *
 * The first two characters of the accession select the NCBI section:
 * `PR` -> bioproject, `SA` -> biosample, `SR` -> sra, anything else -> nuccore.
 * Every section except bioproject gets the `[accn]` field tag appended to the
 * search term before it is URL-encoded.
 *
 * @param {string} accn - Accession string to look up.
 * @returns {string} Absolute URL of the matching NCBI search page.
 */
const buildURL = (accn) => {
  // Checked in order; the first prefix that matches wins.
  const routes = [
    { prefix: 'PR', database: 'bioproject', tagged: false },
    { prefix: 'SA', database: 'biosample', tagged: true },
    { prefix: 'SR', database: 'sra', tagged: true },
  ]
  const fallback = { database: 'nuccore', tagged: true }

  const { database, tagged } =
    routes.find(({ prefix }) => accn.startsWith(prefix)) ?? fallback

  const term = tagged ? `${accn}[accn]` : accn

  return `https://www.ncbi.nlm.nih.gov/${database}/?term=${encodeURIComponent(
    term
  )}`
}

/**
 * Extract accession numbers, and accession ranges, from free text.
 *
 * An accession is 1-6 uppercase letters, an optional underscore, then 5-9
 * digits. Two accessions joined by a hyphen, en dash, em dash or the word
 * "to" (with optional surrounding whitespace, including line breaks) are
 * captured as one range and normalized to `START:END`.
 *
 * This function is pure: it performs no logging or other side effects, so it
 * is safe to call on every input change.
 *
 * @param {string} input - Text that may contain accession numbers.
 * @returns {string[]} Unique accessions and ranges, sorted in default string order.
 */
const parseData = (input) => {
  // https://www.ncbi.nlm.nih.gov/genbank/acc_prefix/
  const pattern =
    /\b((?:[A-Z]{1,6}_?\d{5,9})(?:(?:\s*[–—-]+\s*|\s+to\s+)[A-Z]{1,6}_?\d{5,9})?)\b/g

  const accessions = Array.from(input.matchAll(pattern), (match) =>
    // Collapse the range separator ("to", or any run of dashes/whitespace)
    // into a single ':'. Single accessions contain no such run and pass
    // through unchanged.
    match[1].replace(/\s+to\s+/, ':').replace(/\W+/, ':')
  )

  // Deduplicate before sorting so repeated mentions yield one entry.
  return Array.from(new Set(accessions)).sort()
}

/**
 * Build one nuccore search URL that matches any of the given accessions.
 *
 * Each item is tagged with `[accn]` and the tagged terms are joined with
 * ` OR ` to form the `term` query parameter.
 *
 * @param {string[]} items - Accessions or `start:end` ranges.
 * @returns {URL} URL object for the combined nuccore search.
 */
const buildQuery = (items) => {
  const tagAccession = (accn) => `${accn}[accn]`
  const clauses = items.map(tagAccession)

  const queryURL = new URL('https://www.ncbi.nlm.nih.gov/nuccore/')
  // searchParams handles the encoding of spaces, brackets and colons.
  queryURL.searchParams.set('term', clauses.join(' OR '))

  return queryURL
}

export default function App() {
  const [value, setValue] = useState('')

  const items = useMemo(() => {
    return parseData(value)
  }, [value])

  const setExample = useCallback(() => {
    setValue(example)
  }, [])

  return (
    <div style={{ fontFamily: 'Nunito, sans-serif' }}>
      <h1>Extract GenBank identifiers</h1>

      <p>
        This tool will attempt to find{' '}
        <a href="https://www.ncbi.nlm.nih.gov/genbank/acc_prefix/">
          GenBank accession numbers
        </a>{' '}
        in text and convert them to links.
      </p>

      <textarea
        autoFocus
        rows={10}
        style={{ width: '100%', marginBottom: 5, boxSizing: 'border-box' }}
        value={value}
        onChange={(event) => setValue(event.target.value)}
        placeholder="Enter text containing GenBank identifiers"
      />

      {!value && (
        <div style={{ display: 'flex', justifyContent: 'flex-end' }}>
          <button type="button" onClick={setExample}>
            load an example
          </button>
        </div>
      )}

      <ul style={{ listStyle: 'none', paddingLeft: 0 }}>
        {items.map((item) => (
          <li key={item}>
            <a href={buildURL(item)} target="_blank" rel="noreferrer">
              {item}
            </a>
          </li>
        ))}
      </ul>

      {items.length > 0 && (
        <p>
          <a href={buildQuery(items)} target="_blank" rel="noreferrer">
            search
          </a>
        </p>
      )}
    </div>
  )
}
