Add DNA sequence highlighting and enzyme site detection to Sequence Viewer; add dna_utils calculations to GenoFusion

author: Nathan Fargo <32229490+ntfargo@users.noreply.github.com> 2024-06-13 20:28:36 +0200
committer: Nathan Fargo <32229490+ntfargo@users.noreply.github.com> 2024-06-13 20:28:36 +0200
commit: 4c0984e49d59052f9361179e23d5a3b74c4b5425 (patch)
tree: 53819f9164bda4ff65112cae678def915a7ce730
parent: 839616dc230800c9eb4d44baa9d3814c2916b999 (diff)
5 files changed, 135 insertions, 23 deletions
diff --git a/GenoFusion/utils/__init__.py b/GenoFusion/utils/__init__.py
index e7d4dc0..73d42e9 100644
--- a/GenoFusion/utils/__init__.py
+++ b/GenoFusion/utils/__init__.py
@@ -1 +1,2 @@
-from .functions import *
-\ No newline at end of file
+from .functions import *
+from .dna_utils import *
+\ No newline at end of file
diff --git a/GenoFusion/utils/dna_utils.py b/GenoFusion/utils/dna_utils.py
new file mode 100644
index 0000000..df3207b
--- /dev/null
+++ b/GenoFusion/utils/dna_utils.py
@@ -0,0 +1,41 @@
+def calculate_nucleotide_composition(sequence):
+    composition = {
+        'A': sequence.count('A'),
+        'T': sequence.count('T'),
+        'G': sequence.count('G'),
+        'C': sequence.count('C')
+    }
+    return composition
+
+def calculate_nucleotide_percentage(sequence):
+    length = len(sequence)
+    composition = calculate_nucleotide_composition(sequence)
+    percentage = {base: (count / length) * 100 for base, count in composition.items()}
+    return percentage
+
+def calculate_gc_content(sequence):
+    composition = calculate_nucleotide_composition(sequence)
+    gc_content = ((composition['G'] + composition['C']) / len(sequence)) * 100
+    return gc_content
+
+def reverse_sequence(sequence):
+    return sequence[::-1]
+
+def complement_sequence(sequence):
+    complement = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}
+    return ''.join(complement[base] for base in sequence)
+
+def reverse_complement_sequence(sequence):
+    return complement_sequence(reverse_sequence(sequence))
+
+def get_sequence_properties(sequence):
+    properties = {
+        'length': len(sequence),
+        'nucleotide_composition': calculate_nucleotide_composition(sequence),
+        'nucleotide_percentage': calculate_nucleotide_percentage(sequence),
+        'gc_content': calculate_gc_content(sequence),
+        'reverse_sequence': reverse_sequence(sequence),
+        'complement_sequence': complement_sequence(sequence),
+        'reverse_complement_sequence': reverse_complement_sequence(sequence)
+    }
+    return properties
diff --git a/SequenceViewer/app/routes.py b/SequenceViewer/app/routes.py
index 0e39f74..4031421 100644
--- a/SequenceViewer/app/routes.py
+++ b/SequenceViewer/app/routes.py
@@ -1,8 +1,46 @@
 from flask import request, render_template, redirect, url_for
 from Bio import SeqIO
+from GenoFusion.utils import get_sequence_properties
 import os
 from . import app
 
+
+# (TEMPLATE) Define the restriction enzymes and their recognition sites
+
+enzymes = {
+    "EcoRI": "GAATTC",
+    "BamHI": "GGATCC",
+    "HindIII": "AAGCTT",
+    "NotI": "GCGGCCGC"
+}
+
+def find_enzyme_sites(sequence, enzymes):
+    annotated_sequence = sequence
+    enzyme_sites = []
+
+    for enzyme, site in enzymes.items():
+        start = 0
+        while start < len(sequence):
+            start = sequence.find(site, start)
+            if start == -1:
+                break
+            enzyme_sites.append((enzyme, start, start + len(site)))
+            start += len(site)
+
+    enzyme_sites.sort(key=lambda x: x[1])
+
+    highlighted_sequence = ""
+    last_end = 0
+
+    for enzyme, start, end in enzyme_sites:
+        highlighted_sequence += annotated_sequence[last_end:start]
+        highlighted_sequence += f'<span class="enzyme">{annotated_sequence[start:end]}</span>'
+        last_end = end
+
+    highlighted_sequence += annotated_sequence[last_end:]
+
+    return highlighted_sequence, enzyme_sites
+
 @app.route('/')
 def index():
     return render_template('index.html')
@@ -29,20 +67,47 @@ def view_file(filename):
     with open(filepath, "r") as handle:
         if file_type in ['fasta', 'fa']:
             for record in SeqIO.parse(handle, "fasta"):
-                sequences.append(record)
+                sequence_str = str(record.seq)
+                highlighted_sequence, enzyme_sites = find_enzyme_sites(sequence_str, enzymes)
+                sequence_properties = get_sequence_properties(sequence_str)
+                sequences.append({
+                    "id": record.id,
+                    "highlighted_sequence": highlighted_sequence,
+                    "features": enzyme_sites,
+                    "properties": sequence_properties
+                })
         elif file_type in ['fastq']:
             for record in SeqIO.parse(handle, "fastq"):
-                sequences.append(record)
+                sequence_str = str(record.seq)
+                highlighted_sequence, enzyme_sites = find_enzyme_sites(sequence_str, enzymes)
+                sequence_properties = get_sequence_properties(sequence_str)
+                sequences.append({
+                    "id": record.id,
+                    "highlighted_sequence": highlighted_sequence,
+                    "features": enzyme_sites,
+                    "properties": sequence_properties
+                })
         elif file_type in ['gb', 'genbank']:
             for record in SeqIO.parse(handle, "genbank"):
-                sequences.append(record)
-    
-    # Convert sequences to a JSON-serializable format
+                sequence_str = str(record.seq)
+                highlighted_sequence, enzyme_sites = find_enzyme_sites(sequence_str, enzymes)
+                features = [f"{feature.type}: {feature.location}" for feature in record.features]
+                features.extend([f"{enzyme} site at {start}-{end}" for enzyme, start, end in enzyme_sites])
+                sequence_properties = get_sequence_properties(sequence_str)
+                sequences.append({
+                    "id": record.id,
+                    "highlighted_sequence": highlighted_sequence,
+                    "features": features,
+                    "properties": sequence_properties
+                })
+     
     sequences_data = [{
-        'id': str(record.id),
-        'description': str(record.description),
-        'sequence': str(record.seq),
-        'features': [feature.qualifiers for feature in record.features] if file_type in ['gb', 'genbank'] else []
-    } for record in sequences]
+        'id': sequence['id'],
+        'description': '',  # Description is not available in the current 'sequences' data
+        'sequence': sequence['highlighted_sequence'],
+        'features': sequence['features']
+    } for sequence in sequences]
+
+    # print(sequences_data)
 
     return render_template('view.html', sequences=sequences_data, filename=filename)
 \ No newline at end of file
diff --git a/SequenceViewer/app/templates/view.html b/SequenceViewer/app/templates/view.html
index 6b11721..4f1ef18 100644
--- a/SequenceViewer/app/templates/view.html
+++ b/SequenceViewer/app/templates/view.html
@@ -5,6 +5,11 @@
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <title>View Sequences - {{ filename }}</title>
     <link rel="stylesheet" href="{{ url_for('static', filename='genview.css') }}">
+    <style>
+        .enzyme {
+            background-color: lightblue;
+        }
+    </style>
 </head>
 <body>
     <h1>Viewing Sequences from {{ filename }}</h1>
@@ -13,14 +18,16 @@
     {% for record in sequences %}
         <li>
             <strong>ID: {{ record.id }}</strong><br>
-            <pre>{{ record.sequence }}</pre>
+            <pre>{{ record.sequence|safe }}</pre>
             {% if record.features %}
-                Features:
-                <ul>
-                {% for feature in record.features %}
-                    <li>{{ feature }}</li>
-                {% endfor %}
-                </ul>
+                <details>
+                    <summary>Features:</summary>
+                    <ul>
+                    {% for feature in record.features %}
+                        <li>{{ feature }}</li>
+                    {% endfor %}
+                    </ul>
+                </details>
             {% endif %}
         </li>
     {% endfor %}
diff --git a/SequenceViewer/example.fasta b/SequenceViewer/example.fasta
index 3408631..eb93a22 100644
--- a/SequenceViewer/example.fasta
+++ b/SequenceViewer/example.fasta
@@ -1,10 +1,8 @@
 >Sequence_1
-AGCTAGCTAGCTACGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCGTAGCTAGCTAGCTAGCTA
+GGATCCGCGGCCGCAAGCTTGAATTCCGCGGCCGCAAGCTTGAATTCCGCGGCCGCAAGCTTGAATTCCGCGGCCGCAAGCTTGAATTC
 >Sequence_2
-CGTAGCTAGCTAGCTAGCTGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTA
+AGCTTGCGGCCGCGGATCCAGCTTGAATTCGCGGCCGCGGATCCAGCTTGAATTCGCGGCCGCGGATCCAGCTTGAATTCGCGGCCGC
 >Sequence_3
 TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
 >Sequence_4
-ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
->Sequence_5
-GCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAG
-\ No newline at end of file
+ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
+\ No newline at end of file
author	Nathan Fargo <32229490+ntfargo@users.noreply.github.com>	2024-06-13 20:28:36 +0200
committer	Nathan Fargo <32229490+ntfargo@users.noreply.github.com>	2024-06-13 20:28:36 +0200
commit	4c0984e49d59052f9361179e23d5a3b74c4b5425 (patch)
tree	53819f9164bda4ff65112cae678def915a7ce730
parent	839616dc230800c9eb4d44baa9d3814c2916b999 (diff)