summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNathan Fargo <32229490+ntfargo@users.noreply.github.com>2024-06-13 20:28:36 +0200
committerNathan Fargo <32229490+ntfargo@users.noreply.github.com>2024-06-13 20:28:36 +0200
commit4c0984e49d59052f9361179e23d5a3b74c4b5425 (patch)
tree53819f9164bda4ff65112cae678def915a7ce730
parent839616dc230800c9eb4d44baa9d3814c2916b999 (diff)
DNA sequence highlighting and enzyme site detection to Sequence Viewer & DNA_Utils Calculations
-rw-r--r--GenoFusion/utils/__init__.py3
-rw-r--r--GenoFusion/utils/dna_utils.py41
-rw-r--r--SequenceViewer/app/routes.py85
-rw-r--r--SequenceViewer/app/templates/view.html21
-rw-r--r--SequenceViewer/example.fasta8
5 files changed, 135 insertions, 23 deletions
diff --git a/GenoFusion/utils/__init__.py b/GenoFusion/utils/__init__.py
index e7d4dc0..73d42e9 100644
--- a/GenoFusion/utils/__init__.py
+++ b/GenoFusion/utils/__init__.py
@@ -1 +1,2 @@
-from .functions import * \ No newline at end of file
+from .functions import *
+from .dna_utils import * \ No newline at end of file
diff --git a/GenoFusion/utils/dna_utils.py b/GenoFusion/utils/dna_utils.py
new file mode 100644
index 0000000..df3207b
--- /dev/null
+++ b/GenoFusion/utils/dna_utils.py
@@ -0,0 +1,41 @@
def calculate_nucleotide_composition(sequence):
    """Count the occurrences of each of the four DNA bases in a sequence.

    Counting is case-insensitive, so soft-masked (lower-case) bases are
    counted together with their upper-case form. Any other character
    (for example ``N`` or a gap) is ignored.

    Args:
        sequence: The nucleotide sequence to inspect.

    Returns:
        A dict with the keys ``'A'``, ``'T'``, ``'G'`` and ``'C'`` (in that
        order) mapped to the number of times each base occurs.
    """
    # Normalise once so that 'a' and 'A' land in the same bucket.
    normalized = sequence.upper()
    return {base: normalized.count(base) for base in ('A', 'T', 'G', 'C')}
+
def calculate_nucleotide_percentage(sequence):
    """Return each base's share of the sequence as a percentage.

    The denominator is the full sequence length, so characters other than
    the four counted bases lower the percentages rather than being excluded.

    Args:
        sequence: The nucleotide sequence to inspect.

    Returns:
        A dict mapping each base reported by
        ``calculate_nucleotide_composition`` to a float percentage. For an
        empty sequence every percentage is ``0.0``.
    """
    composition = calculate_nucleotide_composition(sequence)
    length = len(sequence)
    if length == 0:
        # An empty record has no bases; avoid dividing by zero.
        return {base: 0.0 for base in composition}
    return {base: (count / length) * 100 for base, count in composition.items()}
+
def calculate_gc_content(sequence):
    """Return the GC content of a sequence as a percentage.

    Args:
        sequence: The nucleotide sequence to inspect.

    Returns:
        The combined count of ``G`` and ``C`` divided by the sequence length,
        times 100, as a float. An empty sequence yields ``0.0``.
    """
    length = len(sequence)
    if length == 0:
        # An empty record has no bases; avoid dividing by zero.
        return 0.0
    composition = calculate_nucleotide_composition(sequence)
    return ((composition['G'] + composition['C']) / length) * 100
+
def reverse_sequence(sequence):
    """Return the sequence with its elements in reverse order."""
    backwards = slice(None, None, -1)
    return sequence[backwards]
+
# Complement of every IUPAC nucleotide code (upper-case form).
_IUPAC_COMPLEMENT = {
    'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G',
    'R': 'Y', 'Y': 'R', 'K': 'M', 'M': 'K',
    'B': 'V', 'V': 'B', 'D': 'H', 'H': 'D',
    'S': 'S', 'W': 'W', 'N': 'N',
}
# Mirror the table for lower-case input so the case of each base is preserved.
_IUPAC_COMPLEMENT.update(
    {base.lower(): partner.lower() for base, partner in list(_IUPAC_COMPLEMENT.items())}
)


def complement_sequence(sequence):
    """Return the complement of a DNA sequence, base by base.

    Upper- and lower-case bases are complemented with their case preserved,
    and IUPAC ambiguity codes (``N``, ``R``, ``Y``, ...) are mapped to their
    complementary code. Characters with no defined complement, such as gap
    symbols, are copied through unchanged instead of raising ``KeyError``.

    Args:
        sequence: An iterable of single-character bases, normally a ``str``.

    Returns:
        A ``str`` of the same length holding the complemented bases.
    """
    return ''.join(_IUPAC_COMPLEMENT.get(base, base) for base in sequence)
+
def reverse_complement_sequence(sequence):
    """Return the reverse complement: reverse the sequence, then complement it."""
    reversed_strand = reverse_sequence(sequence)
    return complement_sequence(reversed_strand)
+
def get_sequence_properties(sequence):
    """Collect the derived properties of a sequence into a single dict.

    Args:
        sequence: The nucleotide sequence to summarise.

    Returns:
        A dict with the keys ``length``, ``nucleotide_composition``,
        ``nucleotide_percentage``, ``gc_content``, ``reverse_sequence``,
        ``complement_sequence`` and ``reverse_complement_sequence``, each
        holding the result of the helper of the same purpose in this module.
    """
    summary = {}
    summary['length'] = len(sequence)
    summary['nucleotide_composition'] = calculate_nucleotide_composition(sequence)
    summary['nucleotide_percentage'] = calculate_nucleotide_percentage(sequence)
    summary['gc_content'] = calculate_gc_content(sequence)
    summary['reverse_sequence'] = reverse_sequence(sequence)
    summary['complement_sequence'] = complement_sequence(sequence)
    summary['reverse_complement_sequence'] = reverse_complement_sequence(sequence)
    return summary
diff --git a/SequenceViewer/app/routes.py b/SequenceViewer/app/routes.py
index 0e39f74..4031421 100644
--- a/SequenceViewer/app/routes.py
+++ b/SequenceViewer/app/routes.py
@@ -1,8 +1,46 @@
from flask import request, render_template, redirect, url_for
from Bio import SeqIO
+from GenoFusion.Utils import get_sequence_properties
import os
from . import app
+
# (TEMPLATE) Restriction enzymes, keyed by enzyme name, each mapped to the
# recognition site that is searched for in the displayed sequences.
enzymes = dict(
    EcoRI="GAATTC",
    BamHI="GGATCC",
    HindIII="AAGCTT",
    NotI="GCGGCCGC",
)
+
def find_enzyme_sites(sequence, enzymes):
    """Locate restriction sites in a sequence and build highlighted HTML.

    Every occurrence of every recognition site is reported, including
    occurrences that overlap one another. In the HTML output each base
    appears exactly once: where hits overlap, the later hit only highlights
    the part that has not already been emitted.

    Args:
        sequence: The sequence text to search.
        enzymes: Mapping of enzyme name to its recognition site string.
            Entries with an empty site are ignored.

    Returns:
        A tuple ``(highlighted_sequence, enzyme_sites)``:

        * ``highlighted_sequence`` is the HTML-escaped sequence with each
          site wrapped in ``<span class="enzyme">...</span>``.
        * ``enzyme_sites`` is a list of ``(enzyme, start, end)`` tuples
          sorted by ``start``; ``end`` is exclusive.
    """
    # Local import keeps this function self-contained; the markup returned
    # here is rendered unescaped by the template, so the sequence text itself
    # must be escaped before it is embedded.
    from html import escape

    enzyme_sites = []
    for enzyme, site in enzymes.items():
        if not site:
            # An empty pattern matches at every offset and would never advance.
            continue
        start = sequence.find(site)
        while start != -1:
            enzyme_sites.append((enzyme, start, start + len(site)))
            # Step by one base so self-overlapping sites are not skipped.
            start = sequence.find(site, start + 1)

    enzyme_sites.sort(key=lambda hit: hit[1])

    pieces = []
    last_end = 0
    for _enzyme, start, end in enzyme_sites:
        if end <= last_end:
            # Entirely inside a span that has already been emitted.
            continue
        # Clamp so bases shared with the previous hit are not written twice.
        span_start = max(start, last_end)
        pieces.append(escape(sequence[last_end:span_start]))
        pieces.append(f'<span class="enzyme">{escape(sequence[span_start:end])}</span>')
        last_end = end
    pieces.append(escape(sequence[last_end:]))

    return "".join(pieces), enzyme_sites
+
@app.route('/')
def index():
    """Serve the application's root URL by rendering ``index.html``."""
    landing_template = 'index.html'
    return render_template(landing_template)
@@ -29,20 +67,47 @@ def view_file(filename):
with open(filepath, "r") as handle:
if file_type in ['fasta', 'fa']:
for record in SeqIO.parse(handle, "fasta"):
- sequences.append(record)
+ sequence_str = str(record.seq)
+ highlighted_sequence, enzyme_sites = find_enzyme_sites(sequence_str, enzymes)
+ sequence_properties = get_sequence_properties(sequence_str)
+ sequences.append({
+ "id": record.id,
+ "highlighted_sequence": highlighted_sequence,
+ "features": enzyme_sites,
+ "properties": sequence_properties
+ })
elif file_type in ['fastq']:
for record in SeqIO.parse(handle, "fastq"):
- sequences.append(record)
+ sequence_str = str(record.seq)
+ highlighted_sequence, enzyme_sites = find_enzyme_sites(sequence_str, enzymes)
+ sequence_properties = get_sequence_properties(sequence_str)
+ sequences.append({
+ "id": record.id,
+ "highlighted_sequence": highlighted_sequence,
+ "features": enzyme_sites,
+ "properties": sequence_properties
+ })
elif file_type in ['gb', 'genbank']:
for record in SeqIO.parse(handle, "genbank"):
- sequences.append(record)
-
- # Convert sequences to a JSON-serializable format
+ sequence_str = str(record.seq)
+ highlighted_sequence, enzyme_sites = find_enzyme_sites(sequence_str, enzymes)
+ features = [f"{feature.type}: {feature.location}" for feature in record.features]
+ features.extend([f"{enzyme} site at {start}-{end}" for enzyme, start, end in enzyme_sites])
+ sequence_properties = get_sequence_properties(sequence_str)
+ sequences.append({
+ "id": record.id,
+ "highlighted_sequence": highlighted_sequence,
+ "features": features,
+ "properties": sequence_properties
+ })
+
sequences_data = [{
- 'id': str(record.id),
- 'description': str(record.description),
- 'sequence': str(record.seq),
- 'features': [feature.qualifiers for feature in record.features] if file_type in ['gb', 'genbank'] else []
- } for record in sequences]
+ 'id': sequence['id'],
+ 'description': '', # Description is not available in the current 'sequences' data
+ 'sequence': sequence['highlighted_sequence'],
+ 'features': sequence['features']
+ } for sequence in sequences]
+
+ # print(sequences_data)
return render_template('view.html', sequences=sequences_data, filename=filename) \ No newline at end of file
diff --git a/SequenceViewer/app/templates/view.html b/SequenceViewer/app/templates/view.html
index 6b11721..4f1ef18 100644
--- a/SequenceViewer/app/templates/view.html
+++ b/SequenceViewer/app/templates/view.html
@@ -5,6 +5,11 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>View Sequences - {{ filename }}</title>
<link rel="stylesheet" href="{{ url_for('static', filename='genview.css') }}">
+ <style>
+ .enzyme {
+ background-color: lightblue;
+ }
+ </style>
</head>
<body>
<h1>Viewing Sequences from {{ filename }}</h1>
@@ -13,14 +18,16 @@
{% for record in sequences %}
<li>
<strong>ID: {{ record.id }}</strong><br>
- <pre>{{ record.sequence }}</pre>
+ <pre>{{ record.sequence|safe }}</pre>
{% if record.features %}
- Features:
- <ul>
- {% for feature in record.features %}
- <li>{{ feature }}</li>
- {% endfor %}
- </ul>
+ <details>
+ <summary>Features:</summary>
+ <ul>
+ {% for feature in record.features %}
+ <li>{{ feature }}</li>
+ {% endfor %}
+ </ul>
+ </details>
{% endif %}
</li>
{% endfor %}
diff --git a/SequenceViewer/example.fasta b/SequenceViewer/example.fasta
index 3408631..eb93a22 100644
--- a/SequenceViewer/example.fasta
+++ b/SequenceViewer/example.fasta
@@ -1,10 +1,8 @@
>Sequence_1
-AGCTAGCTAGCTACGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCGTAGCTAGCTAGCTAGCTA
+GGATCCGCGGCCGCAAGCTTGAATTCCGCGGCCGCAAGCTTGAATTCCGCGGCCGCAAGCTTGAATTCCGCGGCCGCAAGCTTGAATTC
>Sequence_2
-CGTAGCTAGCTAGCTAGCTGATCGTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTA
+AGCTTGCGGCCGCGGATCCAGCTTGAATTCGCGGCCGCGGATCCAGCTTGAATTCGCGGCCGCGGATCCAGCTTGAATTCGCGGCCGC
>Sequence_3
TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
>Sequence_4
-ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
->Sequence_5
-GCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAG \ No newline at end of file
+ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA \ No newline at end of file