! pip install Bio
! apt install clustalw
! apt install muscle
! apt install t-coffee

# Accession number (without the version suffix) -> common name of the organism.
name_mapping = {
    "U00659": "Sheep",
    "AY044828": "Pig",
    "AY242098": "Pig",
    "AY242100": "Pig",
    "AY242101": "Pig",
    "AY242109": "Pig",
    "V00179": "Dog",
    "J02989": "Owl monkey",
    "AY138590": "Human",
    "X61092": "Grivet",
    "J00265": "Human",
    "X61089": "Chimpanzee",
    "K02233": "Guinea pig",
    "X04725": "Mouse",
    "AY438372": "Chicken",
    "AF160192": "Aplysia californica",
}


# 1. Run ClustalW

# The Biopython ClustalwCommandline wrapper below is no longer supported,
# so the command-line tools are invoked directly through subprocess instead.
#clustalw_cline = ClustalwCommandline(infile=input_fasta)
#clustalw_cline()

# check=True makes subprocess.run raise CalledProcessError on a non-zero exit.
# No -outfile is passed here, so ClustalW picks its own output file name.
subprocess.run(["clustalw", "-infile=" + input_fasta], check=True)

# Analogously for the other aligners, each writing to an explicit output file:
subprocess.run(["muscle", "-in", input_fasta, "-clwout", muscle_output], check=True)
subprocess.run(["t_coffee", input_fasta, "-output", "clustalw_aln", "-outfile", tcoffee_output], check=True)
subprocess.run(["clustalw", "-infile=" + input_fasta, "-outfile="+clustalw_output], check=True)

# 2. Load the alignment (parsed as Clustal format)
alignment = AlignIO.read(alignment_file, "clustal")

# 3. Load the phylogenetic tree (parsed as Newick format)
tree = Phylo.read(tree_file, "newick")


# Task 3
# 2. Translate the DNA sequence to protein (stop codons are dropped)
# NOTE(review): `record` is presumably a SeqRecord from SeqIO.parse and
# `output_handle` an open text file -- neither is defined in this snippet.
protein_seq = record.seq.translate(to_stop=True)  # to_stop=True ends translation at the first in-frame stop codon
record.seq = protein_seq
record.description = record.description + " protein"  # Append a " protein" suffix to the description (the ID itself is unchanged)
SeqIO.write(record, output_handle, "fasta")



===========================================

from Bio import AlignIO, Phylo, SeqIO
import os
# from Bio.Align.Applications import ClustalwCommandline - ten modul nie jest dluzej wspierany
import subprocess

# Read the FASTA file and print the ID of every sequence it contains.
fasta_file = "insulin.fa"
for record in SeqIO.parse(fasta_file, "fasta"):
    print(record.id)

def map_fasta_taxon(input_fasta, output_fasta, name_mapping):
    """Copy a FASTA file, replacing each record's description with its taxon.

    The lookup key is the part of the record ID before the first "."; records
    whose key is missing from ``name_mapping`` get the description "Unknown".

    Args:
        input_fasta: Path of the FASTA file to read.
        output_fasta: Path of the FASTA file to write (opened in "w" mode, so
            any existing content is overwritten).
        name_mapping: Mapping from ID prefix to taxon name.
    """

    def relabelled():
        # Lazily yield each input record with its description swapped out.
        for entry in SeqIO.parse(input_fasta, "fasta"):
            accession, _, _ = entry.id.partition(".")
            entry.description = name_mapping.get(accession, "Unknown")
            yield entry

    with open(output_fasta, "w") as sink:
        SeqIO.write(relabelled(), sink, "fasta")

# TASK 1
# Accession number (without the version suffix) -> common name of the organism.
name_mapping = {
    "U00659": "Sheep",
    "AY044828": "Pig",
    "AY242098": "Pig",
    "AY242100": "Pig",
    "AY242101": "Pig",
    "AY242109": "Pig",
    "V00179": "Dog",
    "J02989": "Owl monkey",
    "AY138590": "Human",
    "X61092": "Grivet",
    "J00265": "Human",
    "X61089": "Chimpanzee",
    "K02233": "Guinea pig",
    "X04725": "Mouse",
    "AY438372": "Chicken",
    "AF160192": "Aplysia californica",
}

# Write a copy of the insulin FASTA file with taxon names as descriptions.
map_fasta_taxon("insulin.fa", "insulin_mapped.fa", name_mapping)

tree = Phylo.read(tree_file, "newick")

# 4. Identify redundant sequences: leaves attached by a zero-length branch.
redundant_ids = set()
for clade in tree.get_terminals():
    if clade.branch_length != 0:
        # Non-zero (or missing) branch length: not redundant.
        continue
    redundant_ids.add(clade.name)