# Environment setup. These are notebook shell escapes (IPython `!` syntax), not
# Python statements, so they are kept here as comments; run them in a
# Jupyter/Colab cell before executing the code below.
#   ! pip install Bio
#   ! apt install clustalw
#   ! apt install muscle
#   ! apt install t-coffee
# NOTE(review): Biopython is published on PyPI as `biopython`; confirm that
# `Bio` is the package that was intended.

# Maps a sequence key (presumably a GenBank accession -- TODO confirm) to the
# name of the organism the sequence comes from.
name_mapping = {
    'U00659': 'Sheep',
    'AY044828': 'Pig',
    'AY242098': 'Pig',
    'AY242100': 'Pig',
    'AY242101': 'Pig',
    'AY242109': 'Pig',
    'V00179': 'Dog',
    'J02989': 'Owl monkey',
    'AY138590': 'Human',
    'X61092': 'Grivet',
    'J00265': 'Human',
    'X61089': 'Chimpanzee',
    'K02233': 'Guinea pig',
    'X04725': 'Mouse',
    'AY438372': 'Chicken',
    'AF160192': 'Aplysia californica',
}

# 1. Run ClustalW
# The Biopython command-line wrapper below is no longer supported, so the
# aligner is invoked directly through subprocess instead:
# clustalw_cline = ClustalwCommandline(infile=input_fasta)
# clustalw_cline()
# NOTE(review): `subprocess`, `AlignIO`, `Phylo`, `input_fasta`,
# `muscle_output`, `tcoffee_output`, `clustalw_output`, `alignment_file` and
# `tree_file` are not defined in this section; they must be provided before
# these statements run.
# check=True makes a non-zero exit status raise subprocess.CalledProcessError.
subprocess.run(["clustalw", "-infile=" + input_fasta], check=True)

# Analogously, for each aligner with an explicit output file:
subprocess.run(["muscle", "-in", input_fasta, "-clwout", muscle_output], check=True)
subprocess.run(
    ["t_coffee", input_fasta, "-output", "clustalw_aln", "-outfile", tcoffee_output],
    check=True,
)
subprocess.run(
    ["clustalw", "-infile=" + input_fasta, "-outfile=" + clustalw_output],
    check=True,
)

# 2. Load the alignment
alignment = AlignIO.read(alignment_file, "clustal")

# 3. Load the phylogenetic tree
tree = Phylo.read(tree_file, "newick")

# Zad3 (Task 3)
# 2.
# Translate the DNA sequence into protein. With to_stop=True the translation
# ends at the first in-frame stop codon, so the result contains no stop symbol.
# NOTE(review): `record` and `output_handle` are not defined at this point and
# `SeqIO` is imported only further down; this fragment reads like the body of
# a per-record loop -- confirm where it is meant to run.
protein_seq = record.seq.translate(to_stop=True)  # translate to protein
record.seq = protein_seq
# Append a " protein" suffix to the description (the record id is unchanged).
record.description = record.description + " protein"
SeqIO.write(record, output_handle, "fasta")

# ===========================================

import os
import subprocess

from Bio import AlignIO, Phylo, SeqIO
# from Bio.Align.Applications import ClustalwCommandline  -- this module is no longer supported

# Read the FASTA file and print the id of every sequence.
fasta_file = "insulin.fa"
for record in SeqIO.parse(fasta_file, "fasta"):
    print(record.id)


def map_fasta_taxon(input_fasta, output_fasta, name_mapping):
    """Copy a FASTA file, replacing each record's description with its taxon.

    Args:
        input_fasta: Path of the FASTA file to read.
        output_fasta: Path of the FASTA file to write; opened in "w" mode, so
            any existing content is overwritten.
        name_mapping: Dict from sequence key to taxon name. The key of a
            record is the part of ``record.id`` before the first ".".

    Records whose key is missing from ``name_mapping`` get the description
    "Unknown". Nothing is returned; the result is the file written to
    ``output_fasta``.
    """
    with open(output_fasta, "w") as out_handle:
        for record in SeqIO.parse(input_fasta, "fasta"):
            # Drop everything from the first "." onwards to get the lookup key.
            key = record.id.split(".")[0]
            record.description = name_mapping.get(key, "Unknown")
            SeqIO.write(record, out_handle, "fasta")


# ZADANIE 1 (Task 1)
name_mapping = {
    'U00659': 'Sheep',
    'AY044828': 'Pig',
    'AY242098': 'Pig',
    'AY242100': 'Pig',
    'AY242101': 'Pig',
    'AY242109': 'Pig',
    'V00179': 'Dog',
    'J02989': 'Owl monkey',
    'AY138590': 'Human',
    'X61092': 'Grivet',
    'J00265': 'Human',
    'X61089': 'Chimpanzee',
    'K02233': 'Guinea pig',
    'X04725': 'Mouse',
    'AY438372': 'Chicken',
    'AF160192': 'Aplysia californica',
}
map_fasta_taxon("insulin.fa", "insulin_mapped.fa", name_mapping)

# NOTE(review): `tree_file` is not defined anywhere in the visible code; it
# must point at a Newick tree file before this line runs.
tree = Phylo.read(tree_file, "newick")

# 4. Identify redundant sequences: terminal branches whose length is 0.
# Leaves without a branch length (None) do not compare equal to 0 and are
# therefore not collected.
redundant_ids = {
    clade.name for clade in tree.get_terminals() if clade.branch_length == 0
}