Create the required input in Phylip format from your dictionary and load it with StringIO
. Note that the The sequence/protein ID can be up to 10 characters long. IDs less than 10 characters must have spaces appended to them to reach the 10 character fixed width.
from Bio.Phylo.TreeConstruction import DistanceCalculator, DistanceTreeConstructor
from Bio import AlignIO
from io import StringIO
dct = {'Alpha': 'AACGTGGCCACAT',
'Beta': 'AAGGTCGCCACAC',
'Gamma': 'CAGTTCGCCACAA',
'Delta': 'GAGATTTCCGCCT',
'Epsilon': 'GAGATCTCCGCCC'}
count = len(dct)
length = max(map(len, dct.values()))
msa = f" {count} {length}\n"
msa += '\n'.join(f"{prot_id:<10} {sequence}" for prot_id, sequence in dct.items())
print(msa)
print()
aln = AlignIO.read(StringIO(msa), 'phylip')
print(aln)
print()
calculator = DistanceCalculator('identity')
dm = calculator.get_distance(aln)
print(dm)
print()
constructor = DistanceTreeConstructor(calculator, 'nj')
tree = constructor.build_tree(aln)
print(tree)
Output:
5 13
Alpha AACGTGGCCACAT
Beta AAGGTCGCCACAC
Gamma CAGTTCGCCACAA
Delta GAGATTTCCGCCT
Epsilon GAGATCTCCGCCC
SingleLetterAlphabet() alignment with 5 rows and 13 columns
AACGTGGCCACAT Alpha
AAGGTCGCCACAC Beta
CAGTTCGCCACAA Gamma
GAGATTTCCGCCT Delta
GAGATCTCCGCCC Epsilon
Alpha 0
Beta 0.23076923076923073 0
Gamma 0.3846153846153846 0.23076923076923073 0
Delta 0.5384615384615384 0.5384615384615384 0.5384615384615384 0
Epsilon 0.6153846153846154 0.3846153846153846 0.46153846153846156 0.15384615384615385 0
Alpha Beta Gamma Delta Epsilon
Tree(rooted=False)
Clade(branch_length=0, name='Inner3')
Clade(branch_length=0.18269230769230765, name='Alpha')
Clade(branch_length=0.04807692307692307, name='Beta')
Clade(branch_length=0.04807692307692307, name='Inner2')
Clade(branch_length=0.27884615384615385, name='Inner1')
Clade(branch_length=0.051282051282051266, name='Epsilon')
Clade(branch_length=0.10256410256410259, name='Delta')
Clade(branch_length=0.14423076923076922, name='Gamma')