ad 1
in colab: !pip install Bio

from Bio.Align import substitution_matrices

# Print available matrices
print(substitution_matrices.load())


ad 2

Jukes-Cantor model, discrete time

a) jukes_cantor_one_step(mu) #single parameter, all frequencies in the matrix the same

def jukes_cantor_one_step(mu):
    # fill with off-diagonal values
    matrix = np.full((4, 4), mu / 4)
    # set the diagonal
    for i in range(4):
        matrix[i, i] = 1 - 3 * mu / 4
    return matrix

def jukes_cantor_substitution_matrix(mu, t):
    # fill with off-diagonal values
    matrix = np.full((4, 4), 1/4 - 1/4*np.exp(-4*mu*t))
    # set the diagonal
    for i in range(4):
        matrix[i, i] = 1/4 + 3/4*np.exp(-4*mu*t)
    return matrix

jukes_cantor_one_step(0.1)
array([[0.925, 0.025, 0.025, 0.025],
       [0.025, 0.925, 0.025, 0.025],
       [0.025, 0.025, 0.925, 0.025],
       [0.025, 0.025, 0.025, 0.925]])

jukes_cantor_one_step(0.01)
array([[0.9925, 0.0025, 0.0025, 0.0025],
       [0.0025, 0.9925, 0.0025, 0.0025],
       [0.0025, 0.0025, 0.9925, 0.0025],
       [0.0025, 0.0025, 0.0025, 0.9925]])

jukes_cantor_one_step(1e-06) #see lecture, real example
array([[9.9999925e-01, 2.5000000e-07, 2.5000000e-07, 2.5000000e-07],
       [2.5000000e-07, 9.9999925e-01, 2.5000000e-07, 2.5000000e-07],
       [2.5000000e-07, 2.5000000e-07, 9.9999925e-01, 2.5000000e-07],
       [2.5000000e-07, 2.5000000e-07, 2.5000000e-07, 9.9999925e-01]])


b) Jukes-Cantor model, continuous time
jukes_cantor_substitution_matrix(mu, t)

jukes_cantor_substitution_matrix(0.1, 1)
array([[0.75274003, 0.08241999, 0.08241999, 0.08241999],
       [0.08241999, 0.75274003, 0.08241999, 0.08241999],
       [0.08241999, 0.08241999, 0.75274003, 0.08241999],
       [0.08241999, 0.08241999, 0.08241999, 0.75274003]])

jukes_cantor_substitution_matrix(0.1, 10)
array([[0.26373673, 0.24542109, 0.24542109, 0.24542109],
       [0.24542109, 0.26373673, 0.24542109, 0.24542109],
       [0.24542109, 0.24542109, 0.26373673, 0.24542109],
       [0.24542109, 0.24542109, 0.24542109, 0.26373673]])


jukes_cantor_substitution_matrix(0.01, 1)
array([[0.97059208, 0.00980264, 0.00980264, 0.00980264],
       [0.00980264, 0.97059208, 0.00980264, 0.00980264],
       [0.00980264, 0.00980264, 0.97059208, 0.00980264],
       [0.00980264, 0.00980264, 0.00980264, 0.97059208]])

jukes_cantor_substitution_matrix(0.01, 10)
array([[0.75274003, 0.08241999, 0.08241999, 0.08241999],
       [0.08241999, 0.75274003, 0.08241999, 0.08241999],
       [0.08241999, 0.08241999, 0.75274003, 0.08241999],
       [0.08241999, 0.08241999, 0.08241999, 0.75274003]])

jukes_cantor_substitution_matrix(0.01, 100)
array([[0.26373673, 0.24542109, 0.24542109, 0.24542109],
       [0.24542109, 0.26373673, 0.24542109, 0.24542109],
       [0.24542109, 0.24542109, 0.26373673, 0.24542109],
       [0.24542109, 0.24542109, 0.24542109, 0.26373673]])

For detailed explanation why array values approach 0.25 read:
https://en.wikipedia.org/wiki/Models_of_DNA_evolution#JC69_model_(Jukes_and_Cantor_1969)

Additional, functions could look like:
kimura_substitution_matrix(alpha, beta, t) #instead single parameter (mu), here we have alpha for transitions, beta for transversions

def kimura_substitution_matrix(alpha, beta, t):
    """Kimura (K80) substitution matrix generator"""
    matrix = np.full((4, 4), 0.25 - 0.25 * np.exp(-2 * (alpha + beta) * t))  # Transversions

    diag = 0.25 + 0.5 * np.exp(-4 * beta * t) + 0.25 * np.exp(-2 * (alpha + beta) * t)  # No substitution
    transition = 0.25 + 0.25 * np.exp(-2 * (alpha + beta) * t) - 0.5 * np.exp(-4 * beta * t)  # Transitions

    for i in range(4):
        matrix[i, i] = diag  # Set diagonal

    # Assign transitions (AâG and CâT)
    matrix[0, 2] = matrix[2, 0] = transition  # A â G
    matrix[1, 3] = matrix[3, 1] = transition  # C â T

    return matrix

kimura_substitution_matrix(0.1, 0.05, 1) #alpha tends to be higher than beta
array([[0.84456993, 0.06479544, 0.02583918, 0.06479544],
       [0.06479544, 0.84456993, 0.06479544, 0.02583918],
       [0.02583918, 0.06479544, 0.84456993, 0.06479544],
       [0.06479544, 0.02583918, 0.06479544, 0.84456993]])

see also: https://en.wikipedia.org/wiki/Models_of_DNA_evolution#K80_model_(Kimura_1980)

felsenstein_substitution_matrix(mu, t, pi)
# the main difference is that here we use the frequencies that are taken from the data, it means that the matiric does not heve to 
# be symetrical, but it is still ok

pi = np.array([0.29, 0.18, 0.31, 0.22])  # Example unequal base frequencies for A, C, G, T
felsenstein_substitution_matrix(0.01, 1, pi)
array([[0.9721605 , 0.29      , 0.29      , 0.29      ],
       [0.18      , 0.96784734, 0.18      , 0.18      ],
       [0.31      , 0.31      , 0.97294471, 0.31      ],
       [0.22      , 0.22      , 0.22      , 0.96941576]])

see also:
https://en.wikipedia.org/wiki/Models_of_DNA_evolution#F81_model_(Felsenstein_1981)


thus our markov chain function using jukes_cantor_substitution_matrix can look like:
markov_jc(seq, mu, t)

where
seq - sequence
mu - frequency of mutations
t - time
Inside of jc() function given mu and t, we create jukes_cantor_substitution_matrix(0.01, 1), and then we go through the whole sequence
and mutate accoding it

========================================================

Extra reading:
TreeTime: Maximum-likelihood phylodynamic analysis
https://doi.org/10.1093/ve/vex042
https://github.com/neherlab/treetime/blob/master/treetime/nuc_models.py

Markov Chains in Python: Beginner Tutorial
https://www.datacamp.com/tutorial/markov-chains-python-tutorial