Action | Code Sample |
---|---|
Load | import rstoolbox as rs import matplotlib.pyplot as plt import seaborn as sns |
Read | # Load design population. A description dictionary can be provided to alter the # information loaded from the silent file. In this case, we load all the # sequence information available for all possible chains in the decoys. df = rs.io.parse_rosetta_file(‘1kx8gen2.silent.gz’, {‘sequence’: ‘*’}) |
# Select the top 5% designs by score and obtain the residues # overrepresented by more than 20% df_top = df[df[‘score’] < df[‘score’].quantile(0.05)] freq_top = rs.analysis.sequential_frequencies(df_top, ‘A’, ‘sequence’, ‘protein’) freq_all = df .sequence_frequencies (‘A’) # shortcut to utils.sequential_frequencies freq_diff = (top - freq) muts = freq_diff[(freq_diff.T > 0.20).any()].idxmax(axis = 1) muts = list(zip(muts.index, muts.values)) | |
# Select the best scored sequence that does NOT contain ANY of those residues pick = df .get_sequence_with (‘A’, muts, confidence = 0.25,                                                                                   invert = True).sort_values(‘score’).iloc[:1] # Setting a reference sequence in a DesignFrame allows to use this sequence as # source for mutant generation and sequence comparison, amongst others. seq = pick.iloc[0 ].get_sequence (‘A’) pick .add_reference_sequence (‘A’, seq) | |
# Generate mutants based on the identified overrepresented variants: # 1. Create a list with positions and residue type expected in each position muts = [(muts[i][0], muts[i] [1] + seq[muts[i][0] - 1]) for i in range (len(muts))] # 2 Generate a DesignFrame containing the new expected sequences variants = pick .generate_mutant_variants (‘A’, muts) variants .add_reference_sequence (‘A’, seq) # 3. Generate the resfiles that will guide the mutagenesis variants = variants .make_resfile (‘A’, ‘NATAA’, ‘mutants.resfile’) # 4. With Rosetta installed, we can automatically run those resfiles. variants = variants .apply_resfile (‘A’, ‘variants.silent’) variants = variants .identify_mutants (‘A’) | |
Plot | fig = plt.figure(figsize = (170 / 25.4, 170 / 25.4)) grid = (3, 4) |
# Visualize overrepresented residues in the top 5% ax = plt.subplot2grid(grid, (0, 0), colspan = 4, rowspan = 4) cbar_ax = plt.subplot2grid(grid, (4, 0), colspan = 4, rowspan = 1) sns.heatmap(freq_diff.T, ax = ax, vmin = 0, cbar_ax = cbar_ax) rs.utils.add_top_title (ax, ‘Top scoring enrichment’) | |
# Compare query positions: initial sequence vs. mutant generation ax = plt.subplot2grid(grid, (5, 0), colspan = 2, rowspan = 2) key_res = [mutants[0] for mutants in muts] rs.plot.logo_plot_in_axis (pick, ‘A’, ax = ax, _residueskr) ax = plt.subplot2grid(grid, (5, 2), colspan = 2, rowspan = 2) rs.plot.logo_plot_in_axis (variants, ‘A’, ax = ax, key_residues = kr) | |
# Check which mutations perform better ax = plt.subplot2grid(grid, (7, 0), colspan = 2, rowspan = 3) sns.scatterplot(‘mutant_count_A’, ‘score’, data = variants, ax = ax) # Show distribution of best performing decoys ax = plt.subplot2grid(grid, (7, 2), fig = fig, colspan = 2, rowspan = 3) rs.plot.logo_plot_in_axis (variants.sort_values(‘score’).head(3), ‘A’, ax = ax, key_residues = kr) plt.tight_layout() plt.savefig(‘BMC_Fig3.png’, dpi = 300) |