创建并连接 MSA 特征

蛋白质的 extra_msa 特征已经过 nearest_neighbor_clusters、summarize_clusters 等处理。下面通过 make_msa_feat 函数创建 protein['msa_feat'] 和 protein['target_feat'] 特征。

import tensorflow as tf
import numpy as np


def data_transforms_curry1(f):
  """Turn `f(x, *args, **kwargs)` into a two-stage call.

  The returned function accepts every argument of `f` except the first and
  hands back a one-argument callable; invoking that callable with `x` runs
  `f(x, *args, **kwargs)` and returns its result.
  """

  def fc(*args, **kwargs):
    # Capture the trailing arguments now; the leading one arrives later.
    def apply_to(x):
      return f(x, *args, **kwargs)

    return apply_to

  return fc


@data_transforms_curry1
def make_msa_feat(protein):
  """Build the 'msa_feat' and 'target_feat' entries of a feature dict.

  Because of the `data_transforms_curry1` decorator this is used as
  `make_msa_feat()(protein)`.

  Reads `protein['between_segment_residues']`, `protein['aatype']`,
  `protein['msa']` and `protein['deletion_matrix']`. When present,
  `protein['cluster_profile']` / `protein['cluster_deletion_mean']` are
  appended to the MSA features, and `protein['extra_deletion_matrix']` is
  used to add `protein['extra_has_deletion']` and
  `protein['extra_deletion_value']`.

  The dict is modified in place and also returned.

  'target_feat' has 1 + 21 = 22 channels on its last axis. 'msa_feat' has
  23 + 1 + 1 = 25 channels, plus the width of 'cluster_profile' and one more
  when the cluster features are present (presumably 49 in total, i.e. a
  23-wide profile -- confirm against the caller).
  """

  def _squash(deletions):
    # 2/pi * arctan(d / 3): compresses deletion counts into (-1, 1).
    return tf.atan(deletions / 3.) * (2. / np.pi)

  # Domain-break indicator. Always zero for chains; kept only so the feature
  # layout stays compatible with domain datasets.
  segment_break = tf.cast(protein['between_segment_residues'], tf.float32)
  segment_break = tf.clip_by_value(segment_break, 0, 1)
  residue_1hot = tf.one_hot(protein['aatype'], 21, axis=-1)

  # The target features carry the original sequence for every residue.
  target_parts = [tf.expand_dims(segment_break, axis=-1), residue_1hot]

  alignment_1hot = tf.one_hot(protein['msa'], 23, axis=-1)
  deletions = protein['deletion_matrix']
  deletion_flag = tf.clip_by_value(deletions, 0., 1.)
  deletion_scaled = _squash(deletions)

  msa_parts = [alignment_1hot]
  msa_parts.append(tf.expand_dims(deletion_flag, axis=-1))
  msa_parts.append(tf.expand_dims(deletion_scaled, axis=-1))

  if 'cluster_profile' in protein:
    cluster_deletion_scaled = _squash(protein['cluster_deletion_mean'])
    msa_parts.append(protein['cluster_profile'])
    msa_parts.append(tf.expand_dims(cluster_deletion_scaled, axis=-1))

  if 'extra_deletion_matrix' in protein:
    # The extra-MSA deletion features are stored separately, not concatenated.
    protein['extra_has_deletion'] = tf.clip_by_value(
        protein['extra_deletion_matrix'], 0., 1.)
    protein['extra_deletion_value'] = _squash(protein['extra_deletion_matrix'])

  protein['msa_feat'] = tf.concat(msa_parts, axis=-1)
  protein['target_feat'] = tf.concat(target_parts, axis=-1)
  return protein

# make_msa_feat is wrapped by data_transforms_curry1, so calling it with no
# arguments returns the actual transform, which is then applied to the
# feature dict. NOTE(review): `protein` is not defined in this snippet; it
# must already exist before this line runs.
protein = make_msa_feat()(protein)

# The transform stores both results back into the dict it was given.
print(protein['msa_feat'].shape)  # [num_seq, num_resi, 49]
print(protein['target_feat'].shape)  # [num_resi, 22]

你可能感兴趣的:(生物信息学,python)