Source code for dataset_creator.tnt

from .utils import get_seq
from .base_dataset import DatasetBlock


class TntDatasetBlock(DatasetBlock):
    """Render sequence blocks as the data-matrix section of a TNT file.

    The instance attributes read here (``_blocks``, ``outgroup``,
    ``aminoacids``, ``codon_positions``, ``degenerate``, ``warnings``) and
    the ``split_data`` method are not defined in this class; presumably they
    are provided by ``DatasetBlock`` -- confirm against the base class.
    """

    def dataset_block(self):
        """Build the whole TNT data section and return it as one string.

        Calls ``self.split_data()``, renders every block found in
        ``self._blocks`` with :meth:`convert_to_string` (moving the outgroup
        to the front first when ``self.outgroup`` is not ``None``), joins the
        rendered blocks with newlines and appends the ``;`` / ``proc/;``
        trailer.

        :return: str.
        """
        self.split_data()

        rendered_blocks = []
        for block in self._blocks:
            if self.outgroup is not None:
                block = self.put_outgroup_at_start_of_block(block)
            block_text = self.convert_to_string(block)
            # An empty block renders to '' and would only add a blank line.
            if block_text:
                rendered_blocks.append(block_text)

        return '\n'.join(rendered_blocks).strip() + '\n;\nproc/;'

    def put_outgroup_at_start_of_block(self, block):
        """Return the records of ``block`` with the outgroup listed first.

        A record belongs to the outgroup when its ``voucher_code`` equals
        ``self.outgroup``. The relative order of all other records is kept.

        If no record matches, the records are returned in their original
        order instead of failing on an unbound local variable. If several
        records match, all of them are kept (in order) at the front rather
        than silently discarding all but the last one.

        :param block: iterable of sequence records exposing ``voucher_code``.
        :return: list of the same records, outgroup first.
        """
        outgroup_records = []
        other_records = []
        for seq_record in block:
            if seq_record.voucher_code == self.outgroup:
                outgroup_records.append(seq_record)
            else:
                other_records.append(seq_record)
        return outgroup_records + other_records

    def convert_to_string(self, block):
        """Render one block of sequence records as a TNT data block.

        The result starts with a ``&[protein]`` or ``&[dna]`` header line
        (chosen by ``self.aminoacids``) followed by one line per record:
        the taxon identifier ``<voucher_code>_<genus>_<species>`` padded to
        55 columns, then the sequence returned by ``get_seq``. Any warning
        reported by ``get_seq`` is appended to ``self.warnings``.

        :param block: iterable of sequence records exposing ``voucher_code``
            and a ``taxonomy`` mapping with ``'genus'`` and ``'species'``.
        :return: str. Empty string when ``block`` contains no records.
        """
        molecule_type = "protein" if self.aminoacids else "dna"

        rows = []
        for seq_record in block:
            taxon_id = '{0}_{1}_{2}'.format(
                seq_record.voucher_code,
                seq_record.taxonomy['genus'],
                seq_record.taxonomy['species'],
            )
            sequence = get_seq(seq_record, self.codon_positions,
                               self.aminoacids, self.degenerate)
            if sequence.warning:
                self.warnings.append(sequence.warning)

            if len(taxon_id) < 55:
                label = taxon_id.ljust(55)
            else:
                # ljust() adds nothing to identifiers this long; force one
                # space so the name does not run into the sequence.
                label = taxon_id + ' '
            rows.append('{0}{1}\n'.format(label, sequence.seq))

        if not rows:
            return ''
        return '&[{0}]\n'.format(molecule_type) + ''.join(rows)