# parse_genbank my $file = shift; my $seqin = Bio::SeqIO->new(-file => $file, -format => 'GenBank'); while(my $seq = $seqin->next_seq()){ foreach my $feat_object ($seq->get_SeqFeatures){ &decompose_feature($feat_object); } } # decompose_feature sub decompose_feature { my $feat_object = shift; # primary_tag = gene if($feat_object->primary_tag() eq 'gene'){ &start_another_gene($feat_object); $last_gene_feat = $feat_object; # primary_tag = CDS }elsif($feat_object->primary_tag() eq 'CDS'){ # make transcript if($last_gene_feat){ unless(&check_contain_in($last_gene_feat, $feat_object)){ throw( "\nCDS\n". "\tstart: ".$feat_object->start. "\n". "\tend: ".$feat_object->end. "\n". "is not contained in gene ". $last_gene_feat->start. "-". $last_gene_feat->end. "\n"); } } &create_transcript_and_exon($unique_id, 'protein_coding', 'KNOWN', $feat_object); } sub create_transcript_and_exon { my $unique_id = shift; my $type = shift; my $status = shift; my $feat_object = shift; $featurehash{$unique_id}{'type'} = $type; $featurehash{$unique_id}{'status'} = $status; # make transcript my $new_transcript = make_Transcript_object($t_stable_id, $type, $time); $featurehash{$unique_id}{'transcript'}{$t_stable_id} = $new_transcript; my $desc = get_desc($feat_object); $featurehash{$unique_id}{'description'} = $desc; # multi exons if($feat_object->location->isa('Bio::Location::SplitLocationI')){ for my $location ( $feat_object->location->sub_Location ) { my $exon_start = $location->start; my $exon_end = $location->end; my $exon_strand = $location->strand; # make exon my $exon = make_Exon_object($featurehash{$unique_id}{'slice'}, $exon_start, $exon_end, $exon_strand, $featurehash{$unique_id}{'transcript'}{$t_stable_id}, $e_stable_id, $time); $e_stable_id++; } # single exon }else{ # make exon my $exon = make_Exon_object($featurehash{$unique_id}{'slice'}, $feat_object->start, $feat_object->end, $feat_object->strand, $featurehash{$unique_id}{'transcript'}{$t_stable_id}, $e_stable_id, $time); $e_stable_id++; } } #store annotation #make gene my $gene = Bio::EnsEMBL::Gene->new( -START => $feature->{$key}->{'start'}, -END => $feature->{$key}->{'end'}, -STRAND => $feature->{$key}->{'strand'}, -SLICE => $feature->{$key}->{'slice'}, -ANALYSIS => create_analysis($LOGIC_NAME_EXON), -STABLE_ID => $feature->{$key}->{'stable_id'}, -VERSION => $ATTRIBUTE_VERSION, -TYPE => $feature->{$key}->{'type'}, -DESCRIPTION => $feature->{$key}->{'description'}, -STATUS => $feature->{$key}->{'status'}, -CANONICAL_TRANSCRIPT => $feature->{$key}->{'transcript'}->{$first_transcript}, -CREATED_DATE => $time, -MODIFIED_DATE => $time, -SOURCE => $feature->{$key}->{'source'}, ); # start to store gene object my $gene_DB_id = $db->get_GeneAdaptor->store($gene); # store gene xref $db->get_DBEntryAdaptor->store($feature->{$key}->{'DBEntry'}, $gene_DB_id, 'Gene'); $gene->display_xref($feature->{$key}->{'DBEntry'}); $db->get_GeneAdaptor->update($gene); # transcript xref for my $tran (@{$gene->get_all_Transcripts}) { my $DBEntry = Bio::EnsEMBL::DBEntry->new( -DBNAME => $SPE_PREFIX. '_TRANSCRIPT', -RELEASE => 1, -PRIMARY_ID => $feature->{$key}->{'ID'}, -DISPLAY_ID => $feature->{$key}->{'symbol'}, -DESCRIPTION=> $META->{'assembly.default'} ); $db->get_DBEntryAdaptor->store($DBEntry, $tran->dbID,'Transcript'); $tran->display_xref($DBEntry); $db->get_TranscriptAdaptor->update($tran); }