Context Navigation

Back to OpenBio*

OpenBio*: Main.java

File Main.java, 4.7 KB (added by markjschreiber, 17 years ago)
Round-trip program for BioJava

Line
1	package roundtrip;
2
3	import java.io.BufferedReader;
4	import java.io.FileReader;
5	import java.io.IOException;
6	import org.biojava.bio.seq.io.ParseException;
7	import org.biojava.bio.seq.io.SymbolTokenization;
8	import org.biojava.bio.symbol.AlphabetManager;
9	import org.biojava.bio.symbol.FiniteAlphabet;
10	import org.biojavax.Namespace;
11	import org.biojavax.bio.seq.RichSequence;
12	import org.biojavax.bio.seq.io.EMBLFormat;
13	import org.biojavax.bio.seq.io.EMBLxmlFormat;
14	import org.biojavax.bio.seq.io.FastaFormat;
15	import org.biojavax.bio.seq.io.GenbankFormat;
16	import org.biojavax.bio.seq.io.INSDseqFormat;
17	import org.biojavax.bio.seq.io.RichSequenceBuilderFactory;
18	import org.biojavax.bio.seq.io.RichSequenceFormat;
19	import org.biojavax.bio.seq.io.RichStreamReader;
20	import org.biojavax.bio.seq.io.RichStreamWriter;
21	import org.biojavax.bio.seq.io.UniProtFormat;
22	import org.biojavax.bio.seq.io.UniProtXMLFormat;
23	import org.biojavax.bio.taxa.NCBITaxon;
24	import org.biojavax.bio.taxa.io.NCBITaxonomyLoader;
25	import org.biojavax.bio.taxa.io.SimpleNCBITaxonomyLoader;
26
27	/**
28	* This program will round trip sequence formats
29	* @author Mark
30	*/
31	public class Main {
32
33	/**
34	* Attempts to find a format for a name String such as "genbank" or for a
35	* fully qualified string like org.biojavax.bio.seq.io.UniProtFormat
36	* @return the matching <code>RichSequenceFormat</code>
37	* @param name the name of the format, case insensitive except for qualified class names
38	* @throws java.lang.IllegalAccessException If java cannot reflectively access the named format.
39	* Only applies to fully qualified class names.
40	* @throws java.lang.ClassNotFoundException If a format can not be found for the name.
41	* @throws java.lang.InstantiationException If the found object cannot be created (only applies
42	* to fully qualified class names).
43	*/
44	public static RichSequenceFormat formatForName(String name)
45	throws ClassNotFoundException, InstantiationException, IllegalAccessException {
46	//determine the format to use
47	RichSequenceFormat format;
48	if (name.equalsIgnoreCase("fasta")) {
49	format = new FastaFormat();
50	} else if (name.equalsIgnoreCase("genbank")) {
51	format = new GenbankFormat();
52	} else if (name.equalsIgnoreCase("uniprot")) {
53	format = new UniProtFormat();
54	} else if (name.equalsIgnoreCase("embl")) {
55	format = new EMBLFormat();
56	} else if (name.equalsIgnoreCase("INSDseq")) {
57	format = new INSDseqFormat();
58	} else if (name.equalsIgnoreCase("EMBLxml")) {
59	format = new EMBLxmlFormat();
60	} else if (name.equalsIgnoreCase("UniprotXML")){
61	format = new UniProtXMLFormat();
62	} else {
63	Class formatClass = Class.forName(name);
64	format = (RichSequenceFormat) formatClass.newInstance();
65	}
66	return format;
67	}
68
69	public static void loadNCBITaxon() throws IOException, ParseException{
70	NCBITaxonomyLoader l = new SimpleNCBITaxonomyLoader();
71	BufferedReader nodes = new BufferedReader(new FileReader("nodes.dmp"));
72	BufferedReader names = new BufferedReader(new FileReader("names.dmp"));
73
74	NCBITaxon t;
75	while ((t = l.readNode(nodes)) != null) {} // read all the nodes first
76	while ((t = l.readName(names)) != null) {} // then read all the names
77	}
78
79	/**
80	* @param args the command line arguments
81	* args[0] the input file name
82	* args[1] the input format name or fully qualified classname (eg fasta, or
83	* org.biojavax.bio.seq.io.FastaFormat)
84	* args[2] the ouput format name (see above)
85	* args[3] the alphabet (commonly DNA or Protein)
86	* args[4] the namespace (something like gb)
87	*/
88	public static void main(String[] args) throws Exception {
89	BufferedReader br = new BufferedReader(new FileReader(args[0]));
90	RichSequenceFormat inFormat = formatForName(args[1]);
91	RichSequenceFormat outFormat = formatForName(args[2]);
92	FiniteAlphabet alpha = (FiniteAlphabet) AlphabetManager.alphabetForName(args[3]);
93	Namespace ns = null;
94	SymbolTokenization toke = alpha.getTokenization("default");
95
96	if(! (inFormat.getClass().equals(formatForName("fasta").getClass())
97	\|\| outFormat.getClass().equals(formatForName("fasta").getClass()))){
98	System.out.println("Loading NCBI taxonomy");
99	loadNCBITaxon();
100	}
101
102	RichStreamReader sr = new RichStreamReader(
103	br, inFormat, toke,
104	RichSequenceBuilderFactory.THRESHOLD,
105	null);
106
107	RichStreamWriter sw = new RichStreamWriter(System.out, outFormat);
108	sw.writeStream(sr, ns);
109	}
110	}

Download in other formats:

Original Format