PDBj-DDBJ-KEGG: workflow_using_ddbj_kegg_pdbj.xml

File workflow_using_ddbj_kegg_pdbj.xml, 12.8 KB (added by yshigemo, 10 years ago)

Taverna workflow using DDBJ, KEGG and PDBj

Line 
1<?xml version="1.0" encoding="UTF-8"?>
2<s:scufl xmlns:s="http://org.embl.ebi.escience/xscufl/0.1alpha" version="0.2" log="0">
3  <s:workflowdescription lsid="urn:lsid:net.sf.taverna:wfDefinition:e1e41621-d605-4abd-990e-217bf1c7dbe8" author="" title="hackathon" />
4  <s:processor name="PDB_strnavi"> <!-- Local WebPageFetcher worker. Its url input is linked from BLAST_PDB_parser:url and its contents output feeds to_list:string. NOTE(review): the url starts with /strnavi? and is presumably resolved against the base default below; confirm. -->
5    <s:defaults>
6      <s:default name="base">http://rest.pdbj.org/strnavi?</s:default>
7    </s:defaults>
8    <s:local>org.embl.ebi.escience.scuflworkers.java.WebPageFetcher</s:local>
9  </s:processor>
10  <s:processor name="to_list"> <!-- Local SplitByRegex worker with the regex defaulting to a newline: turns PDB_strnavi:contents into a list that is linked to Get_pdbid:pdbidlist. -->
11    <s:defaults>
12      <s:default name="regex">\n</s:default>
13    </s:defaults>
14    <s:local>org.embl.ebi.escience.scuflworkers.java.SplitByRegex</s:local>
15  </s:processor>
16  <s:processor name="Concatenate2"> <!-- StringConcat worker: string1 is linked from Concatenate:output, string2 defaults to a single quote (the closing quote of the query value); output is linked to Get_DAD_definition:queryPath. -->
17    <s:defaults>
18      <s:default name="string2">'</s:default>
19    </s:defaults>
20    <s:local>org.embl.ebi.escience.scuflworkers.java.StringConcat</s:local>
21  </s:processor>
22  <s:processor name="Concatenate"> <!-- StringConcat worker: string1 defaults to the DAD primary accession path prefix with an opening quote, string2 is linked from BLAST_DAD_parser:accession; output is linked to Concatenate2:string1. -->
23    <s:defaults>
24      <s:default name="string1">/ENTRY/DAD/primary-accession = '</s:default>
25    </s:defaults>
26    <s:local>org.embl.ebi.escience.scuflworkers.java.StringConcat</s:local>
27  </s:processor>
28  <s:processor name="Concatenate4"> <!-- StringConcat worker: string1 is linked from Concatenate3:output, string2 defaults to a single quote (the closing quote of the query value); output is linked to Get_PDB_definition:queryPath. -->
29    <s:defaults>
30      <s:default name="string2">'</s:default>
31    </s:defaults>
32    <s:local>org.embl.ebi.escience.scuflworkers.java.StringConcat</s:local>
33  </s:processor>
34  <s:processor name="Concatenate3"> <!-- StringConcat worker: string1 defaults to the PDB idcode path prefix with an opening quote, string2 is linked from Get_pdbid:pdbid; output is linked to Concatenate4:string1. -->
35    <s:defaults>
36      <s:default name="string1">/ENTRY/PDB/header/idcode = '</s:default>
37    </s:defaults>
38    <s:local>org.embl.ebi.escience.scuflworkers.java.StringConcat</s:local>
39  </s:processor>
40  <s:processor name="Concatenate5"> <!-- StringConcat worker: prefixes Get_pdbid:pdbid (string2) with the default string1 "pdb:"; output is linked to get_linkdb_by_entry:entry_id. -->
41    <s:defaults>
42      <s:default name="string1">pdb:</s:default>
43    </s:defaults>
44    <s:local>org.embl.ebi.escience.scuflworkers.java.StringConcat</s:local>
45  </s:processor>
46  <s:processor name="returnXML"> <!-- XMLOutputSplitter for the ArrayOfLinkDBRelation value: input is linked from get_linkdb_by_entry:return, and its "return" output is linked to returnXML1:input. -->
47    <s:local>
48      org.embl.ebi.escience.scuflworkers.java.XMLOutputSplitter
49      <s:extensions>
50        <s:arraytype optional="false" unbounded="false" wrapped="true" typename="ArrayOfLinkDBRelation" name="return" qname="{SOAP/KEGG}ArrayOfLinkDBRelation">
51          <s:elementtype>
52            <s:complextype optional="false" unbounded="false" typename="LinkDBRelation" name="" qname="{SOAP/KEGG}LinkDBRelation">
53              <s:elements>
54                <s:basetype optional="false" unbounded="false" typename="string" name="entry_id1" qname="{http://www.w3.org/2001/XMLSchema}string" />
55                <s:basetype optional="false" unbounded="false" typename="string" name="entry_id2" qname="{http://www.w3.org/2001/XMLSchema}string" />
56                <s:basetype optional="false" unbounded="false" typename="string" name="type" qname="{http://www.w3.org/2001/XMLSchema}string" />
57                <s:basetype optional="false" unbounded="false" typename="string" name="path" qname="{http://www.w3.org/2001/XMLSchema}string" />
58              </s:elements>
59            </s:complextype>
60          </s:elementtype>
61        </s:arraytype>
62      </s:extensions>
63    </s:local>
64  </s:processor>
65  <s:processor name="returnXML1"> <!-- XMLOutputSplitter for a single LinkDBRelation: input is linked from returnXML:return. Of the four declared string elements only entry_id2 is linked onward (to btit:string). -->
66    <s:local>
67      org.embl.ebi.escience.scuflworkers.java.XMLOutputSplitter
68      <s:extensions>
69        <s:complextype optional="false" unbounded="false" typename="LinkDBRelation" name="" qname="{SOAP/KEGG}LinkDBRelation">
70          <s:elements>
71            <s:basetype optional="false" unbounded="false" typename="string" name="entry_id1" qname="{http://www.w3.org/2001/XMLSchema}string" />
72            <s:basetype optional="false" unbounded="false" typename="string" name="entry_id2" qname="{http://www.w3.org/2001/XMLSchema}string" />
73            <s:basetype optional="false" unbounded="false" typename="string" name="type" qname="{http://www.w3.org/2001/XMLSchema}string" />
74            <s:basetype optional="false" unbounded="false" typename="string" name="path" qname="{http://www.w3.org/2001/XMLSchema}string" />
75          </s:elements>
76        </s:complextype>
77      </s:extensions>
78    </s:local>
79  </s:processor>
80  <s:processor name="ARSA_parser2"> <!-- Beanshell: reads the third line of the single text input "output" (linked from Get_PDB_definition:Result) and emits it as "definition" unless it is missing or contains "hypothetical"/"unidentified"; otherwise emits "hypothetical protein". Output is linked to sink annotation_from_PDBj. -->
81    <s:beanshell>
82      <s:scriptvalue>String definition = "hypothetical protein"; // fallback when no informative line is found
83BufferedReader br = new BufferedReader(new StringReader(output));
84br.readLine(); // skip line 1 of the response
85br.readLine(); // skip line 2; the value is read from line 3
86String l = br.readLine();
87// l is null when the response has fewer than three lines; keep the fallback in that case
88if(l != null &amp;&amp; l.indexOf("hypothetical") == -1 &amp;&amp; l.indexOf("unidentified") == -1) {
89        definition = l; // there is no enclosing loop here, so no break is needed
90}</s:scriptvalue>
91      <s:beanshellinputlist>
92        <s:beanshellinput s:syntactictype="'text/plain'">output</s:beanshellinput>
93      </s:beanshellinputlist>
94      <s:beanshelloutputlist>
95        <s:beanshelloutput s:syntactictype="'text/plain'">definition</s:beanshelloutput>
96      </s:beanshelloutputlist>
97      <s:dependencies s:classloader="iteration" />
98    </s:beanshell>
99  </s:processor>
100  <s:processor name="BLAST_PDB_parser"> <!-- Beanshell: scans the list input "blastResult" (linked from BLAST_against_PDB:Result) for the first tab-separated line whose second column has a '|', takes characters 0-3 of that column as pdbid and everything after index 4 as chain, and emits a /strnavi query string as "url" (linked to PDB_strnavi:url). -->
101    <s:beanshell>
102      <s:scriptvalue>String pdbid = "";
103String chain = "";
104String url = "";
105boolean hasHit = false;
106// Stop at the first usable hit across all result texts.
107for(Iterator it = blastResult.iterator(); it.hasNext() &amp;&amp; !hasHit; ) {
108        String l = (String)it.next();
109        BufferedReader br = new BufferedReader(new StringReader(l));
110        String ll;
111        while((ll = br.readLine()) != null) {
112                String[] lll = ll.split("\t");
113                // skip blank/short lines (no second column) and ids without a '|'
114                if(lll.length &lt; 2 || lll[1].indexOf("|") == -1) {
115                        continue;
116                }
117                String s = lll[1].substring(0, lll[1].indexOf("|"));
118                if(s.length() &lt; 5) {
119                        continue; // too short for a 4-character id plus the separator at index 4
120                }
121                pdbid = s.substring(0, 4);
122                chain = s.substring(5);
123                hasHit = true;
124                break;
125        }
126        br.close();
127}
128// pdbid and chain stay empty when nothing matched; the url is still emitted, as before.
129url = "/strnavi?pdbid=" + pdbid + "&amp;chain=" + chain + "&amp;fmt=simple";</s:scriptvalue>
130      <s:beanshellinputlist>
131        <s:beanshellinput s:syntactictype="l('text/plain')">blastResult</s:beanshellinput>
132      </s:beanshellinputlist>
133      <s:beanshelloutputlist>
134        <s:beanshelloutput s:syntactictype="'text/plain'">url</s:beanshelloutput>
135      </s:beanshelloutputlist>
136      <s:dependencies s:classloader="iteration" />
137    </s:beanshell>
138  </s:processor>
139  <s:processor name="ARSA_parser"> <!-- Beanshell: walks the list input "definitionList" (linked from Get_DAD_definition:Result), reads the third line of each entry and emits the first one that is present and contains neither "hypothetical" nor "unidentified"; otherwise emits "hypothetical protein". Output is linked to sink annotation_from_DDBJ. -->
140    <s:beanshell>
141      <s:scriptvalue>String definition = "hypothetical protein"; // fallback when every entry is uninformative
142for(Iterator it = definitionList.iterator(); it.hasNext();) {
143        String s = (String)it.next();
144        BufferedReader br = new BufferedReader(new StringReader(s));
145        br.readLine(); // skip line 1 of this result
146        br.readLine(); // skip line 2; the definition is read from line 3
147        String l = br.readLine(); // null when the result has fewer than three lines
148        if(l != null &amp;&amp; l.indexOf("hypothetical") == -1 &amp;&amp; l.indexOf("unidentified") == -1) {
149                definition = l;
150                break; // first informative definition wins
151        }
152}</s:scriptvalue>
153      <s:beanshellinputlist>
154        <s:beanshellinput s:syntactictype="l('text/plain')">definitionList</s:beanshellinput>
155      </s:beanshellinputlist>
156      <s:beanshelloutputlist>
157        <s:beanshelloutput s:syntactictype="'text/plain'">definition</s:beanshelloutput>
158      </s:beanshelloutputlist>
159      <s:dependencies s:classloader="iteration" />
160    </s:beanshell>
161  </s:processor>
162  <s:processor name="BLAST_DAD_parser"> <!-- Beanshell: for each tab-separated line of the text input "blastResult" (linked from BLAST_against_DAD:Result), collects the part of the second column before the first '|' into the list output "accession" (linked to Concatenate:string2 and Get_DAD_FASTA:accession). Lines without a second column or without a '|' are skipped. -->
163    <s:beanshell>
164      <s:scriptvalue>List accession = new ArrayList();
165BufferedReader br = new BufferedReader(new StringReader(blastResult));
166String l;
167while((l = br.readLine()) != null) {
168        String[] ll = l.split("\t");
169        int bar = ll.length &lt; 2 ? -1 : ll[1].indexOf("|"); // -1: blank/short line, or id without '|'
170        if(bar != -1) accession.add(ll[1].substring(0, bar));
171}</s:scriptvalue>
172      <s:beanshellinputlist>
173        <s:beanshellinput s:syntactictype="'text/plain'">blastResult</s:beanshellinput>
174      </s:beanshellinputlist>
175      <s:beanshelloutputlist>
176        <s:beanshelloutput s:syntactictype="l('text/plain')">accession</s:beanshelloutput>
177      </s:beanshelloutputlist>
178      <s:dependencies s:classloader="iteration" />
179    </s:beanshell>
180  </s:processor>
181  <s:processor name="Get_pdbid"> <!-- Beanshell: emits as "pdbid" the first four characters of the first entry of the list input "pdbidlist" (linked from to_list:split) that is at least four characters long; emits an empty string when there is none. Output is linked to Concatenate3:string2 and Concatenate5:string2. -->
182    <s:beanshell>
183      <s:scriptvalue>String pdbid = ""; // stays empty when no usable line exists
184for(Iterator it = pdbidlist.iterator(); it.hasNext() &amp;&amp; pdbid.equals("");) {
185        String l = (String)it.next();
186        // blank or short lines are skipped instead of making substring throw
187        if(l.length() >= 4) pdbid = l.substring(0, 4);
188}</s:scriptvalue>
189      <s:beanshellinputlist>
190        <s:beanshellinput s:syntactictype="l('text/plain')">pdbidlist</s:beanshellinput>
191      </s:beanshellinputlist>
192      <s:beanshelloutputlist>
193        <s:beanshelloutput s:syntactictype="'text/plain'">pdbid</s:beanshelloutput>
194      </s:beanshelloutputlist>
195      <s:dependencies s:classloader="iteration" />
196    </s:beanshell>
197  </s:processor>
198  <s:processor name="BLAST_against_PDB"> <!-- WSDL call to operation searchParam of Blast.wsdl with defaults program=blastp and database=PDB. The query input is linked from Get_DAD_FASTA:Result; Result is linked to BLAST_PDB_parser:blastResult, which splits each line on tabs. -->
199    <s:description>Execute Blast with parameter</s:description>
200    <s:defaults>
201      <s:default name="program">blastp</s:default>
202      <s:default name="database">PDB</s:default>
203      <s:default name="param">-e 1 -m 8</s:default>
204    </s:defaults>
205    <s:arbitrarywsdl>
206      <s:wsdl>http://xml.nig.ac.jp/wsdl/Blast.wsdl</s:wsdl>
207      <s:operation>searchParam</s:operation>
208    </s:arbitrarywsdl>
209  </s:processor>
210  <s:processor name="BLAST_against_DAD"> <!-- WSDL call to operation searchParam of Blast.wsdl with defaults program=blastp and database=DAD. The query input is linked from the workflow source "query"; Result is linked to BLAST_DAD_parser:blastResult, which splits each line on tabs. -->
211    <s:description>Execute Blast with parameter</s:description>
212    <s:defaults>
213      <s:default name="program">blastp</s:default>
214      <s:default name="database">DAD</s:default>
215      <s:default name="param">-m 8 -e 0.00000000001 -v 10</s:default>
216    </s:defaults>
217    <s:arbitrarywsdl>
218      <s:wsdl>http://xml.nig.ac.jp/wsdl/Blast.wsdl</s:wsdl>
219      <s:operation>searchParam</s:operation>
220    </s:arbitrarywsdl>
221  </s:processor>
222  <s:processor name="Get_DAD_FASTA"> <!-- WSDL call to operation getFASTA_DADEntry of GetEntry.wsdl. The accession input is linked from BLAST_DAD_parser:accession; Result is linked to BLAST_against_PDB:query. -->
223    <s:description>Get DAD entry of FASTA Format by Accession Number</s:description>
224    <s:arbitrarywsdl>
225      <s:wsdl>http://xml.nig.ac.jp/wsdl/GetEntry.wsdl</s:wsdl>
226      <s:operation>getFASTA_DADEntry</s:operation>
227    </s:arbitrarywsdl>
228  </s:processor>
229  <s:processor name="btit"> <!-- WSDL call to operation btit of KEGG.wsdl. The string input is linked from returnXML1:entry_id2; return is linked to the workflow sink annotation_from_KEGG. -->
230    <s:arbitrarywsdl>
231      <s:wsdl>http://soap.genome.jp/KEGG.wsdl</s:wsdl>
232      <s:operation>btit</s:operation>
233    </s:arbitrarywsdl>
234  </s:processor>
235  <s:processor name="Get_DAD_definition"> <!-- WSDL call to operation searchByXMLPath of ARSA.wsdl with defaults returnPath=/ENTRY/DAD/definition, offset=1, count=1. The queryPath input is linked from Concatenate2:output; Result is linked to ARSA_parser:definitionList. -->
236    <s:defaults>
237      <s:default name="returnPath">/ENTRY/DAD/definition</s:default>
238      <s:default name="offset">1</s:default>
239      <s:default name="count">1</s:default>
240    </s:defaults>
241    <s:arbitrarywsdl>
242      <s:wsdl>http://xml.nig.ac.jp/wsdl/ARSA.wsdl</s:wsdl>
243      <s:operation>searchByXMLPath</s:operation>
244    </s:arbitrarywsdl>
245  </s:processor>
246  <s:processor name="Get_PDB_definition"> <!-- WSDL call to operation searchByXMLPath of ARSA.wsdl with defaults returnPath=/ENTRY/PDB/title, offset=1, count=1. The queryPath input is linked from Concatenate4:output; Result is linked to ARSA_parser2:output. -->
247    <s:defaults>
248      <s:default name="returnPath">/ENTRY/PDB/title</s:default>
249      <s:default name="offset">1</s:default>
250      <s:default name="count">1</s:default>
251    </s:defaults>
252    <s:arbitrarywsdl critical="true"> <!-- NOTE(review): critical="true" is set here but not on Get_DAD_definition, which calls the same operation; confirm the difference is intended. -->
253      <s:wsdl>http://xml.nig.ac.jp/wsdl/ARSA.wsdl</s:wsdl>
254      <s:operation>searchByXMLPath</s:operation>
255    </s:arbitrarywsdl>
256  </s:processor>
257  <s:processor name="get_linkdb_by_entry"> <!-- WSDL call to operation get_linkdb_by_entry of KEGG.wsdl with defaults db=genes, offset=1, limit=10. The entry_id input is linked from Concatenate5:output (the pdbid prefixed with "pdb:"); return is linked to returnXML:input. -->
258    <s:defaults>
259      <s:default name="db">genes</s:default>
260      <s:default name="offset">1</s:default>
261      <s:default name="limit">10</s:default>
262    </s:defaults>
263    <s:arbitrarywsdl>
264      <s:wsdl>http://soap.genome.jp/KEGG.wsdl</s:wsdl>
265      <s:operation>get_linkdb_by_entry</s:operation>
266    </s:arbitrarywsdl>
267  </s:processor>
268  <s:link source="query" sink="BLAST_against_DAD:query" /> <!-- Data links. A bare name is a workflow source or sink declared at the end of this file; "processor:port" names a port of a processor. -->
269  <s:link source="BLAST_DAD_parser:accession" sink="Concatenate:string2" /> <!-- accession is declared as a list; NOTE(review): the two sinks fed by it presumably run once per element; confirm the iteration behaviour. -->
270  <s:link source="BLAST_DAD_parser:accession" sink="Get_DAD_FASTA:accession" />
271  <s:link source="BLAST_PDB_parser:url" sink="PDB_strnavi:url" />
272  <s:link source="BLAST_against_DAD:Result" sink="BLAST_DAD_parser:blastResult" />
273  <s:link source="BLAST_against_PDB:Result" sink="BLAST_PDB_parser:blastResult" />
274  <s:link source="Concatenate2:output" sink="Get_DAD_definition:queryPath" />
275  <s:link source="Concatenate3:output" sink="Concatenate4:string1" />
276  <s:link source="Concatenate4:output" sink="Get_PDB_definition:queryPath" />
277  <s:link source="Concatenate5:output" sink="get_linkdb_by_entry:entry_id" />
278  <s:link source="Concatenate:output" sink="Concatenate2:string1" />
279  <s:link source="Get_DAD_FASTA:Result" sink="BLAST_against_PDB:query" />
280  <s:link source="Get_DAD_definition:Result" sink="ARSA_parser:definitionList" />
281  <s:link source="Get_PDB_definition:Result" sink="ARSA_parser2:output" />
282  <s:link source="Get_pdbid:pdbid" sink="Concatenate3:string2" /> <!-- the same pdbid feeds both the ARSA title query (Concatenate3) and the KEGG LinkDB lookup (Concatenate5) -->
283  <s:link source="Get_pdbid:pdbid" sink="Concatenate5:string2" />
284  <s:link source="PDB_strnavi:contents" sink="to_list:string" />
285  <s:link source="get_linkdb_by_entry:return" sink="returnXML:input" />
286  <s:link source="returnXML1:entry_id2" sink="btit:string" />
287  <s:link source="returnXML:return" sink="returnXML1:input" />
288  <s:link source="to_list:split" sink="Get_pdbid:pdbidlist" />
289  <s:link source="ARSA_parser2:definition" sink="annotation_from_PDBj" /> <!-- the last three links deliver the results to the workflow sinks -->
290  <s:link source="ARSA_parser:definition" sink="annotation_from_DDBJ" />
291  <s:link source="btit:return" sink="annotation_from_KEGG" />
292  <s:source name="query" /> <!-- The only workflow input; it is linked to BLAST_against_DAD:query. -->
293  <s:sink name="annotation_from_DDBJ" /> <!-- Receives ARSA_parser:definition. -->
294  <s:sink name="annotation_from_PDBj"> <!-- Receives ARSA_parser2:definition. NOTE(review): that output is declared 'text/plain' while this sink declares text/html; confirm which is intended. -->
295    <s:metadata>
296      <s:mimeTypes>
297        <s:mimeType>text/html</s:mimeType>
298      </s:mimeTypes>
299    </s:metadata>
300  </s:sink>
301  <s:sink name="annotation_from_KEGG"> <!-- Receives btit:return. -->
302    <s:metadata>
303      <s:mimeTypes>
304        <s:mimeType>text/xml</s:mimeType>
305      </s:mimeTypes>
306    </s:metadata>
307  </s:sink>
308</s:scufl>
309