1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <s:scufl xmlns:s="http://org.embl.ebi.escience/xscufl/0.1alpha" version="0.2" log="0">
3 | <s:workflowdescription lsid="urn:lsid:net.sf.taverna:wfDefinition:e1e41621-d605-4abd-990e-217bf1c7dbe8" author="" title="hackathon" />
4 | <s:processor name="PDB_strnavi"> <!-- Local WebPageFetcher worker. Its 'base' input defaults to the PDBj strnavi URL below; 'url' is linked from BLAST_PDB_parser:url and 'contents' is linked to to_list:string. -->
5 | <s:defaults>
6 | <s:default name="base">http://rest.pdbj.org/strnavi?</s:default>
7 | </s:defaults>
8 | <s:local>org.embl.ebi.escience.scuflworkers.java.WebPageFetcher</s:local>
9 | </s:processor>
10 | <s:processor name="to_list"> <!-- Local SplitByRegex worker with 'regex' defaulted to \n. 'string' is linked from PDB_strnavi:contents and 'split' is linked to Get_pdbid:pdbidlist. -->
11 | <s:defaults>
12 | <s:default name="regex">\n</s:default>
13 | </s:defaults>
14 | <s:local>org.embl.ebi.escience.scuflworkers.java.SplitByRegex</s:local>
15 | </s:processor>
16 | <s:processor name="Concatenate2"> <!-- Local StringConcat worker. 'string1' is linked from Concatenate:output and 'string2' defaults to a single quote; 'output' is linked to Get_DAD_definition:queryPath. -->
17 | <s:defaults>
18 | <s:default name="string2">'</s:default>
19 | </s:defaults>
20 | <s:local>org.embl.ebi.escience.scuflworkers.java.StringConcat</s:local>
21 | </s:processor>
22 | <s:processor name="Concatenate"> <!-- Local StringConcat worker. 'string1' defaults to the DAD primary-accession path prefix (ending in an opening quote) and 'string2' is linked from BLAST_DAD_parser:accession; 'output' is linked to Concatenate2:string1. -->
23 | <s:defaults>
24 | <s:default name="string1">/ENTRY/DAD/primary-accession = '</s:default>
25 | </s:defaults>
26 | <s:local>org.embl.ebi.escience.scuflworkers.java.StringConcat</s:local>
27 | </s:processor>
28 | <s:processor name="Concatenate4"> <!-- Local StringConcat worker. 'string1' is linked from Concatenate3:output and 'string2' defaults to a single quote; 'output' is linked to Get_PDB_definition:queryPath. -->
29 | <s:defaults>
30 | <s:default name="string2">'</s:default>
31 | </s:defaults>
32 | <s:local>org.embl.ebi.escience.scuflworkers.java.StringConcat</s:local>
33 | </s:processor>
34 | <s:processor name="Concatenate3"> <!-- Local StringConcat worker. 'string1' defaults to the PDB idcode path prefix (ending in an opening quote) and 'string2' is linked from Get_pdbid:pdbid; 'output' is linked to Concatenate4:string1. -->
35 | <s:defaults>
36 | <s:default name="string1">/ENTRY/PDB/header/idcode = '</s:default>
37 | </s:defaults>
38 | <s:local>org.embl.ebi.escience.scuflworkers.java.StringConcat</s:local>
39 | </s:processor>
40 | <s:processor name="Concatenate5"> <!-- Local StringConcat worker. 'string1' defaults to the literal "pdb:" and 'string2' is linked from Get_pdbid:pdbid; 'output' is linked to get_linkdb_by_entry:entry_id. -->
41 | <s:defaults>
42 | <s:default name="string1">pdb:</s:default>
43 | </s:defaults>
44 | <s:local>org.embl.ebi.escience.scuflworkers.java.StringConcat</s:local>
45 | </s:processor>
46 | <s:processor name="returnXML"> <!-- Local XMLOutputSplitter worker configured for the {SOAP/KEGG}ArrayOfLinkDBRelation type declared below. 'input' is linked from get_linkdb_by_entry:return and 'return' is linked to returnXML1:input. -->
47 | <s:local>
48 | org.embl.ebi.escience.scuflworkers.java.XMLOutputSplitter
49 | <s:extensions>
50 | <s:arraytype optional="false" unbounded="false" wrapped="true" typename="ArrayOfLinkDBRelation" name="return" qname="{SOAP/KEGG}ArrayOfLinkDBRelation">
51 | <s:elementtype>
52 | <s:complextype optional="false" unbounded="false" typename="LinkDBRelation" name="" qname="{SOAP/KEGG}LinkDBRelation">
53 | <s:elements>
54 | <s:basetype optional="false" unbounded="false" typename="string" name="entry_id1" qname="{http://www.w3.org/2001/XMLSchema}string" />
55 | <s:basetype optional="false" unbounded="false" typename="string" name="entry_id2" qname="{http://www.w3.org/2001/XMLSchema}string" />
56 | <s:basetype optional="false" unbounded="false" typename="string" name="type" qname="{http://www.w3.org/2001/XMLSchema}string" />
57 | <s:basetype optional="false" unbounded="false" typename="string" name="path" qname="{http://www.w3.org/2001/XMLSchema}string" />
58 | </s:elements>
59 | </s:complextype>
60 | </s:elementtype>
61 | </s:arraytype>
62 | </s:extensions>
63 | </s:local>
64 | </s:processor>
65 | <s:processor name="returnXML1"> <!-- Local XMLOutputSplitter worker configured for a single {SOAP/KEGG}LinkDBRelation with the four string fields declared below. 'input' is linked from returnXML:return; only 'entry_id2' is linked onward (to btit:string). -->
66 | <s:local>
67 | org.embl.ebi.escience.scuflworkers.java.XMLOutputSplitter
68 | <s:extensions>
69 | <s:complextype optional="false" unbounded="false" typename="LinkDBRelation" name="" qname="{SOAP/KEGG}LinkDBRelation">
70 | <s:elements>
71 | <s:basetype optional="false" unbounded="false" typename="string" name="entry_id1" qname="{http://www.w3.org/2001/XMLSchema}string" />
72 | <s:basetype optional="false" unbounded="false" typename="string" name="entry_id2" qname="{http://www.w3.org/2001/XMLSchema}string" />
73 | <s:basetype optional="false" unbounded="false" typename="string" name="type" qname="{http://www.w3.org/2001/XMLSchema}string" />
74 | <s:basetype optional="false" unbounded="false" typename="string" name="path" qname="{http://www.w3.org/2001/XMLSchema}string" />
75 | </s:elements>
76 | </s:complextype>
77 | </s:extensions>
78 | </s:local>
79 | </s:processor>
80 | <s:processor name="ARSA_parser2"> <!-- Beanshell processor. Reads the single text value 'output' (linked from Get_PDB_definition:Result) and emits 'definition': the third line of that text, or "hypothetical protein" when the line is missing or mentions "hypothetical" or "unidentified". The script is wrapped in CDATA so its ampersands stay well-formed XML. -->
81 | <s:beanshell>
82 | <s:scriptvalue><![CDATA[// Fallback used when the input has no usable third line.
83 | String definition = "hypothetical protein";
84 | BufferedReader br = new BufferedReader(new StringReader(output));
85 | br.readLine(); br.readLine(); // discard the first two lines; the candidate value is the third
86 | String l = br.readLine(); // null when the input has fewer than three lines
87 | br.close();
88 | if(l != null && l.indexOf("hypothetical") == -1 && l.indexOf("unidentified") == -1) {
89 | definition = l;
90 | }]]></s:scriptvalue>
91 | <s:beanshellinputlist>
92 | <s:beanshellinput s:syntactictype="'text/plain'">output</s:beanshellinput>
93 | </s:beanshellinputlist>
94 | <s:beanshelloutputlist>
95 | <s:beanshelloutput s:syntactictype="'text/plain'">definition</s:beanshelloutput>
96 | </s:beanshelloutputlist>
97 | <s:dependencies s:classloader="iteration" />
98 | </s:beanshell>
99 | </s:processor>
100 | <s:processor name="BLAST_PDB_parser"> <!-- Beanshell processor. Scans the list 'blastResult' (linked from BLAST_against_PDB:Result) for the first tab-separated line whose second column contains '|', and emits 'url', a strnavi query string built from that hit. The script is wrapped in CDATA so its ampersands stay well-formed XML. -->
101 | <s:beanshell>
102 | <s:scriptvalue><![CDATA[// pdbid and chain stay empty when no usable hit is found, so url is still produced with empty values.
103 | String pdbid = "";
104 | String chain = "";
105 | String url = "";
106 | boolean hasHit = false;
107 | for(Iterator it = blastResult.iterator(); it.hasNext() && !hasHit; ) {
108 | String l = (String)it.next();
109 | BufferedReader br = new BufferedReader(new StringReader(l));
110 | String ll;
111 | while((ll = br.readLine()) != null) {
112 | String[] lll = ll.split("\t");
113 | // Skip blank or short lines (no second column) and lines whose second column has no '|'.
114 | if(lll.length < 2 || lll[1].indexOf("|") == -1) {
115 | continue;
116 | }
117 | String s = lll[1].substring(0, lll[1].indexOf("|"));
118 | // The id is split as 4 characters, one separator, then the chain; skip ids too short for that.
119 | if(s.length() < 5) {
120 | continue;
121 | }
122 | pdbid = s.substring(0, 4);
123 | chain = s.substring(5);
124 | hasHit = true;
125 | break;
126 | }
127 | br.close();
128 | }
129 | url = "/strnavi?pdbid=" + pdbid + "&chain=" + chain + "&fmt=simple";]]></s:scriptvalue>
130 | <s:beanshellinputlist>
131 | <s:beanshellinput s:syntactictype="l('text/plain')">blastResult</s:beanshellinput>
132 | </s:beanshellinputlist>
133 | <s:beanshelloutputlist>
134 | <s:beanshelloutput s:syntactictype="'text/plain'">url</s:beanshelloutput>
135 | </s:beanshelloutputlist>
136 | <s:dependencies s:classloader="iteration" />
137 | </s:beanshell>
138 | </s:processor>
139 | <s:processor name="ARSA_parser"> <!-- Beanshell processor. Walks the list 'definitionList' (linked from Get_DAD_definition:Result) and emits 'definition': the third line of the first entry that mentions neither "hypothetical" nor "unidentified", or "hypothetical protein" when no entry qualifies. The script is wrapped in CDATA so its ampersands stay well-formed XML. -->
140 | <s:beanshell>
141 | <s:scriptvalue><![CDATA[String definition = "hypothetical protein";
142 | for(Iterator it = definitionList.iterator(); it.hasNext();) {
143 | String s = (String)it.next();
144 | BufferedReader br = new BufferedReader(new StringReader(s));
145 | br.readLine(); br.readLine(); // discard the first two lines; the candidate value is the third
146 | String l = br.readLine(); br.close();
147 | // l is null when an entry has fewer than three lines; such entries are skipped instead of failing.
148 | if(l != null && l.indexOf("hypothetical") == -1 && l.indexOf("unidentified") == -1) {
149 | definition = l;
150 | break;
151 | }
152 | }]]></s:scriptvalue>
153 | <s:beanshellinputlist>
154 | <s:beanshellinput s:syntactictype="l('text/plain')">definitionList</s:beanshellinput>
155 | </s:beanshellinputlist>
156 | <s:beanshelloutputlist>
157 | <s:beanshelloutput s:syntactictype="'text/plain'">definition</s:beanshelloutput>
158 | </s:beanshelloutputlist>
159 | <s:dependencies s:classloader="iteration" />
160 | </s:beanshell>
161 | </s:processor>
162 | <s:processor name="BLAST_DAD_parser"> <!-- Beanshell processor. Reads the text 'blastResult' (linked from BLAST_against_DAD:Result) line by line and emits the list 'accession': for each tab-separated line, the part of the second column before the first '|'. Lines lacking that column or the '|' are skipped. -->
163 | <s:beanshell>
164 | <s:scriptvalue><![CDATA[List accession = new ArrayList();
165 | BufferedReader br = new BufferedReader(new StringReader(blastResult));
166 | String l;
167 | while((l = br.readLine()) != null) {
168 | String[] ll = l.split("\t");
169 | // Guard against blank or malformed lines, which would otherwise raise an index exception.
170 | if(ll.length > 1 && ll[1].indexOf("|") != -1) accession.add(ll[1].substring(0, ll[1].indexOf("|")));
171 | } br.close();]]></s:scriptvalue>
172 | <s:beanshellinputlist>
173 | <s:beanshellinput s:syntactictype="'text/plain'">blastResult</s:beanshellinput>
174 | </s:beanshellinputlist>
175 | <s:beanshelloutputlist>
176 | <s:beanshelloutput s:syntactictype="l('text/plain')">accession</s:beanshelloutput>
177 | </s:beanshelloutputlist>
178 | <s:dependencies s:classloader="iteration" />
179 | </s:beanshell>
180 | </s:processor>
181 | <s:processor name="Get_pdbid"> <!-- Beanshell processor. Emits 'pdbid': the first four characters of the first element of the list 'pdbidlist' (linked from to_list:split), or the empty string when the list is empty or that element is shorter than four characters. -->
182 | <s:beanshell>
183 | <s:scriptvalue>String pdbid = "";
184 | Iterator it = pdbidlist.iterator();
185 | // Only the first element is examined; later elements are ignored.
186 | String l = it.hasNext() ? (String)it.next() : "";
187 | // A short first element (for example "" from splitting an empty page) leaves pdbid empty instead of throwing.
188 | if(l.length() >= 4) pdbid = l.substring(0, 4);</s:scriptvalue>
189 | <s:beanshellinputlist>
190 | <s:beanshellinput s:syntactictype="l('text/plain')">pdbidlist</s:beanshellinput>
191 | </s:beanshellinputlist>
192 | <s:beanshelloutputlist>
193 | <s:beanshelloutput s:syntactictype="'text/plain'">pdbid</s:beanshelloutput>
194 | </s:beanshelloutputlist>
195 | <s:dependencies s:classloader="iteration" />
196 | </s:beanshell>
197 | </s:processor>
198 | <s:processor name="BLAST_against_PDB"> <!-- WSDL processor calling operation searchParam of Blast.wsdl with defaults program=blastp, database=PDB, param="-e 1 -m 8". 'query' is linked from Get_DAD_FASTA:Result and 'Result' is linked to BLAST_PDB_parser:blastResult. -->
199 | <s:description>Execute Blast with parameter</s:description>
200 | <s:defaults>
201 | <s:default name="program">blastp</s:default>
202 | <s:default name="database">PDB</s:default>
203 | <s:default name="param">-e 1 -m 8</s:default>
204 | </s:defaults>
205 | <s:arbitrarywsdl>
206 | <s:wsdl>http://xml.nig.ac.jp/wsdl/Blast.wsdl</s:wsdl>
207 | <s:operation>searchParam</s:operation>
208 | </s:arbitrarywsdl>
209 | </s:processor>
210 | <s:processor name="BLAST_against_DAD"> <!-- WSDL processor calling operation searchParam of Blast.wsdl with defaults program=blastp, database=DAD, param="-m 8 -e 0.00000000001 -v 10". 'query' is linked from the workflow source 'query' and 'Result' is linked to BLAST_DAD_parser:blastResult. -->
211 | <s:description>Execute Blast with parameter</s:description>
212 | <s:defaults>
213 | <s:default name="program">blastp</s:default>
214 | <s:default name="database">DAD</s:default>
215 | <s:default name="param">-m 8 -e 0.00000000001 -v 10</s:default>
216 | </s:defaults>
217 | <s:arbitrarywsdl>
218 | <s:wsdl>http://xml.nig.ac.jp/wsdl/Blast.wsdl</s:wsdl>
219 | <s:operation>searchParam</s:operation>
220 | </s:arbitrarywsdl>
221 | </s:processor>
222 | <s:processor name="Get_DAD_FASTA"> <!-- WSDL processor calling operation getFASTA_DADEntry of GetEntry.wsdl. 'accession' is linked from BLAST_DAD_parser:accession and 'Result' is linked to BLAST_against_PDB:query. -->
223 | <s:description>Get DAD entry of FASTA Format by Accession Number</s:description>
224 | <s:arbitrarywsdl>
225 | <s:wsdl>http://xml.nig.ac.jp/wsdl/GetEntry.wsdl</s:wsdl>
226 | <s:operation>getFASTA_DADEntry</s:operation>
227 | </s:arbitrarywsdl>
228 | </s:processor>
229 | <s:processor name="btit"> <!-- WSDL processor calling operation btit of KEGG.wsdl. 'string' is linked from returnXML1:entry_id2 and 'return' is linked to the workflow sink annotation_from_KEGG. -->
230 | <s:arbitrarywsdl>
231 | <s:wsdl>http://soap.genome.jp/KEGG.wsdl</s:wsdl>
232 | <s:operation>btit</s:operation>
233 | </s:arbitrarywsdl>
234 | </s:processor>
235 | <s:processor name="Get_DAD_definition"> <!-- WSDL processor calling operation searchByXMLPath of ARSA.wsdl with defaults returnPath=/ENTRY/DAD/definition, offset=1, count=1. 'queryPath' is linked from Concatenate2:output and 'Result' is linked to ARSA_parser:definitionList. -->
236 | <s:defaults>
237 | <s:default name="returnPath">/ENTRY/DAD/definition</s:default>
238 | <s:default name="offset">1</s:default>
239 | <s:default name="count">1</s:default>
240 | </s:defaults>
241 | <s:arbitrarywsdl>
242 | <s:wsdl>http://xml.nig.ac.jp/wsdl/ARSA.wsdl</s:wsdl>
243 | <s:operation>searchByXMLPath</s:operation>
244 | </s:arbitrarywsdl>
245 | </s:processor>
246 | <s:processor name="Get_PDB_definition"> <!-- WSDL processor calling operation searchByXMLPath of ARSA.wsdl with defaults returnPath=/ENTRY/PDB/title, offset=1, count=1. 'queryPath' is linked from Concatenate4:output and 'Result' is linked to ARSA_parser2:output. NOTE(review): this is the only WSDL processor in the file that sets critical="true"; confirm that the difference from Get_DAD_definition is intentional. -->
247 | <s:defaults>
248 | <s:default name="returnPath">/ENTRY/PDB/title</s:default>
249 | <s:default name="offset">1</s:default>
250 | <s:default name="count">1</s:default>
251 | </s:defaults>
252 | <s:arbitrarywsdl critical="true">
253 | <s:wsdl>http://xml.nig.ac.jp/wsdl/ARSA.wsdl</s:wsdl>
254 | <s:operation>searchByXMLPath</s:operation>
255 | </s:arbitrarywsdl>
256 | </s:processor>
257 | <s:processor name="get_linkdb_by_entry"> <!-- WSDL processor calling operation get_linkdb_by_entry of KEGG.wsdl with defaults db=genes, offset=1, limit=10. 'entry_id' is linked from Concatenate5:output and 'return' is linked to returnXML:input. -->
258 | <s:defaults>
259 | <s:default name="db">genes</s:default>
260 | <s:default name="offset">1</s:default>
261 | <s:default name="limit">10</s:default>
262 | </s:defaults>
263 | <s:arbitrarywsdl>
264 | <s:wsdl>http://soap.genome.jp/KEGG.wsdl</s:wsdl>
265 | <s:operation>get_linkdb_by_entry</s:operation>
266 | </s:arbitrarywsdl>
267 | </s:processor>
268 | <s:link source="query" sink="BLAST_against_DAD:query" /> <!-- Data links. Each 'source' is a workflow input or a Processor:outputPort and each 'sink' is a Processor:inputPort or a workflow output. Overall flow: query, BLAST_against_DAD, BLAST_DAD_parser; the accessions then feed (a) Concatenate, Concatenate2, Get_DAD_definition, ARSA_parser, annotation_from_DDBJ and (b) Get_DAD_FASTA, BLAST_against_PDB, BLAST_PDB_parser, PDB_strnavi, to_list, Get_pdbid. The pdbid in turn feeds Concatenate3, Concatenate4, Get_PDB_definition, ARSA_parser2, annotation_from_PDBj as well as Concatenate5, get_linkdb_by_entry, returnXML, returnXML1, btit, annotation_from_KEGG. -->
269 | <s:link source="BLAST_DAD_parser:accession" sink="Concatenate:string2" />
270 | <s:link source="BLAST_DAD_parser:accession" sink="Get_DAD_FASTA:accession" />
271 | <s:link source="BLAST_PDB_parser:url" sink="PDB_strnavi:url" />
272 | <s:link source="BLAST_against_DAD:Result" sink="BLAST_DAD_parser:blastResult" />
273 | <s:link source="BLAST_against_PDB:Result" sink="BLAST_PDB_parser:blastResult" />
274 | <s:link source="Concatenate2:output" sink="Get_DAD_definition:queryPath" />
275 | <s:link source="Concatenate3:output" sink="Concatenate4:string1" />
276 | <s:link source="Concatenate4:output" sink="Get_PDB_definition:queryPath" />
277 | <s:link source="Concatenate5:output" sink="get_linkdb_by_entry:entry_id" />
278 | <s:link source="Concatenate:output" sink="Concatenate2:string1" />
279 | <s:link source="Get_DAD_FASTA:Result" sink="BLAST_against_PDB:query" />
280 | <s:link source="Get_DAD_definition:Result" sink="ARSA_parser:definitionList" />
281 | <s:link source="Get_PDB_definition:Result" sink="ARSA_parser2:output" />
282 | <s:link source="Get_pdbid:pdbid" sink="Concatenate3:string2" />
283 | <s:link source="Get_pdbid:pdbid" sink="Concatenate5:string2" />
284 | <s:link source="PDB_strnavi:contents" sink="to_list:string" />
285 | <s:link source="get_linkdb_by_entry:return" sink="returnXML:input" />
286 | <s:link source="returnXML1:entry_id2" sink="btit:string" />
287 | <s:link source="returnXML:return" sink="returnXML1:input" />
288 | <s:link source="to_list:split" sink="Get_pdbid:pdbidlist" />
289 | <s:link source="ARSA_parser2:definition" sink="annotation_from_PDBj" />
290 | <s:link source="ARSA_parser:definition" sink="annotation_from_DDBJ" />
291 | <s:link source="btit:return" sink="annotation_from_KEGG" />
292 | <s:source name="query" /> <!-- Workflow ports: one input ('query', linked to BLAST_against_DAD:query) and three outputs fed by ARSA_parser, ARSA_parser2 and btit respectively. NOTE(review): annotation_from_PDBj is tagged text/html although ARSA_parser2 declares its 'definition' output as 'text/plain'; confirm the intended MIME type. -->
293 | <s:sink name="annotation_from_DDBJ" />
294 | <s:sink name="annotation_from_PDBj">
295 | <s:metadata>
296 | <s:mimeTypes>
297 | <s:mimeType>text/html</s:mimeType>
298 | </s:mimeTypes>
299 | </s:metadata>
300 | </s:sink>
301 | <s:sink name="annotation_from_KEGG">
302 | <s:metadata>
303 | <s:mimeTypes>
304 | <s:mimeType>text/xml</s:mimeType>
305 | </s:mimeTypes>
306 | </s:metadata>
307 | </s:sink>
308 | </s:scufl>
309 |