<?xml version="1.0" encoding="UTF-8"?>
<s:scufl xmlns:s="http://org.embl.ebi.escience/xscufl/0.1alpha" version="0.2" log="0">
  <s:workflowdescription lsid="urn:lsid:net.sf.taverna:wfDefinition:e1e41621-d605-4abd-990e-217bf1c7dbe8"
                         author=""
                         title="hackathon" />

  <!--
    Workflow outline (derived from the links declared near the end of this file):
      query -> BLAST_against_DAD -> BLAST_DAD_parser -> accession list
      accession list -> Concatenate -> Concatenate2 -> Get_DAD_definition -> ARSA_parser
                     -> annotation_from_DDBJ
      accession list -> Get_DAD_FASTA -> BLAST_against_PDB -> BLAST_PDB_parser -> PDB_strnavi
                     -> to_list -> Get_pdbid
      Get_pdbid -> Concatenate3 -> Concatenate4 -> Get_PDB_definition -> ARSA_parser2
                -> annotation_from_PDBj
      Get_pdbid -> Concatenate5 -> get_linkdb_by_entry -> returnXML -> returnXML1 -> btit
                -> annotation_from_KEGG
  -->

  <!-- Fetches a web page. "base" defaults to the address below; "url" comes from BLAST_PDB_parser. -->
  <s:processor name="PDB_strnavi">
    <s:defaults>
      <s:default name="base">http://rest.pdbj.org/strnavi?</s:default>
    </s:defaults>
    <s:local>org.embl.ebi.escience.scuflworkers.java.WebPageFetcher</s:local>
  </s:processor>

  <!-- Splits the fetched page contents using the regex \n. -->
  <s:processor name="to_list">
    <s:defaults>
      <s:default name="regex">\n</s:default>
    </s:defaults>
    <s:local>org.embl.ebi.escience.scuflworkers.java.SplitByRegex</s:local>
  </s:processor>

  <!-- Appends the closing quote to the DAD query path built by Concatenate. -->
  <s:processor name="Concatenate2">
    <s:defaults>
      <s:default name="string2">'</s:default>
    </s:defaults>
    <s:local>org.embl.ebi.escience.scuflworkers.java.StringConcat</s:local>
  </s:processor>

  <!-- Prefixes each accession from BLAST_DAD_parser with the DAD primary-accession path. -->
  <s:processor name="Concatenate">
    <s:defaults>
      <s:default name="string1">/ENTRY/DAD/primary-accession = '</s:default>
    </s:defaults>
    <s:local>org.embl.ebi.escience.scuflworkers.java.StringConcat</s:local>
  </s:processor>

  <!-- Appends the closing quote to the PDB query path built by Concatenate3. -->
  <s:processor name="Concatenate4">
    <s:defaults>
      <s:default name="string2">'</s:default>
    </s:defaults>
    <s:local>org.embl.ebi.escience.scuflworkers.java.StringConcat</s:local>
  </s:processor>

  <!-- Prefixes the id from Get_pdbid with the PDB header idcode path. -->
  <s:processor name="Concatenate3">
    <s:defaults>
      <s:default name="string1">/ENTRY/PDB/header/idcode = '</s:default>
    </s:defaults>
    <s:local>org.embl.ebi.escience.scuflworkers.java.StringConcat</s:local>
  </s:processor>

  <!-- Prefixes the id from Get_pdbid with "pdb:" to form the entry_id for get_linkdb_by_entry. -->
  <s:processor name="Concatenate5">
    <s:defaults>
      <s:default name="string1">pdb:</s:default>
    </s:defaults>
    <s:local>org.embl.ebi.escience.scuflworkers.java.StringConcat</s:local>
  </s:processor>

  <!-- Output splitter for the "return" value (ArrayOfLinkDBRelation) of get_linkdb_by_entry. -->
  <s:processor name="returnXML">
    <s:local>
      org.embl.ebi.escience.scuflworkers.java.XMLOutputSplitter
      <s:extensions>
        <s:arraytype optional="false"
                     unbounded="false"
                     wrapped="true"
                     typename="ArrayOfLinkDBRelation"
                     name="return"
                     qname="{SOAP/KEGG}ArrayOfLinkDBRelation">
          <s:elementtype>
            <s:complextype optional="false"
                           unbounded="false"
                           typename="LinkDBRelation"
                           name=""
                           qname="{SOAP/KEGG}LinkDBRelation">
              <s:elements>
                <s:basetype optional="false" unbounded="false" typename="string" name="entry_id1" qname="{http://www.w3.org/2001/XMLSchema}string" />
                <s:basetype optional="false" unbounded="false" typename="string" name="entry_id2" qname="{http://www.w3.org/2001/XMLSchema}string" />
                <s:basetype optional="false" unbounded="false" typename="string" name="type" qname="{http://www.w3.org/2001/XMLSchema}string" />
                <s:basetype optional="false" unbounded="false" typename="string" name="path" qname="{http://www.w3.org/2001/XMLSchema}string" />
              </s:elements>
            </s:complextype>
          </s:elementtype>
        </s:arraytype>
      </s:extensions>
    </s:local>
  </s:processor>

  <!-- Output splitter for a single LinkDBRelation; its entry_id2 output is linked to btit. -->
  <s:processor name="returnXML1">
    <s:local>
      org.embl.ebi.escience.scuflworkers.java.XMLOutputSplitter
      <s:extensions>
        <s:complextype optional="false"
                       unbounded="false"
                       typename="LinkDBRelation"
                       name=""
                       qname="{SOAP/KEGG}LinkDBRelation">
          <s:elements>
            <s:basetype optional="false" unbounded="false" typename="string" name="entry_id1" qname="{http://www.w3.org/2001/XMLSchema}string" />
            <s:basetype optional="false" unbounded="false" typename="string" name="entry_id2" qname="{http://www.w3.org/2001/XMLSchema}string" />
            <s:basetype optional="false" unbounded="false" typename="string" name="type" qname="{http://www.w3.org/2001/XMLSchema}string" />
            <s:basetype optional="false" unbounded="false" typename="string" name="path" qname="{http://www.w3.org/2001/XMLSchema}string" />
          </s:elements>
        </s:complextype>
      </s:extensions>
    </s:local>
  </s:processor>

  <!--
    Reads the third line of "output" (the Get_PDB_definition result) and uses it as "definition"
    unless it contains "hypothetical" or "unidentified"; otherwise "hypothetical protein" is kept.
  -->
  <s:processor name="ARSA_parser2">
    <s:beanshell>
      <s:scriptvalue><![CDATA[
String definition = "hypothetical protein";
BufferedReader br = new BufferedReader(new StringReader(output));
/* The first two lines of the response are skipped; the third one is examined. */
br.readLine();
br.readLine();
String l = br.readLine();
/* l is null when the response has fewer than three lines; keep the default in that case. */
if (l != null && l.indexOf("hypothetical") == -1 && l.indexOf("unidentified") == -1) {
  definition = l;
}
]]></s:scriptvalue>
      <s:beanshellinputlist>
        <s:beanshellinput s:syntactictype="'text/plain'">output</s:beanshellinput>
      </s:beanshellinputlist>
      <s:beanshelloutputlist>
        <s:beanshelloutput s:syntactictype="'text/plain'">definition</s:beanshelloutput>
      </s:beanshelloutputlist>
      <s:dependencies s:classloader="iteration" />
    </s:beanshell>
  </s:processor>

  <!--
    Scans the tab-separated lines of each "blastResult" entry, takes the second column up to the
    first "|" of the first usable line, and builds the strnavi request from its first four
    characters (pdbid) and the characters from index 5 onwards (chain).
  -->
  <s:processor name="BLAST_PDB_parser">
    <s:beanshell>
      <s:scriptvalue><![CDATA[
String pdbid = "";
String chain = "";
String url = "";
boolean hasHit = false;

for (Iterator it = blastResult.iterator(); it.hasNext(); ) {
  String l = (String) it.next();
  BufferedReader br = new BufferedReader(new StringReader(l));
  String ll;
  while ((ll = br.readLine()) != null) {
    String[] lll = ll.split("\t");
    /* Skip lines with no second column (e.g. blank lines) or no "|" in that column. */
    if (lll.length < 2 || lll[1].indexOf("|") == -1) {
      continue;
    }
    String s = lll[1].substring(0, lll[1].indexOf("|"));
    /* An id shorter than four characters cannot supply a pdbid; try the next line. */
    if (s.length() < 4) {
      continue;
    }
    pdbid = s.substring(0, 4);
    /* The chain starts at index 5; it is empty when the id carries no chain part. */
    chain = s.length() > 5 ? s.substring(5) : "";
    hasHit = true;
    break;
  }
  br.close();
  if (hasHit) {
    break;
  }
}
url = "/strnavi?pdbid=" + pdbid + "&chain=" + chain + "&fmt=simple";
]]></s:scriptvalue>
      <s:beanshellinputlist>
        <s:beanshellinput s:syntactictype="l('text/plain')">blastResult</s:beanshellinput>
      </s:beanshellinputlist>
      <s:beanshelloutputlist>
        <s:beanshelloutput s:syntactictype="'text/plain'">url</s:beanshelloutput>
      </s:beanshelloutputlist>
      <s:dependencies s:classloader="iteration" />
    </s:beanshell>
  </s:processor>

  <!--
    For each entry of "definitionList" reads its third line and returns the first one that
    contains neither "hypothetical" nor "unidentified"; otherwise "hypothetical protein" is kept.
  -->
  <s:processor name="ARSA_parser">
    <s:beanshell>
      <s:scriptvalue><![CDATA[
String definition = "hypothetical protein";
for (Iterator it = definitionList.iterator(); it.hasNext(); ) {
  String s = (String) it.next();
  BufferedReader br = new BufferedReader(new StringReader(s));
  /* The first two lines of each entry are skipped; the third one is examined. */
  br.readLine();
  br.readLine();
  String l = br.readLine();
  /* l is null when the entry has fewer than three lines; move on to the next entry. */
  if (l == null) {
    continue;
  }
  if (l.indexOf("hypothetical") == -1 && l.indexOf("unidentified") == -1) {
    definition = l;
    break;
  }
}
]]></s:scriptvalue>
      <s:beanshellinputlist>
        <s:beanshellinput s:syntactictype="l('text/plain')">definitionList</s:beanshellinput>
      </s:beanshellinputlist>
      <s:beanshelloutputlist>
        <s:beanshelloutput s:syntactictype="'text/plain'">definition</s:beanshelloutput>
      </s:beanshelloutputlist>
      <s:dependencies s:classloader="iteration" />
    </s:beanshell>
  </s:processor>

  <!--
    Collects, for every tab-separated line of "blastResult", the part of the second column that
    precedes the first "|" into the "accession" list.
  -->
  <s:processor name="BLAST_DAD_parser">
    <s:beanshell>
      <s:scriptvalue><![CDATA[
List accession = new ArrayList();
BufferedReader br = new BufferedReader(new StringReader(blastResult));
String l;
while ((l = br.readLine()) != null) {
  String[] ll = l.split("\t");
  /* Skip lines with no second column (e.g. blank lines). */
  if (ll.length < 2) {
    continue;
  }
  /* Skip lines whose second column has no "|" separator, as BLAST_PDB_parser does. */
  int bar = ll[1].indexOf("|");
  if (bar == -1) {
    continue;
  }
  String ddbj = ll[1].substring(0, bar);
  accession.add(ddbj);
}
]]></s:scriptvalue>
      <s:beanshellinputlist>
        <s:beanshellinput s:syntactictype="'text/plain'">blastResult</s:beanshellinput>
      </s:beanshellinputlist>
      <s:beanshelloutputlist>
        <s:beanshelloutput s:syntactictype="l('text/plain')">accession</s:beanshelloutput>
      </s:beanshelloutputlist>
      <s:dependencies s:classloader="iteration" />
    </s:beanshell>
  </s:processor>

  <!-- Returns the first four characters of the first "pdbidlist" entry that is long enough. -->
  <s:processor name="Get_pdbid">
    <s:beanshell>
      <s:scriptvalue><![CDATA[
String pdbid = "";
for (Iterator it = pdbidlist.iterator(); it.hasNext(); ) {
  String l = (String) it.next();
  /* Entries shorter than four characters (e.g. blank lines) cannot hold an id. */
  if (l.length() < 4) {
    continue;
  }
  pdbid = l.substring(0, 4);
  break;
}
]]></s:scriptvalue>
      <s:beanshellinputlist>
        <s:beanshellinput s:syntactictype="l('text/plain')">pdbidlist</s:beanshellinput>
      </s:beanshellinputlist>
      <s:beanshelloutputlist>
        <s:beanshelloutput s:syntactictype="'text/plain'">pdbid</s:beanshelloutput>
      </s:beanshelloutputlist>
      <s:dependencies s:classloader="iteration" />
    </s:beanshell>
  </s:processor>

  <!-- blastp search against the PDB database; the query comes from Get_DAD_FASTA. -->
  <s:processor name="BLAST_against_PDB">
    <s:description>Execute Blast with parameter</s:description>
    <s:defaults>
      <s:default name="program">blastp</s:default>
      <s:default name="database">PDB</s:default>
      <s:default name="param">-e 1 -m 8</s:default>
    </s:defaults>
    <s:arbitrarywsdl>
      <s:wsdl>http://xml.nig.ac.jp/wsdl/Blast.wsdl</s:wsdl>
      <s:operation>searchParam</s:operation>
    </s:arbitrarywsdl>
  </s:processor>

  <!-- blastp search against the DAD database; the query is the workflow input "query". -->
  <s:processor name="BLAST_against_DAD">
    <s:description>Execute Blast with parameter</s:description>
    <s:defaults>
      <s:default name="program">blastp</s:default>
      <s:default name="database">DAD</s:default>
      <s:default name="param">-m 8 -e 0.00000000001 -v 10</s:default>
    </s:defaults>
    <s:arbitrarywsdl>
      <s:wsdl>http://xml.nig.ac.jp/wsdl/Blast.wsdl</s:wsdl>
      <s:operation>searchParam</s:operation>
    </s:arbitrarywsdl>
  </s:processor>

  <s:processor name="Get_DAD_FASTA">
    <s:description>Get DAD entry of FASTA Format by Accession Number</s:description>
    <s:arbitrarywsdl>
      <s:wsdl>http://xml.nig.ac.jp/wsdl/GetEntry.wsdl</s:wsdl>
      <s:operation>getFASTA_DADEntry</s:operation>
    </s:arbitrarywsdl>
  </s:processor>

  <!-- KEGG "btit" operation; receives entry_id2 from returnXML1 and feeds annotation_from_KEGG. -->
  <s:processor name="btit">
    <s:arbitrarywsdl>
      <s:wsdl>http://soap.genome.jp/KEGG.wsdl</s:wsdl>
      <s:operation>btit</s:operation>
    </s:arbitrarywsdl>
  </s:processor>

  <!-- ARSA searchByXMLPath returning /ENTRY/DAD/definition for the query path from Concatenate2. -->
  <s:processor name="Get_DAD_definition">
    <s:defaults>
      <s:default name="returnPath">/ENTRY/DAD/definition</s:default>
      <s:default name="offset">1</s:default>
      <s:default name="count">1</s:default>
    </s:defaults>
    <s:arbitrarywsdl>
      <s:wsdl>http://xml.nig.ac.jp/wsdl/ARSA.wsdl</s:wsdl>
      <s:operation>searchByXMLPath</s:operation>
    </s:arbitrarywsdl>
  </s:processor>

  <!-- ARSA searchByXMLPath returning /ENTRY/PDB/title for the query path from Concatenate4. -->
  <s:processor name="Get_PDB_definition">
    <s:defaults>
      <s:default name="returnPath">/ENTRY/PDB/title</s:default>
      <s:default name="offset">1</s:default>
      <s:default name="count">1</s:default>
    </s:defaults>
    <s:arbitrarywsdl critical="true">
      <s:wsdl>http://xml.nig.ac.jp/wsdl/ARSA.wsdl</s:wsdl>
      <s:operation>searchByXMLPath</s:operation>
    </s:arbitrarywsdl>
  </s:processor>

  <!-- KEGG get_linkdb_by_entry for the "pdb:" entry id built by Concatenate5. -->
  <s:processor name="get_linkdb_by_entry">
    <s:defaults>
      <s:default name="db">genes</s:default>
      <s:default name="offset">1</s:default>
      <s:default name="limit">10</s:default>
    </s:defaults>
    <s:arbitrarywsdl>
      <s:wsdl>http://soap.genome.jp/KEGG.wsdl</s:wsdl>
      <s:operation>get_linkdb_by_entry</s:operation>
    </s:arbitrarywsdl>
  </s:processor>

  <!-- Data links between the workflow input, the processors and the workflow outputs. -->
  <s:link source="query" sink="BLAST_against_DAD:query" />
  <s:link source="BLAST_DAD_parser:accession" sink="Concatenate:string2" />
  <s:link source="BLAST_DAD_parser:accession" sink="Get_DAD_FASTA:accession" />
  <s:link source="BLAST_PDB_parser:url" sink="PDB_strnavi:url" />
  <s:link source="BLAST_against_DAD:Result" sink="BLAST_DAD_parser:blastResult" />
  <s:link source="BLAST_against_PDB:Result" sink="BLAST_PDB_parser:blastResult" />
  <s:link source="Concatenate2:output" sink="Get_DAD_definition:queryPath" />
  <s:link source="Concatenate3:output" sink="Concatenate4:string1" />
  <s:link source="Concatenate4:output" sink="Get_PDB_definition:queryPath" />
  <s:link source="Concatenate5:output" sink="get_linkdb_by_entry:entry_id" />
  <s:link source="Concatenate:output" sink="Concatenate2:string1" />
  <s:link source="Get_DAD_FASTA:Result" sink="BLAST_against_PDB:query" />
  <s:link source="Get_DAD_definition:Result" sink="ARSA_parser:definitionList" />
  <s:link source="Get_PDB_definition:Result" sink="ARSA_parser2:output" />
  <s:link source="Get_pdbid:pdbid" sink="Concatenate3:string2" />
  <s:link source="Get_pdbid:pdbid" sink="Concatenate5:string2" />
  <s:link source="PDB_strnavi:contents" sink="to_list:string" />
  <s:link source="get_linkdb_by_entry:return" sink="returnXML:input" />
  <s:link source="returnXML1:entry_id2" sink="btit:string" />
  <s:link source="returnXML:return" sink="returnXML1:input" />
  <s:link source="to_list:split" sink="Get_pdbid:pdbidlist" />
  <s:link source="ARSA_parser2:definition" sink="annotation_from_PDBj" />
  <s:link source="ARSA_parser:definition" sink="annotation_from_DDBJ" />
  <s:link source="btit:return" sink="annotation_from_KEGG" />

  <!-- Workflow input. -->
  <s:source name="query" />

  <!-- Workflow outputs. -->
  <s:sink name="annotation_from_DDBJ" />
  <s:sink name="annotation_from_PDBj">
    <s:metadata>
      <s:mimeTypes>
        <s:mimeType>text/html</s:mimeType>
      </s:mimeTypes>
    </s:metadata>
  </s:sink>
  <s:sink name="annotation_from_KEGG">
    <s:metadata>
      <s:mimeTypes>
        <s:mimeType>text/xml</s:mimeType>
      </s:mimeTypes>
    </s:metadata>
  </s:sink>
</s:scufl>