From a97f556e676b56137b53acae1d45a8af13b907c5 Mon Sep 17 00:00:00 2001
From: Julien Cornut <julien.cornut@hesge.ch>
Date: Fri, 26 Feb 2016 11:58:01 +0100
Subject: [PATCH] Automatic Update

---
 ProcessFASTQ.ipynb | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/ProcessFASTQ.ipynb b/ProcessFASTQ.ipynb
index 3555de0..1175871 100644
--- a/ProcessFASTQ.ipynb
+++ b/ProcessFASTQ.ipynb
@@ -751,6 +751,40 @@
     "print(\"Filtering non-codant tRNA run time : {0}\".format(delta))"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Testing\n",
+    "\n",
+    "with open(\"3-Filtered/\" +fname+\".fastq\",\"r\") as filtered, \\\n",
+    "     open(\"4-Bowtied/\" +fname+\".sam\",\"r\") as matches, \\\n",
+    "     open(\"5-ncRNA-Removed/\"+fname+\".fastq\",\"w\") as substracted:\n",
+    "\n",
+    "    # Iterator over fastq file\n",
+    "    filt_iter = SeqIO.parse(filtered,\"fastq\")\n",
+    "\n",
+    "    # Skip the SAM header by content rather than by a fixed line count:\n",
+    "    # header lines start with '@', alignment lines never do\n",
+    "    sam_iter = (line for line in matches if not line.startswith('@'))\n",
+    "\n",
+    "    # FLAG is the 2nd tab-separated SAM column; bit 0x4 marks a read with no\n",
+    "    # reported alignment. An empty line means the SAM file ran out of records\n",
+    "    def unaligned(sam_line):\n",
+    "        fields = sam_line.split('\\t')\n",
+    "        if len(fields) < 2:\n",
+    "            raise ValueError(\"fewer SAM records than FASTQ reads\")\n",
+    "        return int(fields[1]) & 4\n",
+    "\n",
+    "    # Keep only the reads without alignment and write them back as fastq\n",
+    "    sub_iter = (rec for rec in filt_iter if unaligned(next(sam_iter, '')))\n",
+    "    SeqIO.write(sub_iter,substracted,\"fastq\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
-- 
GitLab