diff --git a/ProcessFASTQ.ipynb b/ProcessFASTQ.ipynb index 11758718b03b0f334e4104251194cf0bf976511b..3943225353d510c17aed04324529aa70099979b4 100644 --- a/ProcessFASTQ.ipynb +++ b/ProcessFASTQ.ipynb @@ -670,7 +670,7 @@ }, { "cell_type": "code", - "execution_count": 232, + "execution_count": 239, "metadata": { "collapsed": false }, @@ -683,7 +683,7 @@ }, { "cell_type": "code", - "execution_count": 233, + "execution_count": 240, "metadata": { "collapsed": false }, @@ -716,11 +716,13 @@ " print(matches.readline())\n", " print(matches.readline())\n", " print(matches.readline())\n", - " \n", + " \n", " # Generator over fastq where the corresponding sam field is 4,\n", " # meaning no reported alignment\n", - " sub_iter = (rec for rec in filt_iter \\\n", - " if matches.readline().split('\\t')[1] == '4')\n", + " #sub_iter = (rec for rec in filt_iter \\\n", + " # if matches.readline().split('\\t')[1] == '4')\n", + " \n", + " \n", "\n", " # Write back fastq\n", " SeqIO.write(sub_iter,substracted,\"fastq\")" @@ -728,19 +730,11 @@ }, { "cell_type": "code", - "execution_count": 234, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Filtering non-codant tRNA run time : 0:00:01.407564\n" - ] - } - ], + "outputs": [], "source": [ "# Store current time\n", "after = datetime.datetime.now()\n", @@ -751,40 +745,6 @@ "print(\"Filtering non-codant tRNA run time : {0}\".format(delta))" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# Testing\n", - "\n", - "with open(\"3-Filtered/\" +fname+\".fastq\",\"r\") as filtered, \\\n", - " open(\"4-Bowtied/\" +fname+\".sam\",\"r\") as matches, \\\n", - " open(\"5-ncRNA-Removed/\"+fname+\".fastq\",\"w\") as substracted: \n", - " \n", - " # Iterator over fastq file\n", - " filt_iter = SeqIO.parse(filtered,\"fastq\")\n", - " \n", - " # Strip header (as in original script)\n", - " for _ in range(415-3): matches.readline()\n", - "\n", - " # Check last lines\n", - " print(matches.readline())\n", - " print(matches.readline())\n", - " print(matches.readline())\n", - " \n", - " # Generator over fastq where the corresponding sam field is 4,\n", - " # meaning no reported alignment\n", - " sub_iter = (rec for rec in filt_iter \\\n", - " if matches.readline().split('\\t')[1] == '4')\n", - "\n", - " # Write back fastq\n", - " SeqIO.write(sub_iter,substracted,\"fastq\")" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -796,27 +756,11 @@ }, { "cell_type": "code", - "execution_count": 235, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Time loading forward index: 00:00:00\n", - "Time loading mirror index: 00:00:00\n", - "End-to-end 2/3-mismatch full-index search: 00:00:00\n", - "# reads processed: 445\n", - "# reads with at least one reported alignment: 240 (53.93%)\n", - "# reads that failed to align: 205 (46.07%)\n", - "Reported 240 alignments to 1 output stream(s)\n", - "Time searching: 00:00:00\n", - "Overall time: 00:00:00\n" - ] - } - ], + "outputs": [], "source": [ "%%bash\n", "source ./source\n", @@ -841,7 +785,7 @@ }, { "cell_type": "code", - "execution_count": 236, + "execution_count": null, "metadata": { "collapsed": true }, @@ -872,7 +816,7 @@ }, { "cell_type": "code", - "execution_count": 237, + "execution_count": null, "metadata": { "collapsed": false }, @@ -916,7 +860,7 @@ }, { "cell_type": "code", - "execution_count": 238, + "execution_count": null, "metadata": { "collapsed": false },