Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
N
notebook-backup
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Julien Cornut
notebook-backup
Commits
c201391e
Commit
c201391e
authored
9 years ago
by
Julien Cornut
Browse files
Options
Downloads
Patches
Plain Diff
Automatic Update
parent
4612494e
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
PlotData.ipynb
+8
-25
8 additions, 25 deletions
PlotData.ipynb
with
8 additions
and
25 deletions
PlotData.ipynb
+
8
−
25
View file @
c201391e
...
...
@@ -69,7 +69,7 @@
},
{
"cell_type": "code",
"execution_count": 1
57
,
"execution_count": 1
63
,
"metadata": {
"collapsed": false,
"scrolled": true
...
...
@@ -79,6 +79,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Processed : 5\n",
"\n",
"Undetermined_lane7_pair1\n",
"flowcell261_lane8_pair1_CAGATC \t\t Processsed\n",
"flowcell261_lane8_pair1_TGACCA \t\t Processsed\n",
...
...
@@ -94,29 +97,6 @@
"flowcell384_lane7_pair1_TGACCA\n",
"testing\n"
]
},
{
"data": {
"text/plain": [
"[None,\n",
" None,\n",
" None,\n",
" None,\n",
" None,\n",
" None,\n",
" None,\n",
" None,\n",
" None,\n",
" None,\n",
" None,\n",
" None,\n",
" None,\n",
" None]"
]
},
"execution_count": 157,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
...
...
@@ -127,7 +107,10 @@
"\n",
"lst = (n+\" \\t\\t Processsed\" if n in wn else n for n in sorted(fn))\n",
"\n",
"[print(l) for l in lst]\n",
"print(\"\\nProcessed : {0}\\n\".format(len(fn & wn)))\n",
"\n",
"for l in lst:\n",
" print(l)\n",
"\n",
" \n"
]
...
...
%% Cell type:code id: tags:
```
python
# SeqIO (from the Bio package) is used to parse FASTQ files
from
Bio
import
SeqIO
# copyfile lets a file be copied from Python instead of from a bash magic cell
from
shutil
import
copyfile
# listdir lists the entries of a directory; path is imported alongside it
from
os
import
listdir
,
path
# datetime is imported for timing how long a function takes to run
# (it is not used in this cell)
import
datetime
# Select the file to process: uncomment one of the names below,
# or assign fname directly. The last assignment wins.
# fname = "Undetermined_lane7_pair1"
# fname = "flowcell261_lane8_pair1_CAGATC"
# fname = "flowcell261_lane8_pair1_TGACCA"
# fname = "flowcell362_lane4_pair1_ACAGTG"
# fname = "flowcell362_lane4_pair1_ACTTGA"
# fname = "flowcell362_lane4_pair1_CAGATC"
# fname = "flowcell362_lane4_pair1_TGACCA"
# fname = "flowcell362_lane4_pair1_Undetermined"
# fname = "flowcell384_lane7_pair1_ACAGTG"
# fname = "flowcell384_lane7_pair1_ACTTGA"
# fname = "flowcell384_lane7_pair1_CAGATC"
# fname = "flowcell384_lane7_pair1_GATCAG"
# fname = "flowcell384_lane7_pair1_TGACCA"
fname
=
"
testing
"
# Print, in sorted order, the base name (the text before the first '.')
# of every entry found in the 0-Raws/ directory
print
(
"
\n
Available files :
\n
"
)
for
file
in
sorted
(
listdir
(
"
0-Raws/
"
)):
print
(
"
-
"
+
file
.
split
(
'
.
'
)[
0
])
```
%% Output
Available files :
- Undetermined_lane7_pair1
- flowcell261_lane8_pair1_CAGATC
- flowcell261_lane8_pair1_TGACCA
- flowcell362_lane4_pair1_ACAGTG
- flowcell362_lane4_pair1_ACTTGA
- flowcell362_lane4_pair1_CAGATC
- flowcell362_lane4_pair1_TGACCA
- flowcell362_lane4_pair1_Undetermined
- flowcell384_lane7_pair1_ACAGTG
- flowcell384_lane7_pair1_ACTTGA
- flowcell384_lane7_pair1_CAGATC
- flowcell384_lane7_pair1_GATCAG
- flowcell384_lane7_pair1_TGACCA
- testing
%% Cell type:code id: tags:
```
python
from pprint import pprint

# Base names (text before the first '.') of the raw inputs and of the
# files already present in 7-WIGs/.
fn = {n.split('.')[0] for n in listdir("0-Raws/")}
wn = {n.split('.')[0] for n in listdir("7-WIGs/")}

# One display line per raw file, tagged when a file with the same base
# name exists in 7-WIGs/.  Built as a list (not a generator) so it can be
# iterated more than once without silently coming up empty.
lst = [n + " \t\t Processsed" if n in wn else n for n in sorted(fn)]

# Number of base names present in both directories.
print("\nProcessed : {0}\n".format(len(fn & wn)))

# A plain loop is used for printing: a list comprehension here would only
# build a throwaway list of None values that the notebook then echoes.
for entry in lst:
    print(entry)
```
%% Output
Processed : 5
Undetermined_lane7_pair1
flowcell261_lane8_pair1_CAGATC Processsed
flowcell261_lane8_pair1_TGACCA Processsed
flowcell362_lane4_pair1_ACAGTG Processsed
flowcell362_lane4_pair1_ACTTGA Processsed
flowcell362_lane4_pair1_CAGATC Processsed
flowcell362_lane4_pair1_TGACCA
flowcell362_lane4_pair1_Undetermined
flowcell384_lane7_pair1_ACAGTG
flowcell384_lane7_pair1_ACTTGA
flowcell384_lane7_pair1_CAGATC
flowcell384_lane7_pair1_GATCAG
flowcell384_lane7_pair1_TGACCA
testing
[None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None]
%% Cell type:code id: tags:
```
python
%%bash
# Print the line count of one sample's file in each of the numbered
# output directories.  The sample name is set once and reused in every
# path, so checking another sample only requires changing this line.
export name="testing"
echo $name
wc -l "3-Filtered/${name}.fastq"
wc -l "4-Bowtied/${name}.sam"
wc -l "5-ncRNA-Removed/${name}.fastq"
```
%% Output
testing
4000 3-Filtered/testing.fastq
1415 4-Bowtied/testing.sam
1780 5-ncRNA-Removed/testing.fastq
%% Cell type:code id: tags:
```
python
from collections import Counter
from time import sleep

# Number of leading lines of the SAM file that are skipped as header.
lines_to_burn = 415

# NOTE(review): the third file is opened in "w" mode, which truncates it
# even though nothing is written in this cell (the write-back below is
# still commented out).  Kept as in the original -- confirm it is wanted.
with open("3-Filtered/" + fname + ".fastq", "r") as filtered, \
        open("4-Bowtied/" + fname + ".sam", "r") as matches, \
        open("5-ncRNA-Removed/" + fname + ".fastq", "w") as substracted:

    # All fastq records, and all SAM lines after the header.
    filt_iter = list(SeqIO.parse(filtered, "fastq"))
    matches_iter = list(matches)[lines_to_burn:]

    # Split every SAM line once; field 0 is the read name and field 1 the
    # flag, the same column the commented-out generator below tests.
    sam_fields = [line.split('\t') for line in matches_iter]

    # Occurrence counts per read name.  Summing these counts gives the same
    # totals as comparing every fastq record with every SAM line, without
    # the quadratic nested loops.
    sam_id_counts = Counter(fields[0] for fields in sam_fields)
    unaligned_id_counts = Counter(
        fields[0] for fields in sam_fields
        if len(fields) > 1 and fields[1] == '4'
    )
    fastq_id_counts = Counter(rec.id for rec in filt_iter)

    # i  : number of fastq records
    # im : (fastq record, SAM line) pairs sharing the same read name
    # ma : the subset of those pairs whose SAM flag is '4'; presumably the
    #      count of reads with no reported alignment -- it is computed but
    #      not printed, as in the original
    i = len(filt_iter)
    im = sum(sam_id_counts[rec.id] for rec in filt_iter)
    ma = sum(unaligned_id_counts[rec.id] for rec in filt_iter)

    print(i)

    # j  : number of SAM lines after the header
    # jm : (SAM line, fastq record) pairs sharing the same read name
    j = len(sam_fields)
    jm = sum(fastq_id_counts[fields[0]] for fields in sam_fields)

    print(i, im, j, jm)

    # Generator over fastq where the corresponding sam field is 4,
    # meaning no reported alignment
    # sub_iter = (rec for rec in filt_iter \
    #                if matches.readline().split('\t')[1] == '4')

    # Write back fastq
    # SeqIO.write(sub_iter,substracted,"fastq")
```
%% Output
1000
1000 1000 1000 1000
%% Cell type:code id: tags:
```
python
with open("3-Filtered/" + "testing" + ".fastq", "r") as filtered, \
        open("4-Bowtied/" + "testing" + ".sam", "r") as matches, \
        open("5-ncRNA-Removed/" + "testing" + ".fastq", "w") as substracted:

    # Iterator over fastq file
    filt_iter = SeqIO.parse(filtered, "fastq")

    # Skip the SAM header.  Header lines are recognised by their leading
    # '@' instead of a fixed line count, so this no longer depends on how
    # many sequences the reference index contains.  readline() is used
    # (rather than iterating the file) so that tell()/seek() stay usable.
    header_lines = []
    first_record_pos = matches.tell()
    line = matches.readline()
    while line.startswith('@'):
        header_lines.append(line)
        first_record_pos = matches.tell()
        line = matches.readline()
    # The loop read one line past the header; rewind to the first record
    # so the generator below starts in step with the fastq records.
    matches.seek(first_record_pos)

    # Check last lines: show the final three header lines
    for header_line in header_lines[-3:]:
        print(header_line)

    # Generator over fastq where the corresponding sam field is 4,
    # meaning no reported alignment.  Each fastq record consumes exactly
    # one SAM line, so this assumes both files list the reads in the same
    # order -- TODO confirm.
    sub_iter = (rec for rec in filt_iter
                if matches.readline().split('\t')[1] == '4')

    # Write back fastq
    SeqIO.write(sub_iter, substracted, "fastq")
```
%% Output
@SQ SN:tW(UCA)Q LN:74
@SQ SN:tY(GUA)Q LN:84
@PG ID:Bowtie VN:1.1.2 CL:"bowtie --wrapper basic-0 -S -v 3 -p 4 --best ref/2-Indexes/Yeast-Noncoding/Yeast-Noncoding 3-Filtered/testing.fastq 4-Bowtied/testing.sam"
%% Cell type:code id: tags:
```
python
%%
bash
# Create the test/ directory; -p makes this succeed if it already exists
mkdir
-
p
test
/
```
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment