Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
N
notebook-backup
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Julien Cornut
notebook-backup
Commits
c201391e
Commit
c201391e
authored
9 years ago
by
Julien Cornut
Browse files
Options
Downloads
Patches
Plain Diff
Automatic Update
parent
4612494e
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
PlotData.ipynb
+8
-25
8 additions, 25 deletions
PlotData.ipynb
with
8 additions
and
25 deletions
PlotData.ipynb
+
8
−
25
View file @
c201391e
...
@@ -69,7 +69,7 @@
...
@@ -69,7 +69,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 1
57
,
"execution_count": 1
63
,
"metadata": {
"metadata": {
"collapsed": false,
"collapsed": false,
"scrolled": true
"scrolled": true
...
@@ -79,6 +79,9 @@
...
@@ -79,6 +79,9 @@
"name": "stdout",
"name": "stdout",
"output_type": "stream",
"output_type": "stream",
"text": [
"text": [
"\n",
"Processed : 5\n",
"\n",
"Undetermined_lane7_pair1\n",
"Undetermined_lane7_pair1\n",
"flowcell261_lane8_pair1_CAGATC \t\t Processsed\n",
"flowcell261_lane8_pair1_CAGATC \t\t Processsed\n",
"flowcell261_lane8_pair1_TGACCA \t\t Processsed\n",
"flowcell261_lane8_pair1_TGACCA \t\t Processsed\n",
...
@@ -94,29 +97,6 @@
...
@@ -94,29 +97,6 @@
"flowcell384_lane7_pair1_TGACCA\n",
"flowcell384_lane7_pair1_TGACCA\n",
"testing\n"
"testing\n"
]
]
},
{
"data": {
"text/plain": [
"[None,\n",
" None,\n",
" None,\n",
" None,\n",
" None,\n",
" None,\n",
" None,\n",
" None,\n",
" None,\n",
" None,\n",
" None,\n",
" None,\n",
" None,\n",
" None]"
]
},
"execution_count": 157,
"metadata": {},
"output_type": "execute_result"
}
}
],
],
"source": [
"source": [
...
@@ -127,7 +107,10 @@
...
@@ -127,7 +107,10 @@
"\n",
"\n",
"lst = (n+\" \\t\\t Processsed\" if n in wn else n for n in sorted(fn))\n",
"lst = (n+\" \\t\\t Processsed\" if n in wn else n for n in sorted(fn))\n",
"\n",
"\n",
"[print(l) for l in lst]\n",
"print(\"\\nProcessed : {0}\\n\".format(len(fn & wn)))\n",
"\n",
"for l in lst:\n",
" print(l)\n",
"\n",
"\n",
" \n"
" \n"
]
]
...
...
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
# This lib is needed to parse fastq easily
# This lib is needed to parse fastq easily
from
Bio
import
SeqIO
from
Bio
import
SeqIO
# This lib is used to avoid using a bash magic cell to copy a file
# This lib is used to avoid using a bash magic cell to copy a file
from
shutil
import
copyfile
from
shutil
import
copyfile
# Used to... list directory
# Used to... list directory
from
os
import
listdir
,
path
from
os
import
listdir
,
path
# Check the time taken by a function to run
# Check the time taken by a function to run
import
datetime
import
datetime
# Enter/Uncomment here the name of the file you want to process
# Enter/Uncomment here the name of the file you want to process
# fname = "Undetermined_lane7_pair1"
# fname = "Undetermined_lane7_pair1"
# fname = "flowcell261_lane8_pair1_CAGATC"
# fname = "flowcell261_lane8_pair1_CAGATC"
# fname = "flowcell261_lane8_pair1_TGACCA"
# fname = "flowcell261_lane8_pair1_TGACCA"
# fname = "flowcell362_lane4_pair1_ACAGTG"
# fname = "flowcell362_lane4_pair1_ACAGTG"
# fname = "flowcell362_lane4_pair1_ACTTGA"
# fname = "flowcell362_lane4_pair1_ACTTGA"
# fname = "flowcell362_lane4_pair1_CAGATC"
# fname = "flowcell362_lane4_pair1_CAGATC"
# fname = "flowcell362_lane4_pair1_TGACCA"
# fname = "flowcell362_lane4_pair1_TGACCA"
# fname = "flowcell362_lane4_pair1_Undetermined"
# fname = "flowcell362_lane4_pair1_Undetermined"
# fname = "flowcell384_lane7_pair1_ACAGTG"
# fname = "flowcell384_lane7_pair1_ACAGTG"
# fname = "flowcell384_lane7_pair1_ACTTGA"
# fname = "flowcell384_lane7_pair1_ACTTGA"
# fname = "flowcell384_lane7_pair1_CAGATC"
# fname = "flowcell384_lane7_pair1_CAGATC"
# fname = "flowcell384_lane7_pair1_GATCAG"
# fname = "flowcell384_lane7_pair1_GATCAG"
# fname = "flowcell384_lane7_pair1_TGACCA"
# fname = "flowcell384_lane7_pair1_TGACCA"
fname
=
"
testing
"
fname
=
"
testing
"
# Print available files in 0-Raws/ directory
# Print available files in 0-Raws/ directory
print
(
"
\n
Available files :
\n
"
)
print
(
"
\n
Available files :
\n
"
)
for
file
in
sorted
(
listdir
(
"
0-Raws/
"
)):
for
file
in
sorted
(
listdir
(
"
0-Raws/
"
)):
print
(
"
-
"
+
file
.
split
(
'
.
'
)[
0
])
print
(
"
-
"
+
file
.
split
(
'
.
'
)[
0
])
```
```
%% Output
%% Output
Available files :
Available files :
- Undetermined_lane7_pair1
- Undetermined_lane7_pair1
- flowcell261_lane8_pair1_CAGATC
- flowcell261_lane8_pair1_CAGATC
- flowcell261_lane8_pair1_TGACCA
- flowcell261_lane8_pair1_TGACCA
- flowcell362_lane4_pair1_ACAGTG
- flowcell362_lane4_pair1_ACAGTG
- flowcell362_lane4_pair1_ACTTGA
- flowcell362_lane4_pair1_ACTTGA
- flowcell362_lane4_pair1_CAGATC
- flowcell362_lane4_pair1_CAGATC
- flowcell362_lane4_pair1_TGACCA
- flowcell362_lane4_pair1_TGACCA
- flowcell362_lane4_pair1_Undetermined
- flowcell362_lane4_pair1_Undetermined
- flowcell384_lane7_pair1_ACAGTG
- flowcell384_lane7_pair1_ACAGTG
- flowcell384_lane7_pair1_ACTTGA
- flowcell384_lane7_pair1_ACTTGA
- flowcell384_lane7_pair1_CAGATC
- flowcell384_lane7_pair1_CAGATC
- flowcell384_lane7_pair1_GATCAG
- flowcell384_lane7_pair1_GATCAG
- flowcell384_lane7_pair1_TGACCA
- flowcell384_lane7_pair1_TGACCA
- testing
- testing
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
from
pprint
import
pprint
from
pprint
import
pprint
fn
=
{
n
.
split
(
'
.
'
)[
0
]
for
n
in
listdir
(
"
0-Raws/
"
)}
fn
=
{
n
.
split
(
'
.
'
)[
0
]
for
n
in
listdir
(
"
0-Raws/
"
)}
wn
=
{
n
.
split
(
'
.
'
)[
0
]
for
n
in
listdir
(
"
7-WIGs/
"
)}
wn
=
{
n
.
split
(
'
.
'
)[
0
]
for
n
in
listdir
(
"
7-WIGs/
"
)}
lst
=
(
n
+
"
\t\t
Processsed
"
if
n
in
wn
else
n
for
n
in
sorted
(
fn
))
lst
=
(
n
+
"
\t\t
Processsed
"
if
n
in
wn
else
n
for
n
in
sorted
(
fn
))
[
print
(
l
)
for
l
in
lst
]
print
(
"
\n
Processed : {0}
\n
"
.
format
(
len
(
fn
&
wn
)))
for
l
in
lst
:
print
(
l
)
```
```
%% Output
%% Output
Processed : 5
Undetermined_lane7_pair1
Undetermined_lane7_pair1
flowcell261_lane8_pair1_CAGATC Processsed
flowcell261_lane8_pair1_CAGATC Processsed
flowcell261_lane8_pair1_TGACCA Processsed
flowcell261_lane8_pair1_TGACCA Processsed
flowcell362_lane4_pair1_ACAGTG Processsed
flowcell362_lane4_pair1_ACAGTG Processsed
flowcell362_lane4_pair1_ACTTGA Processsed
flowcell362_lane4_pair1_ACTTGA Processsed
flowcell362_lane4_pair1_CAGATC Processsed
flowcell362_lane4_pair1_CAGATC Processsed
flowcell362_lane4_pair1_TGACCA
flowcell362_lane4_pair1_TGACCA
flowcell362_lane4_pair1_Undetermined
flowcell362_lane4_pair1_Undetermined
flowcell384_lane7_pair1_ACAGTG
flowcell384_lane7_pair1_ACAGTG
flowcell384_lane7_pair1_ACTTGA
flowcell384_lane7_pair1_ACTTGA
flowcell384_lane7_pair1_CAGATC
flowcell384_lane7_pair1_CAGATC
flowcell384_lane7_pair1_GATCAG
flowcell384_lane7_pair1_GATCAG
flowcell384_lane7_pair1_TGACCA
flowcell384_lane7_pair1_TGACCA
testing
testing
[None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None]
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
%%
bash
%%
bash
export
name
=
"
testing
"
export
name
=
"
testing
"
echo
$
name
echo
$
name
wc
-
l
3
-
Filtered
/
testing
.
fastq
wc
-
l
3
-
Filtered
/
testing
.
fastq
wc
-
l
4
-
Bowtied
/
testing
.
sam
wc
-
l
4
-
Bowtied
/
testing
.
sam
wc
-
l
5
-
ncRNA
-
Removed
/
testing
.
fastq
wc
-
l
5
-
ncRNA
-
Removed
/
testing
.
fastq
```
```
%% Output
%% Output
testing
testing
4000 3-Filtered/testing.fastq
4000 3-Filtered/testing.fastq
1415 4-Bowtied/testing.sam
1415 4-Bowtied/testing.sam
1780 5-ncRNA-Removed/testing.fastq
1780 5-ncRNA-Removed/testing.fastq
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
lines_to_burn
=
415
lines_to_burn
=
415
from
time
import
sleep
from
time
import
sleep
with
open
(
"
3-Filtered/
"
+
fname
+
"
.fastq
"
,
"
r
"
)
as
filtered
,
\
with
open
(
"
3-Filtered/
"
+
fname
+
"
.fastq
"
,
"
r
"
)
as
filtered
,
\
open
(
"
4-Bowtied/
"
+
fname
+
"
.sam
"
,
"
r
"
)
as
matches
,
\
open
(
"
4-Bowtied/
"
+
fname
+
"
.sam
"
,
"
r
"
)
as
matches
,
\
open
(
"
5-ncRNA-Removed/
"
+
fname
+
"
.fastq
"
,
"
w
"
)
as
substracted
:
open
(
"
5-ncRNA-Removed/
"
+
fname
+
"
.fastq
"
,
"
w
"
)
as
substracted
:
# Iterator over fastq file
# Iterator over fastq file
filt_iter
=
[
rec
for
rec
in
SeqIO
.
parse
(
filtered
,
"
fastq
"
)]
filt_iter
=
[
rec
for
rec
in
SeqIO
.
parse
(
filtered
,
"
fastq
"
)]
matches_iter
=
[
l
for
l
in
matches
][
lines_to_burn
:]
matches_iter
=
[
l
for
l
in
matches
][
lines_to_burn
:]
for
_
in
range
(
415
):
matches
.
readline
()
for
_
in
range
(
415
):
matches
.
readline
()
# Check last lines
# Check last lines
i
=
0
i
=
0
im
=
0
im
=
0
j
=
0
j
=
0
jm
=
0
jm
=
0
ma
=
0
ma
=
0
pas
=
True
pas
=
True
for
f
in
filt_iter
:
for
f
in
filt_iter
:
i
+=
1
i
+=
1
#print()
#print()
for
m
in
matches_iter
:
for
m
in
matches_iter
:
m
=
m
.
split
(
'
\t
'
)[
0
]
m
=
m
.
split
(
'
\t
'
)[
0
]
mf
=
m
.
split
(
'
\t
'
)[
9
]
mf
=
m
.
split
(
'
\t
'
)[
9
]
if
(
m
==
f
.
id
):
if
(
m
==
f
.
id
):
if
mf
==
4
if
mf
==
4
ma
+=
1
ma
+=
1
im
+=
1
im
+=
1
#print("{0}\t{1}".format(f.id,m))
#print("{0}\t{1}".format(f.id,m))
print
(
i
)
print
(
i
)
for
m
in
matches_iter
:
for
m
in
matches_iter
:
j
+=
1
j
+=
1
m
=
m
.
split
(
'
\t
'
)[
0
]
m
=
m
.
split
(
'
\t
'
)[
0
]
for
f
in
filt_iter
:
for
f
in
filt_iter
:
if
(
m
==
f
.
id
):
if
(
m
==
f
.
id
):
jm
+=
1
jm
+=
1
#print("{0}\t{1}".format(f.id,m))
#print("{0}\t{1}".format(f.id,m))
print
(
i
,
im
,
j
,
jm
)
print
(
i
,
im
,
j
,
jm
)
# Generator over fastq where the corresponding sam field is 4,
# Generator over fastq where the corresponding sam field is 4,
# meaning no reported alignment
# meaning no reported alignment
# sub_iter = (rec for rec in filt_iter \
# sub_iter = (rec for rec in filt_iter \
# if matches.readline().split('\t')[1] == '4')
# if matches.readline().split('\t')[1] == '4')
# Write back fastq
# Write back fastq
# SeqIO.write(sub_iter,substracted,"fastq")
# SeqIO.write(sub_iter,substracted,"fastq")
```
```
%% Output
%% Output
1000
1000
1000 1000 1000 1000
1000 1000 1000 1000
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
with
open
(
"
3-Filtered/
"
+
"
testing
"
+
"
.fastq
"
,
"
r
"
)
as
filtered
,
\
with
open
(
"
3-Filtered/
"
+
"
testing
"
+
"
.fastq
"
,
"
r
"
)
as
filtered
,
\
open
(
"
4-Bowtied/
"
+
"
testing
"
+
"
.sam
"
,
"
r
"
)
as
matches
,
\
open
(
"
4-Bowtied/
"
+
"
testing
"
+
"
.sam
"
,
"
r
"
)
as
matches
,
\
open
(
"
5-ncRNA-Removed/
"
+
"
testing
"
+
"
.fastq
"
,
"
w
"
)
as
substracted
:
open
(
"
5-ncRNA-Removed/
"
+
"
testing
"
+
"
.fastq
"
,
"
w
"
)
as
substracted
:
# Iterator over fastq file
# Iterator over fastq file
filt_iter
=
SeqIO
.
parse
(
filtered
,
"
fastq
"
)
filt_iter
=
SeqIO
.
parse
(
filtered
,
"
fastq
"
)
# Strip header (as in original script)
# Strip header (as in original script)
for
_
in
range
(
415
-
3
):
matches
.
readline
()
for
_
in
range
(
415
-
3
):
matches
.
readline
()
# Check last lines
# Check last lines
print
(
matches
.
readline
())
print
(
matches
.
readline
())
print
(
matches
.
readline
())
print
(
matches
.
readline
())
print
(
matches
.
readline
())
print
(
matches
.
readline
())
# Generator over fastq where the corresponding sam field is 4,
# Generator over fastq where the corresponding sam field is 4,
# meaning no reported alignment
# meaning no reported alignment
sub_iter
=
(
rec
for
rec
in
filt_iter
\
sub_iter
=
(
rec
for
rec
in
filt_iter
\
if
matches
.
readline
().
split
(
'
\t
'
)[
1
]
==
'
4
'
)
if
matches
.
readline
().
split
(
'
\t
'
)[
1
]
==
'
4
'
)
# Write back fastq
# Write back fastq
SeqIO
.
write
(
sub_iter
,
substracted
,
"
fastq
"
)
SeqIO
.
write
(
sub_iter
,
substracted
,
"
fastq
"
)
```
```
%% Output
%% Output
@SQ SN:tW(UCA)Q LN:74
@SQ SN:tW(UCA)Q LN:74
@SQ SN:tY(GUA)Q LN:84
@SQ SN:tY(GUA)Q LN:84
@PG ID:Bowtie VN:1.1.2 CL:"bowtie --wrapper basic-0 -S -v 3 -p 4 --best ref/2-Indexes/Yeast-Noncoding/Yeast-Noncoding 3-Filtered/testing.fastq 4-Bowtied/testing.sam"
@PG ID:Bowtie VN:1.1.2 CL:"bowtie --wrapper basic-0 -S -v 3 -p 4 --best ref/2-Indexes/Yeast-Noncoding/Yeast-Noncoding 3-Filtered/testing.fastq 4-Bowtied/testing.sam"
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
%%
bash
%%
bash
mkdir
-
p
test
/
mkdir
-
p
test
/
```
```
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment