Skip to content
Snippets Groups Projects
Commit 4fb9a0e4 authored by Kevin Bonga's avatar Kevin Bonga
Browse files

initial commit

parents
Branches master
No related tags found
No related merge requests found
Showing
with 675 additions and 0 deletions
{
"configurations": [
{
"name": "TP motif->search",
"type": "python",
"arguments": ["Bonsoir pariiiiiiiiis","2", "filename"],
"file": "$USER_HOME$/Documents/ecoleProfessionel/école/HEPIA/3meAnnee/algo_av/bonga-tp-motifs/search.py",
},
{
"name": "TP motif->motifs",
"type": "python",
"file": "$USER_HOME$/Documents/ecoleProfessionel/école/HEPIA/3meAnnee/algo_av/bonga-tp-motifs/motifs.py",
}
]
}
.idea/
*__
import timeit as ti
def compare(str1, str2):
    """Compare two strings position by position.

    Both strings are assumed to have the same length; only the first
    ``len(str1)`` positions are inspected.

    Returns:
        ``(True, len(str1))`` when every inspected position matches,
        otherwise ``(False, k)`` where ``k`` is the index of the first
        mismatch.
    """
    length = len(str1)
    # Indexed access (not zip) on purpose: a shorter str2 must still raise.
    first_mismatch = next(
        (k for k in range(length) if str1[k] != str2[k]),
        None,
    )
    if first_mismatch is None:
        return True, length
    return False, first_mismatch
def naive(pattern, text, uniks=False):
    """Return every shift at which ``pattern`` occurs in ``text`` (brute force).

    Args:
        pattern: string to look for.
        text: string to search in.
        uniks: when true, use the skipping variant. After comparing at shift
            ``i`` the search jumps ahead by the number of characters that
            matched. This is only sound if all characters of ``pattern`` are
            distinct (presumably what the flag name stands for; the caller
            is responsible for that guarantee).

    Returns:
        The list of matching shifts in increasing order. A pattern longer
        than the text yields an empty list.
    """
    lenp = len(pattern)
    # Last shift at which the pattern still fits entirely inside the text.
    # It must be included, otherwise a match at the very end is missed.
    last_start = len(text) - lenp

    if not uniks:
        return [i for i in range(last_start + 1) if text[i:i + lenp] == pattern]

    shifts = []
    i = 0
    while i <= last_start:
        is_equal, shift = compare(pattern, text[i:i + lenp])
        if is_equal:
            shifts.append(i)
        # Always advance by at least one position, even on an immediate
        # mismatch (shift == 0).
        i += max(shift, 1)
    return shifts
#------------------------------------
#automate
#------------------------------------
def automate(T, Tab, alphabet, m):
    """Run a string-matching automaton over ``T`` and report the matches.

    Args:
        T: text to scan.
        Tab: transition table; ``Tab[q][c]`` is the state reached from state
            ``q`` on the symbol whose column index is ``c``.
        alphabet: mapping from symbol to its column index in ``Tab``.
        m: accepting state (the length of the motif).

    Returns:
        The list of shifts at which the accepting state was reached. Each
        shift is also printed on its own line, as before.
    """
    shifts = []
    q = 0
    for i, symbol in enumerate(T):
        # A symbol outside the alphabet cannot extend any match: restart.
        q = Tab[q][alphabet[symbol]] if symbol in alphabet else 0
        if q == m:
            shift = i - m + 1
            print(shift)
            shifts.append(shift)
    return shifts
def table_transit(motif, alphabet):
    """Build the transition table of the automaton that recognises ``motif``.

    Args:
        motif: the pattern string.
        alphabet: collection of symbols; each one gets a column of the table
            in iteration order.

    Returns:
        ``(table, alphabet_map)`` where ``table[q][alphabet_map[c]]`` is the
        length of the longest prefix of ``motif`` that is a suffix of
        ``motif[:q] + c``, and ``alphabet_map`` maps each symbol to its
        column index.
    """
    m = len(motif)
    column_of = {}
    for column, symbol in enumerate(alphabet):
        column_of[symbol] = column

    table = [[0] * len(alphabet) for _ in range(m + 1)]
    for state, row in enumerate(table):
        already_read = motif[:state]
        for symbol in alphabet:
            candidate = already_read + symbol
            # Try the longest possible prefix first and shrink until it fits.
            row[column_of[symbol]] = next(
                (
                    k
                    for k in range(min(m, state + 1), 0, -1)
                    if motif[:k] == candidate[-k:]
                ),
                0,
            )
    return table, column_of
#------------------------------------
#kmp
#------------------------------------
def kmp_recherche(T, M):
    """Search motif ``M`` in text ``T`` with the Knuth-Morris-Pratt algorithm.

    Each occurrence is printed on its own line and also collected.

    Args:
        T: text to scan.
        M: motif to look for.

    Returns:
        The list of shifts at which ``M`` occurs in ``T``. An empty motif is
        reported at every shift ``0..len(T)``, which matches how the other
        searches in this file treat an empty motif, instead of raising
        ``IndexError``.
    """
    m = len(M)
    if m == 0:
        # M[q] below would be out of range; handle the degenerate motif here.
        shifts = list(range(len(T) + 1))
        for shift in shifts:
            print(f"Motif trouvé au décalage {shift}")
        return shifts

    pi = calculer_table_prefixe(M)
    shifts = []
    q = 0  # Length of the current partial match
    for i, symbol in enumerate(T):
        # On a mismatch, fall back to the longest border of the matched part.
        while q > 0 and M[q] != symbol:
            q = pi[q - 1]
        if M[q] == symbol:
            q += 1
        if q == m:
            shift = i - m + 1
            print(f"Motif trouvé au décalage {shift}")
            shifts.append(shift)
            # Keep going so that overlapping occurrences are found too.
            q = pi[q - 1]
    return shifts
def calculer_table_prefixe(motif):
    """Compute the KMP prefix table of ``motif``.

    Returns:
        A list ``pi`` of ``len(motif)`` integers where ``pi[i]`` is the
        length of the longest proper prefix of ``motif[:i + 1]`` that is also
        a suffix of it. The result is ``[]`` for an empty motif.
    """
    pi = [0] * len(motif)
    border = 0  # Length of the border carried over from the previous index
    for i in range(1, len(motif)):
        symbol = motif[i]
        while border and motif[border] != symbol:
            border = pi[border - 1]
        if motif[border] == symbol:
            border += 1
        # When the symbols differ, border is necessarily 0 at this point.
        pi[i] = border
    return pi
def rabin_karp(T, M, B, q):
    """Search motif ``M`` in text ``T`` with the Rabin-Karp rolling hash.

    Each character is converted with ``int()``, so ``T`` and ``M`` must be
    strings of digits. Each occurrence is printed on its own line and also
    collected.

    Args:
        T: text to scan (digit string).
        M: motif to look for (digit string).
        B: base of the positional hash.
        q: modulus of the hash.

    Returns:
        The list of shifts at which ``M`` occurs in ``T``. A motif longer
        than the text yields an empty list instead of raising ``IndexError``.
    """
    n = len(T)
    m = len(M)
    shifts = []
    if m > n:
        # No window of the text can hold the motif.
        return shifts

    # Weight of the leading digit of a window: B^(m-1) mod q.
    h = pow(B, m - 1, q) if m > 0 else 1

    # Hash of the motif (p) and of the first window of the text (t).
    p = 0
    t = 0
    for i in range(m):
        p = (B * p + int(M[i])) % q
        t = (B * t + int(T[i])) % q

    last_shift = n - m
    for s in range(last_shift + 1):
        # Equal hashes can be a collision: confirm with a real comparison.
        if p == t and T[s:s + m] == M:
            print(f"Motif trouvé au décalage {s}")
            shifts.append(s)
        if s < last_shift:
            # Drop the leading digit, shift, append the next digit. Python's
            # % already returns a canonical residue, so no sign fix-up.
            t = (B * (t - int(T[s]) * h) + int(T[s + m])) % q
    return shifts
def _tests():
    """Run the module's demo.

    Only two steps are active: the transition table for "aabab" is built
    (but not used for a search), then Rabin-Karp is run on a digit string.
    The naive, automaton and KMP demos were commented out in the original.
    """
    # Automaton: build the table over the motif's own symbols.
    motif = "aabab"
    table, alphabet_map = table_transit(motif, set(motif))

    # Rabin-Karp: base 10, modulus 11.
    digits = "50261592653265286"
    rabin_karp(digits, "26", 10, 11)


if __name__ == '__main__':
    _tests()
\ No newline at end of file
#!/usr/bin/env python
import argparse
import pathlib
import motifs as pattern
if __name__ == '__main__':
    # Command line: search <pattern> <algo> [filename]
    #   - without a filename: print the algorithm's pre-computed table(s);
    #   - with a filename: print the occurrence count and the positions.
    parser = argparse.ArgumentParser(
        prog='search',
        description='Search a string in a file or give description of algorithms tables'
    )
    parser.add_argument('pattern', type=str)
    parser.add_argument('algo', type=int)
    parser.add_argument('filename', nargs='?', type=pathlib.Path, default=None)
    p = parser.parse_args()

    # NOTE: every value printed below is a hard-coded sample showing the
    # expected output format; it still has to be generated by real code.
    if p.algo == 1:  # Rabin-Karp
        if p.filename is None:
            # Print the base, the prime used as modulus, and the motif's hash.
            print("26 37 18")
        else:
            # Print the number of occurrences of the motif, followed by the
            # list of positions of its first letter in the text.
            print("13")  # number of occurrences of the motif
            print("0 3 46 67 109")  # positions of the motif
    elif p.algo == 2:  # Finite automaton
        if p.filename is None:
            # Print the transition-function table.
            # E.g. for the motif M = "ababaca"
            #      a b c
            print("1 0 0")  # state 0
            print("1 2 0")  # state 1
            print("3 0 0")  # state 2
            print("1 4 0")  # state 3
            print("5 0 0")  # state 4
            print("1 4 6")  # state 5
            print("7 0 0")  # state 6
            print("1 2 0")  # state 7
        else:
            # Print the number of occurrences of the motif, followed by the
            # list of positions of its first letter in the text.
            print("13")  # number of occurrences of the motif
            print("0 3 46 67 109")  # positions of the motif
    elif p.algo == 3:  # Knuth-Morris-Pratt
        if p.filename is None:
            # Print the prefix table.
            # E.g. for the motif M = "ababaca"
            #      0 1 2 3 4 5 6   q
            #      a b a b a c a   M[q]
            print("0 0 1 2 3 0 1")  # pi[q]
        else:
            # Print the number of occurrences of the motif, followed by the
            # list of positions of its first letter in the text.
            print("13")  # number of occurrences of the motif
            print("0 3 46 67 109")  # positions of the motif
    elif p.algo == 4:  # Boyer-Moore
        if p.filename is None:
            # Print the two shift tables.
            # E.g. for the motif M = "anpanman"
            # First table
            #      a n p m *   letter (in order of appearance in the motif)
            print("1 0 5 2 8")  # shift
            # Second table
            # matched part of the motif (from the right):
            #      n an man nman anman panman npanman anpanman
            # shift: 8 3 6 6 6 6 6 6
            print("8 3 6 6 6 6 6 6")  # shift
        else:
            # Print the number of occurrences of the motif, followed by the
            # list of positions of its first letter in the text.
            print("13")  # number of occurrences of the motif
            print("0 3 46 67 109")  # positions of the motif
    else:
        print("Algorithm not implemented")
        # The bare exit() helper is injected by the site module for
        # interactive use; raise SystemExit directly (same exit status).
        raise SystemExit(2)
File added
File added
File added
File added
File added
import timeit as ti
def compare(str1, str2):
    """Compare two strings position by position.

    Both strings are assumed to have the same length; only the first
    ``len(str1)`` positions are inspected.

    Returns:
        ``(True, len(str1))`` when every inspected position matches,
        otherwise ``(False, k)`` where ``k`` is the index of the first
        mismatch.
    """
    length = len(str1)
    # Indexed access (not zip) on purpose: a shorter str2 must still raise.
    first_mismatch = next(
        (k for k in range(length) if str1[k] != str2[k]),
        None,
    )
    if first_mismatch is None:
        return True, length
    return False, first_mismatch
def naive(pattern, text, uniks=False):
    """Return every shift at which ``pattern`` occurs in ``text`` (brute force).

    Args:
        pattern: string to look for.
        text: string to search in.
        uniks: when true, use the skipping variant. After comparing at shift
            ``i`` the search jumps ahead by the number of characters that
            matched. This is only sound if all characters of ``pattern`` are
            distinct (presumably what the flag name stands for; the caller
            is responsible for that guarantee).

    Returns:
        The list of matching shifts in increasing order. A pattern longer
        than the text yields an empty list.
    """
    lenp = len(pattern)
    # Last shift at which the pattern still fits entirely inside the text.
    # It must be included, otherwise a match at the very end is missed.
    last_start = len(text) - lenp

    if not uniks:
        return [i for i in range(last_start + 1) if text[i:i + lenp] == pattern]

    shifts = []
    i = 0
    while i <= last_start:
        is_equal, shift = compare(pattern, text[i:i + lenp])
        if is_equal:
            shifts.append(i)
        # Always advance by at least one position, even on an immediate
        # mismatch (shift == 0).
        i += max(shift, 1)
    return shifts
def tests():
    """Print the matches and a 10000-run timing of ``naive`` on three samples.

    The first sample uses the plain search; the other two use the skipping
    variant (``uniks=True``).
    """
    samples = (
        (
            "Tom",
            "Tom is a nice boy. But Tom is also sometime acting weird. Tom is going to the swiming pool and to the party.",
            False,
        ),
        (
            "Tomawakislong",
            "Tom is a nice boy. But Tomawakislong is also sometime acting weird. Tom is going to the swiming pool and gg.",
            True,
        ),
        (
            "Tomawakislong",
            "TomawakislongTomTomawakislongTTomawakislonomToToTTTToTTomawakislonToTTTToToTTTToToTTTToToTTTToTTomawakislong",
            True,
        ),
    )
    for pattern, text, uniks in samples:
        print(naive(pattern, text, uniks))
        print(ti.timeit(lambda: naive(pattern, text, uniks), number=10000))


if __name__ == '__main__':
    tests()
\ No newline at end of file
def naif_exo1(T, M):
    """Print, one per line, every shift at which ``M`` occurs in ``T``.

    Brute-force search: each window of ``len(M)`` characters of ``T`` is
    compared with ``M``.
    """
    width = len(M)
    last_start = len(T) - width
    for start in range(last_start + 1):
        window = T[start:start + width]
        if window == M:
            print(start)
def naif_exo2():
    """Placeholder for exercise 2: takes no input and always returns 1."""
    # NOTE(review): looks like an unfinished stub — confirm whether a real
    # implementation is still expected here.
    return 1
def automate(T, Tab, alphabet_map, m):
    """Run a string-matching automaton over ``T`` and report the matches.

    Args:
        T: text to scan.
        Tab: transition table; ``Tab[q][c]`` is the state reached from state
            ``q`` on the symbol whose column index is ``c``.
        alphabet_map: mapping from symbol to its column index in ``Tab``.
        m: accepting state (the length of the motif).

    Returns:
        The list of shifts at which the accepting state was reached. Each
        shift is also printed on its own line, as before.
    """
    shifts = []
    q = 0
    for i, symbol in enumerate(T):
        # A symbol outside the alphabet cannot extend any match: restart.
        q = Tab[q][alphabet_map[symbol]] if symbol in alphabet_map else 0
        if q == m:
            shift = i - m + 1
            print(shift)
            shifts.append(shift)
    return shifts
def table_trans(motif):
    """Build the transition table of the automaton that recognises ``motif``.

    The alphabet is the sorted set of symbols appearing in ``motif``.

    Returns:
        ``(table, alphabet_map)`` where ``table[q][alphabet_map[c]]`` is the
        length of the longest prefix of ``motif`` that is a suffix of
        ``motif[:q] + c``, and ``alphabet_map`` maps each symbol to its
        column index.
    """
    m = len(motif)
    symbols = sorted(set(motif))
    column_of = dict(zip(symbols, range(len(symbols))))

    table = []
    for state in range(m + 1):
        row = [0] * len(symbols)
        for symbol in symbols:
            candidate = motif[:state] + symbol
            # Shrink from the longest possible prefix until it is a suffix.
            longest = min(m, state + 1)
            while longest > 0 and candidate[-longest:] != motif[:longest]:
                longest -= 1
            row[column_of[symbol]] = longest
        table.append(row)
    return table, column_of
def calculer_table_prefixe(motif):
    """Compute the KMP prefix table of ``motif``.

    Returns:
        A list ``pi`` of ``len(motif)`` integers where ``pi[i]`` is the
        length of the longest proper prefix of ``motif[:i + 1]`` that is also
        a suffix of it. The result is ``[]`` for an empty motif.
    """
    pi = [0] * len(motif)
    border = 0  # Length of the border carried over from the previous index
    for i in range(1, len(motif)):
        symbol = motif[i]
        while border and motif[border] != symbol:
            border = pi[border - 1]
        if motif[border] == symbol:
            border += 1
        # When the symbols differ, border is necessarily 0 at this point.
        pi[i] = border
    return pi
def kmp_recherche(T, M):
    """Search motif ``M`` in text ``T`` with the Knuth-Morris-Pratt algorithm.

    Each occurrence is printed on its own line and also collected.

    Args:
        T: text to scan.
        M: motif to look for.

    Returns:
        The list of shifts at which ``M`` occurs in ``T``. An empty motif is
        reported at every shift ``0..len(T)``, which matches how the
        Rabin-Karp search in this file treats an empty motif, instead of
        raising ``IndexError``.
    """
    m = len(M)
    if m == 0:
        # M[q] below would be out of range; handle the degenerate motif here.
        shifts = list(range(len(T) + 1))
        for shift in shifts:
            print(f"Motif trouvé au décalage {shift}")
        return shifts

    pi = calculer_table_prefixe(M)
    shifts = []
    q = 0  # Length of the current partial match
    for i, symbol in enumerate(T):
        # On a mismatch, fall back to the longest border of the matched part.
        while q > 0 and M[q] != symbol:
            q = pi[q - 1]
        if M[q] == symbol:
            q += 1
        if q == m:
            shift = i - m + 1
            print(f"Motif trouvé au décalage {shift}")
            shifts.append(shift)
            # Keep going so that overlapping occurrences are found too.
            q = pi[q - 1]
    return shifts
def rabin_karp(T, M, B, q):
    """Search motif ``M`` in text ``T`` with the Rabin-Karp rolling hash.

    Each character is converted with ``int()``, so ``T`` and ``M`` must be
    strings of digits. Each occurrence is printed on its own line and also
    collected.

    Args:
        T: text to scan (digit string).
        M: motif to look for (digit string).
        B: base of the positional hash.
        q: modulus of the hash.

    Returns:
        The list of shifts at which ``M`` occurs in ``T``. A motif longer
        than the text yields an empty list instead of raising ``IndexError``.
    """
    n = len(T)
    m = len(M)
    shifts = []
    if m > n:
        # No window of the text can hold the motif.
        return shifts

    # Weight of the leading digit of a window: B^(m-1) mod q.
    h = pow(B, m - 1, q) if m > 0 else 1

    # Numeric value (mod q) of the motif (p) and of the first window (t).
    p = 0
    t = 0
    for i in range(m):
        p = (B * p + int(M[i])) % q
        t = (B * t + int(T[i])) % q

    # Slide the motif over the text from left to right.
    last_shift = n - m
    for s in range(last_shift + 1):
        # Equal hashes can be a collision: confirm with a real comparison.
        if p == t and T[s:s + m] == M:
            print(f"Motif trouvé au décalage {s}")
            shifts.append(s)
        if s < last_shift:
            # Hash of the next window: drop the leading digit, shift, append
            # the next digit. Python's % already returns a canonical
            # residue, so no sign fix-up is needed.
            t = (B * (t - int(T[s]) * h) + int(T[s + m])) % q
    return shifts
# Exercise 3: automaton search. These statements run at import time.
# NOTE(review): `alphabet` is assigned but never used below; table_trans
# derives the alphabet from the motif itself.
alphabet = ['a', 'b']
motif = "aabab"
texte = "aaaababaabaababaab"
table, alphabet_map = table_trans(motif)
automate(texte, table, alphabet_map, len(motif))
# Usage example: KMP search. This text contains no occurrence of the
# motif, so the call prints nothing.
texte = "bacbababaabcbab"
motif = "ababaca"
kmp_recherche(texte, motif)
# Usage example: Rabin-Karp on a digit string, base 10 and modulus 13.
texte = "47368"
motif = "368"
B = 10
q = 13
rabin_karp(texte, motif, B, q)
\ No newline at end of file
File added
File added
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
*.xml
*.iml
\ No newline at end of file
%% Cell type:code id:initial_id tags:
``` python
# Import necessary modules
from kohonen import KohonenNetwork
from utils import read_colors_data, plot_kohonen_network
import numpy as np
from sklearn.datasets import make_blobs, make_circles
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
```
%% Cell type:code id:19116123d0f4aa26 tags:
``` python
# Section 1: Load and preprocess data
## Load color data (parsed by the project helper read_colors_data)
colors_data = read_colors_data('colors.dat')
## Load MNIST data
with np.load('ressources/mnist.npz') as f:
    x_train, y_train = f['x_train'], f['y_train']
# NOTE(review): only x_train is filtered below; y_train keeps every label, so
# the two arrays no longer line up afterwards — confirm y_train is not reused.
x_train = x_train[y_train < 9] # Select digits 0 to 8
x_train = x_train.reshape(-1, 28*28) # Flatten each image into a 784-component row
x_train = MinMaxScaler().fit_transform(x_train) # Rescale each feature with MinMaxScaler
## Generate synthetic data: 3 clusters (no random_state is given, so the points change between runs)
X_blobs, y_blobs = make_blobs(n_samples=300, centers=3)
X_blobs = MinMaxScaler().fit_transform(X_blobs) # Rescale each feature with MinMaxScaler
## Generate synthetic data: make_circles draws two concentric noisy circles (inner/outer scale factor 0.5), not a filled disk
X_circles, y_circles = make_circles(n_samples=100, factor=0.5, noise=0.05)
X_circles = MinMaxScaler().fit_transform(X_circles) # Rescale each feature with MinMaxScaler
```
%% Cell type:code id:b1598789d919a169 tags:
``` python
```
%% Cell type:code id:dad6fa058c9bdcd8 tags:
``` python
# Section 2: Define a function to train and plot Kohonen network
def train_and_plot(data, shape, input_dim, learning_rate=0.1, radius=5, time_const=1000, iter=10000, title='Kohonen Network'):
    """Train a Kohonen network on ``data`` and display it.

    Args:
        data: training samples, passed unchanged to ``KohonenNetwork.train``.
        shape: grid shape given to the ``KohonenNetwork`` constructor.
        input_dim: input dimension given to the ``KohonenNetwork`` constructor.
        learning_rate, radius, time_const: forwarded to ``train`` as keywords.
        iter: accepted but not used by this function.
        title: title set on the current matplotlib figure.
    """
    # NOTE(review): `iter` is never forwarded to train() and shadows the
    # builtin — confirm whether train() expects an iteration count.
    training_options = {
        'learning_rate': learning_rate,
        'radius': radius,
        'time_const': time_const,
    }
    som = KohonenNetwork(shape, input_dim)
    som.train(data, **training_options)
    plot_kohonen_network(som)
    plt.title(title)
    plt.show()
```
%% Cell type:code id:6634e950f9c55161 tags:
``` python
# Section 3: Train and visualize Kohonen networks
## Color data: 10x10 map, 3-component inputs, default training parameters of train_and_plot
train_and_plot(colors_data, shape=(10, 10), input_dim=3, title='Kohonen Network - Colors Data')
```
%% Cell type:code id:b82906c417a07f0b tags:
``` python
# Section 4: Experiment with different parameters
## Experiment with different learning rates, radii, and time constants
## Each dict is expanded into keyword arguments of train_and_plot, one run per dict.
params = [
    {'learning_rate': 0.1, 'radius': 5, 'time_const': 1000},
    {'learning_rate': 0.05, 'radius': 3, 'time_const': 500},
    {'learning_rate': 0.01, 'radius': 1, 'time_const': 200},
]
for i, param in enumerate(params):
    # Experiments are numbered from 1 in the plot title.
    train_and_plot(colors_data, shape=(10, 10), input_dim=3, **param, title=f'Kohonen Network - Colors Data (Experiment {i+1})')
```
%% Cell type:code id:3a7c4882554c3541 tags:
``` python
```
File added
File added
File added
File added
3
100
10
10
1 0.6016 0.3312 0.02719
2 0.2443 0.7386 0.4752
3 0.03123 0.1551 0.8372
4 0.6681 0.6176 0.4934
5 0.3817 0.3099 0.2329
6 0.7456 0.6161 0.8035
7 0.7264 0.7082 0.4943
8 0.4236 0.8177 0.7734
9 0.959 0.1522 0.5616
10 0.2505 0.8411 0.6837
11 0.9357 0.3852 0.9949
12 0.3167 0.8943 0.6251
13 0.9921 0.1581 0.9839
14 0.9157 0.1701 0.2903
15 0.98 0.05771 0.05249
16 0.79 0.06803 0.08952
17 0.7274 0.7479 0.4584
18 0.959 0.7485 0.3375
19 0.6597 0.07821 0.5996
20 0.1363 0.402 0.6912
21 0.1184 0.8541 0.663
22 0.9606 0.8639 0.5369
23 0.7197 0.3827 0.7834
24 0.9146 0.3304 0.1947
25 0.8617 0.7454 0.4802
26 0.3528 0.1911 0.6869
27 0.8066 0.9266 0.732
28 0.2574 0.9873 0.1123
29 0.4896 0.2166 0.05149
30 0.5845 0.4328 0.6615
31 0.0594 0.2754 0.8176
32 0.8125 0.02045 0.9707
33 0.8009 0.5172 0.3549
34 0.7118 0.113 0.4899
35 0.07155 0.4502 0.9137
36 0.9243 0.2358 0.8106
37 0.3613 0.5754 0.7495
38 0.6559 0.6784 0.6918
39 0.1648 0.8315 0.2486
40 0.6047 0.8216 0.4212
41 0.3102 0.8083 0.5586
42 0.2171 0.2831 0.5929
43 0.07987 0.2866 0.7759
44 0.4717 0.06548 0.5717
45 0.5608 0.2668 0.7189
46 0.2378 0.8172 0.3168
47 0.762 0.6657 0.7049
48 0.8107 0.9975 0.9204
49 0.9621 0.2912 0.8299
50 0.3536 0.6454 0.6799
51 0.3461 0.4724 0.6581
52 0.03544 0.9509 0.7633
53 0.726 0.5523 0.1835
54 0.6338 0.8692 0.0242
55 0.7221 0.6885 0.2828
56 0.01081 0.8837 0.3302
57 0.6176 0.1357 0.01362
58 0.1174 0.8901 0.7828
59 0.03962 0.9702 0.7845
60 0.1751 0.8142 0.3821
61 0.9355 0.2495 0.561
62 0.1394 0.06385 0.4866
63 0.9391 0.05512 0.3403
64 0.5986 0.805 0.3922
65 0.1135 0.9382 0.3148
66 0.3596 0.0724 0.7999
67 0.5897 0.9277 0.2058
68 0.7609 0.01614 0.7627
69 0.5579 0.9435 0.1841
70 0.02541 0.3346 0.1376
71 0.5401 0.9495 0.3104
72 0.5579 0.08707 0.669
73 0.9465 0.1807 0.2338
74 0.1638 0.217 0.5447
75 0.7816 0.5736 0.592
76 0.6136 0.5167 0.2534
77 0.8209 0.3518 0.3201
78 0.06349 0.6665 0.94
79 0.6565 0.2349 0.19
80 0.6423 0.383 0.7373
81 0.00386 0.1972 0.2361
82 0.3471 0.837 0.5254
83 0.8478 0.6382 0.9458
84 0.5478 0.7486 0.2994
85 0.7842 0.8178 0.4341
86 0.2141 0.2811 0.9513
87 0.5371 0.9891 0.4812
88 0.2471 0.3317 0.5378
89 0.7767 0.1449 0.1258
90 0.5354 0.5773 0.4242
91 0.2272 0.214 0.7716
92 0.4337 0.9144 0.1826
93 0.9781 0.01106 0.3549
94 0.9644 0.5108 0.4073
95 0.2183 0.702 0.1984
96 0.8364 0.09746 0.4761
97 0.8448 0.2834 0.2933
98 0.4742 0.3624 0.2188
99 0.7155 0.9768 0.5989
100 0.4036 0.3688 0.2762
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment