version k-means qui marche!!

This commit is contained in:
2025-11-21 14:31:37 +01:00
parent b77383681e
commit 1ff888cf34
43 changed files with 431 additions and 3178 deletions

View File

@@ -17,7 +17,6 @@ if __name__ == "__main__":
filename = args.filename
token = args.token
heights = get_heights(filename)
print(f"Heights: {heights}")
# drop the first, last, and 12th bars
heights = heights[1:11] + heights[12:-1]
decoded = spotify_bar_decode(heights)

View File

@@ -1,78 +1,45 @@
from skimage import io
from skimage.color import rgb2gray
from skimage.filters import threshold_otsu
from skimage.measure import label, regionprops
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from skimage import io
from skimage.color import rgb2gray
from skimage.filters import threshold_otsu, gaussian
from skimage.morphology import closing, square
from skimage.measure import label, regionprops
import matplotlib.pyplot as plt
from skimage.color import rgb2gray
import matplotlib.patches as patches
def get_heights(filename: str) -> list:
"""Open an image and return a list of the bar heights.
Also saves three images: the processed binary image, the original image with drawings, and a smoothed binary image.
"""
from sklearn.cluster import KMeans
def get_heights(filename: str):
image = io.imread(filename)
im = rgb2gray(image)
binary_im = im > threshold_otsu(im)
smooth_im = gaussian(binary_im.astype(float), sigma=1)
morph_im = closing(smooth_im > 0.5, square(3))
# Apply Gaussian smoothing to binary image to make bars look rounder
smooth_im = gaussian(binary_im, sigma=2)
# Threshold again to obtain a smoothed binary
smooth_binary = smooth_im > 0.5
labeled = label(smooth_binary)
bar_dimensions = [r.bbox for r in regionprops(labeled)]
bar_dimensions.sort(key=lambda x: x[1]) # Sort left to right
# The first object (spotify logo) is the max height of the bars
logo = bar_dimensions[0]
max_height = logo[2] - logo[0]
sequence = []
# Create figure and axis for drawing
fig, ax = plt.subplots()
ax.imshow(smooth_binary, cmap='gray')
# Draw rectangle around logo
logo_rect = patches.Rectangle(
(logo[1], logo[0]),
logo[3]-logo[1],
logo[2]-logo[0],
linewidth=2,
edgecolor='yellow',
facecolor='none'
)
ax.add_patch(logo_rect)
# Add 'Logo' text near the rectangle
ax.text(logo[1], logo[0] - 10, 'Logo', color='yellow', fontsize=12, weight='bold')
# Draw bars and center markers
for bar in bar_dimensions[1:]:
height = bar[2] - bar[0]
ratio = height / max_height
ratio *= 8
ratio //= 1
val = int(ratio - 1)
sequence.append(val)
# Draw rectangle around bar
rect = patches.Rectangle(
(bar[1], bar[0]),
bar[3]-bar[1],
bar[2]-bar[0],
linewidth=1,
edgecolor='red',
facecolor='none'
)
ax.add_patch(rect)
# Draw center marker (black circle)
center_x = (bar[1] + bar[3]) / 2
center_y = (bar[0] + bar[2]) / 2
center_marker = patches.Circle((center_x, center_y), radius=5, color='black')
ax.add_patch(center_marker)
# Save processed binary image used for calculations
plt.imsave('processed_binary_image.png', binary_im, cmap='gray')
# Save smoothed binary image
plt.imsave('img_smooth.png', smooth_binary, cmap='gray')
# Save image with drawings
plt.axis('off')
plt.savefig('image_with_drawings.png', bbox_inches='tight', pad_inches=0)
plt.close()
return sequence
labeled = label(morph_im)
bar_dims = [r.bbox for r in regionprops(labeled)]
bar_dims.sort(key=lambda x: x[1]) # left to right
bars = bar_dims[1:] # skip logo
bar_heights_raw = []
for bar in bars:
top, left, bottom, right = bar
effective_height = bottom - top # use bounding box height directly
bar_heights_raw.append(effective_height)
print(len(bars))
# Cluster measured heights to 8 clusters representing discrete bar levels
bar_heights_raw_np = np.array(bar_heights_raw).reshape(-1, 1)
kmeans = KMeans(n_clusters=8, random_state=0).fit(bar_heights_raw_np)
cluster_centers = np.sort(kmeans.cluster_centers_.flatten())
# Assign each bar height to closest cluster center index (0 to 7)
predicted_levels = []
for h in bar_heights_raw:
diffs = np.abs(cluster_centers - h)
closest_cluster = np.argmin(diffs)
predicted_levels.append(closest_cluster)
print(len(predicted_levels))
return predicted_levels

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.3 KiB