version k-means qui marche!!
This commit is contained in:
@@ -17,7 +17,6 @@ if __name__ == "__main__":
|
||||
filename = args.filename
|
||||
token = args.token
|
||||
heights = get_heights(filename)
|
||||
print(f"Heights: {heights}")
|
||||
# drop the first, last, and 12th bar
|
||||
heights = heights[1:11] + heights[12:-1]
|
||||
decoded = spotify_bar_decode(heights)
|
||||
|
||||
@@ -1,78 +1,45 @@
|
||||
from skimage import io
|
||||
from skimage.color import rgb2gray
|
||||
from skimage.filters import threshold_otsu
|
||||
from skimage.measure import label, regionprops
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.patches as patches
|
||||
from skimage import io
|
||||
from skimage.color import rgb2gray
|
||||
from skimage.filters import threshold_otsu, gaussian
|
||||
from skimage.morphology import closing, square
|
||||
from skimage.measure import label, regionprops
|
||||
import matplotlib.pyplot as plt
|
||||
from skimage.color import rgb2gray
|
||||
import matplotlib.patches as patches
|
||||
from sklearn.cluster import KMeans


def get_heights(filename: str, n_levels: int = 8) -> list:
    """Read a bar-code image and return one discrete level per bar.

    The image is binarised with an Otsu threshold, lightly smoothed
    (Gaussian blur, re-threshold at 0.5, 3x3 morphological closing) and
    split into connected regions. Regions are ordered left to right; the
    leftmost one is treated as the logo and skipped. The bounding-box
    heights of the remaining regions are clustered with k-means and each
    bar is mapped to the rank of its nearest cluster centre, so the
    shortest cluster is level 0.

    Args:
        filename: Path (or anything ``skimage.io.imread`` accepts) of the
            image to decode.
        n_levels: Maximum number of discrete bar levels to look for.

    Returns:
        A list of plain ``int`` levels, one per detected bar, ordered left
        to right.

    Raises:
        ValueError: If no region is left once the leftmost one is skipped.

    Note:
        Levels are ranks among the heights actually present in the image.
        When fewer than ``n_levels`` distinct heights are detected, the
        number of clusters is reduced accordingly and the result spans
        ``0 .. k-1`` rather than ``0 .. n_levels-1``.
    """
    image = io.imread(filename)
    gray = rgb2gray(image)
    binary = gray > threshold_otsu(gray)

    # Blur and re-threshold to round off jagged edges, then close small
    # gaps so that each bar stays a single connected region.
    smoothed = gaussian(binary.astype(float), sigma=1)
    cleaned = closing(smoothed > 0.5, square(3))

    # bbox is (top, left, bottom, right); sorting on the left edge orders
    # the regions left to right.
    boxes = sorted(
        (region.bbox for region in regionprops(label(cleaned))),
        key=lambda box: box[1],
    )

    # The leftmost region is assumed to be the logo, not a bar.
    heights = np.array([box[2] - box[0] for box in boxes[1:]], dtype=float)
    if heights.size == 0:
        raise ValueError(f"no bars detected in {filename!r}")

    # KMeans needs at least as many samples as clusters and yields
    # duplicate centres when there are fewer distinct values than clusters.
    n_clusters = min(n_levels, np.unique(heights).size)

    # n_init is pinned because its default differs between scikit-learn
    # releases; random_state keeps the clustering reproducible.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
    kmeans.fit(heights.reshape(-1, 1))
    centers = np.sort(kmeans.cluster_centers_.ravel())

    # Index of the nearest sorted centre == discrete level of the bar.
    levels = np.abs(heights[:, None] - centers[None, :]).argmin(axis=1)
    return levels.tolist()
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 11 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 2.3 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 2.3 KiB |
Reference in New Issue
Block a user