Obtener datos de la gráfica con matplotlib

Estoy usando matplotlib en Python para construir un diagrama de dispersión.

Supongamos que tengo las siguientes 2 listas de datos.

X = [1,2,3,4,5]

Y = [6,7,8,9,10]

Luego uso X como los valores del eje X e Y como los valores del eje Y para hacer un diagrama de dispersión. Así que tendré una imagen con 5 puntos de dispersión, ¿verdad?

Ahora la pregunta: ¿es posible crear una conexión para estos 5 puntos con los datos reales? Por ejemplo, cuando hago clic en uno de estos 5 puntos, ¿puede decirme qué datos originales he usado para hacer este punto?

gracias por adelantado

Usando una versión ligeramente modificada del DataCursor de Joe Kington:

import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import matplotlib.cbook as cbook
import numpy as np


def fmt(x, y):
    """Return a two-line label showing x and y with two decimal places."""
    return 'x: {x:0.2f}\ny: {y:0.2f}'.format(x=x, y=y)


class DataCursor(object):
    # https://stackoverflow.com/a/4674445/190597
    """A simple data cursor widget that displays the x,y location of a
    matplotlib artist when it is selected."""

    def __init__(self, artists, x=(), y=(), tolerance=5, offsets=(-20, 20),
                 formatter=fmt, display_all=False):
        """Create the data cursor and connect it to the relevant figure.

        "artists" is the matplotlib artist or sequence of artists that will
            be selected.
        "x", "y" are the original data coordinates; the annotation snaps to
            the nearest of these points.
        "tolerance" is the radius (in points) that the mouse click must be
            within to select the artist.
        "offsets" is a tuple of (x,y) offsets in points from the selected
            point to the displayed annotation box.
        "formatter" is a callback function which takes 2 numeric arguments
            and returns a string.
        "display_all" controls whether more than one annotation box will be
            shown if there are multiple axes.  Only one will be shown
            per-axis, regardless.
        """
        # One row per data point: column 0 is x, column 1 is y.
        self._points = np.column_stack((x, y))
        self.formatter = formatter
        self.offsets = offsets
        self.display_all = display_all
        # np.iterable replaces cbook.iterable, which was deprecated in
        # Matplotlib 3.1 and is no longer available in current releases.
        if not np.iterable(artists):
            artists = [artists]
        self.artists = artists
        self.axes = tuple(set(art.axes for art in self.artists))
        self.figures = tuple(set(ax.figure for ax in self.axes))

        # One (initially hidden) annotation box per axes.
        self.annotations = {}
        for ax in self.axes:
            self.annotations[ax] = self.annotate(ax)
        for artist in self.artists:
            artist.set_picker(tolerance)
        # The instance itself is the pick-event callback (see __call__).
        for fig in self.figures:
            fig.canvas.mpl_connect('pick_event', self)

    def annotate(self, ax):
        """Draws and hides the annotation box for the given axis "ax"."""
        # Start with empty text; the real label is set on each pick event.
        annotation = ax.annotate(
            '', xy=(0, 0), ha='right',
            xytext=self.offsets, textcoords='offset points', va='bottom',
            bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
            arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'),
        )
        annotation.set_visible(False)
        return annotation

    def snap(self, x, y):
        """Return the value in self._points closest to (x, y).

        If no data points were supplied, (x, y) is returned unchanged so the
        annotation shows the clicked location instead of raising.
        """
        if len(self._points) == 0:
            return x, y
        # Squared Euclidean distance from (x, y) to every stored point.
        idx = np.nanargmin(((self._points - (x, y)) ** 2).sum(axis=-1))
        return self._points[idx]

    def __call__(self, event):
        """Intended to be called through "mpl_connect"."""
        # Rather than trying to interpolate, just display the clicked coords
        # This will only be called if it's within "tolerance", anyway.
        x, y = event.mouseevent.xdata, event.mouseevent.ydata
        annotation = self.annotations[event.artist.axes]
        if x is not None:
            if not self.display_all:
                # Hide any other annotation boxes...
                for ann in self.annotations.values():
                    ann.set_visible(False)
            # Update the annotation in the current axis..
            x, y = self.snap(x, y)
            annotation.xy = x, y
            annotation.set_text(self.formatter(x, y))
            annotation.set_visible(True)
            event.canvas.draw()


x = [1, 2, 3, 4, 5]
y = [6, 7, 8, 9, 10]

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
scat = ax.scatter(x, y)
cursor = DataCursor(scat, x, y)
plt.show()

produce:

introduzca la descripción de la imagen aquí

Puede hacer clic en cualquiera de los puntos y el globo mostrará los valores de datos subyacentes.


Mi pequeña modificación en el DataCursor fue agregar el método `snap`, que garantiza que el punto de datos que se muestra proviene del conjunto de datos original, en lugar de la ubicación en la que realmente hizo clic el mouse.


Si tiene instalado Scipy, puede preferir esta versión del Cursor, que hace que el globo siga el mouse (sin hacer clic):

 import datetime as DT
 import matplotlib.pyplot as plt
 import matplotlib.dates as mdates
 import numpy as np
 import scipy.spatial as spatial


 def fmt(x, y, is_date):
     """Return a two-line "x / y" label for a data point.

     When is_date is true, x is treated as a matplotlib date number and shown
     as YYYY-MM-DD; otherwise both values are shown with two decimal places.
     """
     if is_date:
         x = mdates.num2date(x).strftime("%Y-%m-%d")
         return 'x: {x}\ny: {y}'.format(x=x, y=y)
     else:
         return 'x: {x:0.2f}\ny: {y:0.2f}'.format(x=x, y=y)


 class FollowDotCursor(object):
     """Display the x,y location of the nearest data point."""

     def __init__(self, ax, x, y, tolerance=5, formatter=fmt,
                  offsets=(-20, 20)):
         """Attach the cursor to "ax" and start tracking mouse motion.

         "ax" is the axes whose data points are tracked.
         "x", "y" are the data coordinates; if x cannot be converted to
             float it is converted with matplotlib.dates.date2num.
         "tolerance" is stored on the instance but not otherwise used here.
         "formatter" is a callback taking (x, y, is_date) and returning the
             annotation text.
         "offsets" is a tuple of (x,y) offsets in points from the data point
             to the annotation box.
         """
         try:
             x = np.asarray(x, dtype='float')
             self.is_date = False
         except (TypeError, ValueError):
             x = np.asarray(mdates.date2num(x), dtype='float')
             self.is_date = True
         y = np.asarray(y, dtype='float')
         self._points = np.column_stack((x, y))
         self.offsets = offsets
         # Rescale x so that both columns span a comparable range before the
         # nearest-neighbour lookup.  np.ptp is used because the ndarray.ptp
         # method was removed in NumPy 2.0.
         x_range = np.ptp(x)
         self.scale = np.ptp(y) / x_range if x_range else 1
         self.tree = spatial.cKDTree(self.scaled(self._points))
         self.formatter = formatter
         self.tolerance = tolerance
         self.ax = ax
         self.fig = ax.figure
         self.ax.xaxis.set_label_position('top')
         # Marker highlighting the currently selected data point.
         self.dot = ax.scatter(
             [x.min()], [y.min()], s=130, color='green', alpha=0.7)
         self.annotation = self.setup_annotation()
         # Connect on the figure that owns "ax" (plt.connect would bind to
         # whichever figure happens to be current instead).
         self.fig.canvas.mpl_connect('motion_notify_event', self)

     def scaled(self, points):
         """Return "points" with the x column multiplied by self.scale."""
         points = np.asarray(points)
         return points * (self.scale, 1)

     def __call__(self, event):
         """Move the dot and annotation to the data point nearest the mouse."""
         ax = self.ax
         # event.inaxes is always the current axis. If you use twinx, ax could
         # be a different axis.
         if event.inaxes == ax:
             x, y = event.xdata, event.ydata
         elif event.inaxes is None:
             return
         else:
             # Convert the pixel position into the data coordinates of "ax".
             inv = ax.transData.inverted()
             x, y = inv.transform([(event.x, event.y)]).ravel()
         annotation = self.annotation
         x, y = self.snap(x, y)
         annotation.xy = x, y
         annotation.set_text(self.formatter(x, y, self.is_date))
         self.dot.set_offsets((x, y))
         event.canvas.draw()

     def setup_annotation(self):
         """Draw the (initially empty) annotation box and return it."""
         annotation = self.ax.annotate(
             '', xy=(0, 0), ha='right',
             xytext=self.offsets, textcoords='offset points', va='bottom',
             bbox=dict(
                 boxstyle='round,pad=0.5', fc='yellow', alpha=0.75),
             arrowprops=dict(
                 arrowstyle='->', connectionstyle='arc3,rad=0'))
         return annotation

     def snap(self, x, y):
         """Return the value in self.tree closest to x, y."""
         _, idx = self.tree.query(self.scaled((x, y)), k=1, p=1)
         try:
             return self._points[idx]
         except IndexError:
             # IndexError: index out of bounds
             return self._points[0]


 x = [DT.date.today() + DT.timedelta(days=i) for i in [10, 20, 30, 40, 50]]
 y = [6, 7, 8, 9, 10]

 fig = plt.figure()
 ax = fig.add_subplot(1, 1, 1)
 ax.scatter(x, y)
 cursor = FollowDotCursor(ax, x, y)
 fig.autofmt_xdate()
 plt.show()

introduzca la descripción de la imagen aquí

Puede hacer esto usando mpld3 ahora en unas pocas líneas:

https://mpld3.github.io/examples/html_tooltips.html