# Preview the first five rows of the listings table.
df.head(n=5)
Condition Condition_Desc Price Location Model_Year Mileage Exterior_Color Make Warranty Model ... Vehicle_Title OBO Feedback_Perc Watch_Count N_Reviews Seller_Status Vehicle_Tile Auction Buy_Now Bid_Count
0 Used mint!!! very low miles 11412.0 McHenry, Illinois, United States 2013.0 16000.0 Black Harley-Davidson Unspecified Touring ... NaN FALSE 8.1 NaN 2427 Private Seller Clear True FALSE 28.0
1 Used Perfect condition 17200.0 Fort Recovery, Ohio, United States 2016.0 60.0 Black Harley-Davidson Vehicle has an existing warranty Touring ... NaN FALSE 100 17 657 Private Seller Clear True TRUE 0.0
2 Used NaN 3872.0 Chicago, Illinois, United States 1970.0 25763.0 Silver/Blue BMW Vehicle does NOT have an existing warranty R-Series ... NaN FALSE 100 NaN 136 NaN Clear True FALSE 26.0
3 Used CLEAN TITLE READY TO RIDE HOME 6575.0 Green Bay, Wisconsin, United States 2009.0 33142.0 Red Harley-Davidson NaN Touring ... NaN FALSE 100 NaN 2920 Dealer Clear True FALSE 11.0
4 Used NaN 10000.0 West Bend, Wisconsin, United States 2012.0 17800.0 Blue Harley-Davidson NO WARRANTY Touring ... NaN FALSE 100 13 271 OWNER Clear True TRUE 0.0
5 rows × 22 columns
# Outlier rule: a price more than 2.5 standard deviations above the mean.
x_bar, x_std = df['Price'].mean(), df['Price'].std()
upper_bound = x_bar + 2.5 * x_std
df['Price'] > upper_bound
0 False
1 False
2 False
3 False
4 False
5 False
6 False
7 False
8 False
9 False
10 False
11 False
12 False
13 False
14 False
15 False
16 False
17 False
18 False
19 False
20 False
21 False
22 False
23 False
24 False
25 False
26 True
27 False
28 False
29 False
30 False
31 False
32 False
33 False
34 False
35 False
36 False
37 False
38 False
39 False
40 False
41 False
42 False
43 False
44 False
45 False
46 False
47 False
48 False
49 False
...
7443 False
7444 False
7445 False
7446 False
7447 False
7448 False
7449 False
7450 False
7451 False
7452 False
7453 False
7454 False
7455 False
7456 False
7457 False
7458 False
7459 False
7460 False
7461 False
7462 False
7463 False
7464 False
7465 False
7466 False
7467 False
7468 False
7469 False
7470 False
7471 False
7472 False
7473 False
7474 False
7475 False
7476 False
7477 False
7478 False
7479 False
7480 False
7481 False
7482 False
7483 False
7484 False
7485 False
7486 False
7487 False
7488 False
7489 False
7490 False
7491 False
7492 False
Name: Price, Length: 7493, dtype: bool
# Is any price above mean + 2.5 standard deviations?
# Series.any() performs the check vectorised; the builtin any() walks the
# Series one element at a time in Python. bool() keeps the plain True/False.
bool((df['Price'] > x_bar + 2.5 * x_std).any())
True
# Is any price below mean - 2.5 standard deviations?
# Series.any() performs the check vectorised; the builtin any() walks the
# Series one element at a time in Python. bool() keeps the plain True/False.
bool((df['Price'] < x_bar - 2.5 * x_std).any())
False
# Show the rows whose price is above mean + 2.5 standard deviations.
df.loc[df['Price'] > x_bar + 2.5 * x_std]
Condition Condition_Desc Price Location Model_Year Mileage Exterior_Color Make Warranty Model ... Vehicle_Title OBO Feedback_Perc Watch_Count N_Reviews Seller_Status Vehicle_Tile Auction Buy_Now Bid_Count
26 Used NaN 45900.0 South Haven, Michigan, United States 2009.0 40000.0 Red Harley-Davidson Vehicle does NOT have an existing warranty Touring ... NaN FALSE 100 25 199 Private Seller Clear False TRUE NaN
62 Used NaN 34900.0 Valparaiso, Indiana, United States 2014.0 12649.0 Purple Harley-Davidson Vehicle does NOT have an existing warranty Touring ... NaN FALSE 100 NaN 238 Private Seller Clear False TRUE NaN
94 New NaN 36100.0 Miami, Florida, United States 2017.0 949.0 Blue black silver Harley-Davidson Vehicle has an existing warranty Touring ... NaN FALSE 6.1 NaN 157 Private Seller Clear True FALSE 21.0
122 Used NaN 47720.0 Gastonia, North Carolina, United States 2016.0 3355.0 SILVER Harley-Davidson Vehicle has an existing warranty Touring ... NaN FALSE 100 11 1016 Dealer Clear False TRUE NaN
129 Used NaN 51552.0 Gastonia, North Carolina, United States 2017.0 3335.0 ORANGE Harley-Davidson Vehicle has an existing warranty Touring ... NaN FALSE 100 21 1016 Dealer Clear False TRUE NaN
143 Used NaN 49441.0 Gastonia, North Carolina, United States 2017.0 770.0 SILVER Harley-Davidson Vehicle has an existing warranty Touring ... NaN FALSE 100 19 1016 Dealer Clear False TRUE NaN
157 Used NaN 49441.0 Gastonia, North Carolina, United States 2017.0 770.0 SILVER Harley-Davidson Vehicle has an existing warranty Touring ... NaN FALSE 100 19 1016 Dealer Clear False TRUE NaN
167 Used NaN 31999.0 West Bloomfield, Michigan, United States 2016.0 300.0 Silver Harley-Davidson Vehicle has an existing warranty Touring ... NaN TRUE 100 1< 14 Private Seller Clear False TRUE NaN
191 Used NaN 60000.0 Alexandria, Louisiana, United States 2004.0 971.0 Green Harley-Davidson Vehicle does NOT have an existing warranty Touring ... NaN TRUE 100 11 290 Dealer Clear False TRUE NaN
193 Used NaN 32888.0 Gastonia, North Carolina, United States 2017.0 151.0 SATIN BLACK & RED Harley-Davidson Vehicle has an existing warranty Touring ... NaN FALSE 100 18 1016 Dealer Clear False TRUE NaN
215 Used NaN 36908.0 Gastonia, North Carolina, United States 2014.0 8180.0 BLACK & RED Harley-Davidson Vehicle has an existing warranty Touring ... NaN FALSE 100 13 1016 Dealer Clear False TRUE NaN
221 New NaN 33000.0 Sacramento, California, United States 2017.0 101.0 Orange Harley-Davidson Vehicle has an existing warranty Touring ... NaN TRUE 100 4< 75 Private Seller Clear False TRUE NaN
222 Used Brand new conversion on a 2016 harley davidson... 48000.0 Gilbert, Arizona, United States 2016.0 1100.0 Silver Harley-Davidson NaN Touring ... NaN FALSE 100 18 13 NaN Clear False TRUE NaN
224 Used NaN 31991.0 Gastonia, North Carolina, United States 2015.0 3555.0 BLACK & ORANGE Harley-Davidson Vehicle has an existing warranty Touring ... NaN FALSE 100 11 1016 Dealer Clear False TRUE NaN
226 Used NaN 47720.0 Gastonia, North Carolina, United States 2016.0 3355.0 SILVER Harley-Davidson Vehicle has an existing warranty Touring ... NaN FALSE 100 8< 1016 Dealer Clear False TRUE NaN
235 Used NaN 41444.0 Gastonia, North Carolina, United States 2016.0 3998.0 SATIN BLACK & ORANGE Harley-Davidson Vehicle has an existing warranty Touring ... NaN FALSE 100 5< 1016 Dealer Clear False TRUE NaN
267 Used NaN 33857.0 Gastonia, North Carolina, United States 2017.0 2280.0 WHITE Harley-Davidson Vehicle has an existing warranty Touring ... NaN FALSE 100 5< 1016 Dealer Clear False TRUE NaN
272 Used NaN 51000.0 Fontana, California, United States 2016.0 3500.0 Silver Harley-Davidson NaN Touring ... NaN FALSE NaN 4< 3 Private Seller Clear True TRUE 0.0
294 New NaN 48900.0 Lima, Ohio, United States 2017.0 0.0 Red Honda Vehicle has an existing warranty Gold Wing ... NaN TRUE 100 7< 1645 Dealer Clear False TRUE NaN
295 Used ALL USED ITEMS, offered at BUY IT NOW PRICES! ... 100000.0 Lathrop, California, United States 2000.0 111111.0 all Harley-Davidson Vehicle does NOT have an existing warranty Other ... NaN FALSE 9.8 10 3280 Dealer Clear False TRUE NaN
296 Used NaN 70000.0 Gilbert, Arizona, United States 2010.0 1863.0 Black Harley-Davidson NaN Other ... NaN TRUE NaN 8< 7 Private Seller Clear False TRUE NaN
298 Used NaN 34000.0 Riverside, California, United States 2013.0 7530.0 Yellow Harley-Davidson NaN Touring ... NaN FALSE 100 NaN 37 Private Seller Clear True FALSE 0.0
300 Used NaN 34000.0 Riverside, California, United States 2013.0 7530.0 Yellow Harley-Davidson NaN Touring ... NaN FALSE 100 NaN 37 Private Seller Clear True FALSE 0.0
308 Used NaN 33857.0 Gastonia, North Carolina, United States 2017.0 2280.0 WHITE Harley-Davidson Vehicle has an existing warranty Touring ... NaN FALSE 100 7< 1016 Dealer Clear False TRUE NaN
313 Used NaN 39919.0 Gastonia, North Carolina, United States 2017.0 1770.0 TITANIUM SILVER Harley-Davidson Vehicle has an existing warranty Touring ... NaN FALSE 100 12 1016 Dealer Clear False TRUE NaN
324 Used NaN 32333.0 Gastonia, North Carolina, United States 2016.0 1795.0 GREY & RED Harley-Davidson Vehicle has an existing warranty Touring ... NaN FALSE 100 11 1016 Dealer Clear False TRUE NaN
344 Used 5,700 miles 65000.0 Bonney Lake, Washington, United States 2015.0 5200.0 Green Harley-Davidson Vehicle does NOT have an existing warranty Touring ... NaN TRUE 100 3< 266 Private Seller Clear False TRUE NaN
359 Used 5,700 miles 65000.0 Bonney Lake, Washington, United States 2015.0 5200.0 Green Harley-Davidson Vehicle does NOT have an existing warranty Touring ... NaN TRUE 100 3< 266 Private Seller Clear False TRUE NaN
371 Used NaN 31500.0 Lee, New Hampshire, United States 2016.0 554.0 NaN Harley-Davidson Vehicle has an existing warranty Touring ... NaN TRUE 100 2< 352 Dealer Clear False TRUE NaN
382 New NaN 48900.0 Lima, Ohio, United States 2017.0 0.0 Red Honda Vehicle has an existing warranty Gold Wing ... NaN TRUE 100 7< 1645 Dealer Clear False TRUE NaN
399 Used NaN 36992.0 Gastonia, North Carolina, United States 2016.0 4140.0 SILVER Harley-Davidson Vehicle has an existing warranty Touring ... NaN FALSE 100 6< 1016 Dealer Clear False TRUE NaN
404 Used MINT CONDITION 49900.0 Timmonsville, South Carolina, United States 2013.0 6500.0 Green Harley-Davidson Vehicle does NOT have an existing warranty Touring ... NaN TRUE 100 21 907 Private Seller Clear False TRUE NaN
414 Used NaN 36499.0 Brentwood, Tennessee, United States 2016.0 2200.0 Red Harley-Davidson Vehicle has an existing warranty Touring ... NaN TRUE 100 6< 222 NaN Clear False TRUE NaN
528 Used NaN 32500.0 Eagle River, Wisconsin, United States 2016.0 8000.0 NaN Harley-Davidson Vehicle has an existing warranty Touring ... NaN TRUE 100 2< 1553 Private Seller Clear False TRUE NaN
568 Used NaN 32500.0 Eagle River, Wisconsin, United States 2016.0 8000.0 NaN Harley-Davidson Vehicle has an existing warranty Touring ... NaN TRUE 100 2< 1553 Private Seller Clear False TRUE NaN
630 Used only 4554 miles on bike with new independent s... 35000.0 Gilbert, Arizona, United States 2015.0 4554.0 NaN Harley-Davidson NaN Touring ... NaN FALSE 100 6< 13 NaN Clear False TRUE NaN
632 Used NaN 45000.0 Wichita Falls, Texas, United States 2013.0 2500.0 Black Harley-Davidson NaN Touring ... NaN TRUE 100 1< 42 Private Seller Clear False TRUE NaN
649 Used NaN 32989.0 Piedmont, South Carolina, United States 2014.0 4359.0 TRIBAL ORANGE Harley-Davidson Unspecified Touring ... NaN TRUE 100 9< 174 Private Seller Clear False TRUE NaN
860 Used bike has 6027 miles and the conversion is new 32500.0 Gilbert, Arizona, United States 2010.0 6027.0 NaN Harley-Davidson NaN Touring ... NaN FALSE 100 5< 13 NaN Clear False TRUE NaN
985 Used SUPER CLEAN LIKE NEW CALIFORNIA SIDE CAR TRIKE... 34900.0 Lima, Ohio, United States 2012.0 7101.0 BLUE / SILVER Honda Vehicle has an existing warranty Gold Wing ... NaN TRUE 100 1< 1645 Dealer Clear False TRUE NaN
1092 Used bike has 16954 miles. Q-TEC conversion is new 37000.0 Gilbert, Arizona, United States 2014.0 16954.0 NaN Harley-Davidson NaN Touring ... NaN FALSE 100 3< 13 NaN Clear False TRUE NaN
1179 Used bike has 16954 miles. Q-TEC conversion is new 37000.0 Gilbert, Arizona, United States 2014.0 16954.0 NaN Harley-Davidson NaN Touring ... NaN FALSE 100 3< 13 NaN Clear False TRUE NaN
1217 Used NaN 39995.0 Tempe, Arizona, United States 2014.0 1.0 Silver Harley-Davidson NaN Road King? ... TRUE >90 12 8 Dealer Clear FALSE True NaN NaN
1235 New NaN 43900.0 Lima, Ohio, United States 2017.0 0.0 BLUE / BLACK Honda Vehicle has an existing warranty Gold Wing ... NaN TRUE 100 1< 1645 Dealer Clear False TRUE NaN
1236 Used NaN 39000.0 Toms River, New Jersey, United States 2012.0 6000.0 Silver Harley-Davidson Vehicle does NOT have an existing warranty Touring ... NaN FALSE 100 NaN 108 Private Seller Clear True FALSE 0.0
1237 Used NaN 32000.0 Parker, Colorado, United States 2012.0 2850.0 Orange Harley-Davidson NaN Touring ... NaN TRUE 100 4< 1599 Private Seller Clear False TRUE NaN
1262 New NaN 43900.0 Lima, Ohio, United States 2017.0 0.0 BLUE / BLACK Honda Vehicle has an existing warranty Gold Wing ... NaN TRUE 100 1< 1645 Dealer Clear False TRUE NaN
1311 Used NaN 39995.0 Tempe, Arizona, United States 2014.0 1.0 Silver Harley-Davidson NaN Road King? ... TRUE >90 12 8 Dealer Clear FALSE True NaN NaN
1487 Used NaN 42500.0 Lubbock, Texas, United States 2014.0 5200.0 Black Harley-Davidson Vehicle does NOT have an existing warranty Softail ... NaN FALSE 100 NaN 23 Private Seller Clear True TRUE 0.0
1550 Used NaN 42500.0 Lubbock, Texas, United States 2014.0 5200.0 Black Harley-Davidson Vehicle does NOT have an existing warranty Softail ... NaN FALSE 100 NaN 23 Private Seller Clear True TRUE 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4858 Used NaN 57000.0 West Bloomfield, Michigan, United States 2016.0 1706.0 White Campagna NaN T-Rex16S-P ... NaN TRUE 100 NaN 14 Private Seller Clear False TRUE NaN
4931 Used NaN 34900.0 Lake Park, Georgia, United States 2001.0 2500.0 Red Boss Hoss Vehicle does NOT have an existing warranty 434 stroker ... NaN FALSE 100 NaN 38 Private Seller Clear True FALSE 4.0
5058 Used NaN 32450.0 Dallas, Texas, United States 2016.0 1515.0 Charcoal Slate and Carbon Dust Harley-Davidson Unspecified FLTRUSE - Screamin Eagle Road Glide Ultra CVO ... NaN TRUE 100 6< 532 Dealer Clear False TRUE NaN
5104 Used No defects, is in immaculate condition 70000.0 Whyteleafe, United Kingdom 2006.0 100.0 NaN MV Agusta NaN MV Agusta F4 1000 Veltro Strada ... NaN TRUE 100 4< 228 Private Seller Clear False TRUE NaN
5144 Used NaN 37000.0 Brandon, Wisconsin, United States 1990.0 2220.0 NaN Custom Built Motorcycles NaN Pro Street ... NaN TRUE NaN NaN 0 NaN Clear False TRUE NaN
5157 Used Spotless custom built show bike, best of every... 34900.0 Pittsburgh, Pennsylvania, United States 2006.0 1200.0 Orange Custom Built Motorcycles Vehicle does NOT have an existing warranty Chopper ... NaN FALSE NaN NaN 0 Private Seller Clear False TRUE NaN
5182 New NaN 59997.0 Houston, Texas, United States 2014.0 0.0 Red Ducati Unspecified 1199 Superleggera ... NaN TRUE 100 13 246 Dealer Clear False TRUE NaN
5184 New NaN 65000.0 Marietta, Georgia, United States 2010.0 12.0 Gray Custom Built Motorcycles Vehicle does NOT have an existing warranty Chopper ... NaN FALSE NaN NaN 0 Private Seller Clear False TRUE NaN
5207 Used NaN 33977.0 Dallas, Texas, United States 2016.0 1.0 Black Big Dog Unspecified K-9 124 Level 3 Graphics BIG DOG IS BACK ... NaN TRUE 100 NaN 532 Dealer Clear False TRUE NaN
5210 Used NaN 31488.0 Dallas, Texas, United States 2016.0 208.0 Gray Big Dog Unspecified Mastiff Limited Edition ... NaN TRUE 100 1< 532 Dealer Clear False TRUE NaN
5231 Used No expense spared in this beautiful museum gra... 34000.0 Miami, Florida, United States 1968.0 4708.0 NaN Norton NaN NaN ... NaN TRUE >97 27 115 NaN Clear False TRUE NaN
5240 Used NaN 67447.0 Pompano Beach, Florida, United States 2017.0 0.0 Blue Campagna Unspecified T-Rex ... NaN TRUE 100 2< 1765 Dealer Clear False TRUE NaN
5265 Used NaN 61447.0 Pompano Beach, Florida, United States 2017.0 22.0 Gray Campagna Unspecified T-Rex ... NaN TRUE 100 1< 1765 Dealer Clear False TRUE NaN
5267 Used NaN 63446.0 Pompano Beach, Florida, United States 2017.0 0.0 Orange Campagna Unspecified T-Rex ... NaN TRUE 100 NaN 1765 Dealer Clear False TRUE NaN
5268 New NaN 50000.0 Saint-Jean-Chrysostome, Quebec, Canada NaN NaN NaN NaN NaN NaN ... NaN TRUE NaN 13 2 NaN NaN False TRUE NaN
5270 Used NaN 67447.0 Pompano Beach, Florida, United States 2017.0 21.0 Red Campagna Unspecified T-Rex ... NaN TRUE 100 2< 1765 Dealer Clear False TRUE NaN
5273 Used NaN 61447.0 Pompano Beach, Florida, United States 2017.0 22.0 Blue Campagna Unspecified T-Rex ... NaN TRUE 100 1< 1765 Dealer Clear False TRUE NaN
5274 Used NaN 61447.0 Pompano Beach, Florida, United States 2017.0 0.0 White Campagna Unspecified T-Rex ... NaN TRUE 100 NaN 1765 Dealer Clear False TRUE NaN
5286 Used NaN 69446.0 Pompano Beach, Florida, United States 2017.0 22.0 Orange Campagna Unspecified T-Rex ... NaN TRUE 100 NaN 1765 Dealer Clear False TRUE NaN
5287 Used NaN 61447.0 Pompano Beach, Florida, United States 2017.0 22.0 Red Campagna Unspecified T-Rex ... NaN TRUE 100 NaN 1765 Dealer Clear False TRUE NaN
5388 Used NaN 32677.0 Scott City, Missouri, United States 2016.0 4476.0 Red Harley-Davidson Vehicle has an existing warranty Cvo Limited ... NaN FALSE 100 NaN 4 Dealer Clear False FALSE NaN
5482 New NaN 52000.0 Indianapolis, Indiana, United States 2016.0 1.0 Gray Kawasaki NaN Ninja ... NaN TRUE >80 NaN 375 Dealer Clear False FALSE NaN
5484 Used NaN 49988.0 Mooresville, North Carolina, United States 2016.0 3142.0 Blue Harley-Davidson Unspecified STREET GLIDE SPECIAL (FLHXS) ... NaN TRUE 100 NaN 824 Dealer Clear False FALSE NaN
5601 New NaN 31999.0 Indianapolis, Indiana, United States 2017.0 1.0 Blue Sapphire & Brilliant Blue Indian NaN Roadmaster? ... TRUE >80 NaN 375 Dealer Clear FALSE False NaN NaN
5672 Used NaN 46988.0 Mooresville, North Carolina, United States 2014.0 2707.0 Orange Harley-Davidson Unspecified Street Glide Special FLHX ... NaN TRUE 100 NaN 824 Dealer Clear False FALSE NaN
5729 Used NaN 35900.0 Bear, Delaware, United States 2008.0 9500.0 Red Harley-Davidson NaN Touring ... NaN FALSE 100 15 46 Private Seller Rebuilt, Rebuildable & Reconstructed True TRUE 0.0
5798 Used NaN 69446.0 Pompano Beach, Florida, United States 2017.0 22.0 Orange Campagna Unspecified T-Rex ... NaN TRUE 100 NaN 1765 Dealer Clear False FALSE NaN
5824 Used NaN 61447.0 Pompano Beach, Florida, United States 2017.0 22.0 Blue Campagna Unspecified T-Rex ... NaN TRUE 100 NaN 1765 Dealer Clear False FALSE NaN
5934 Used NaN 60000.0 Red Deer, Alberta, Canada 2002.0 8266.0 Black Harley-Davidson Vehicle does NOT have an existing warranty Road Glide ... NaN TRUE 100 2< 3 Private Seller NaN False TRUE NaN
6232 Used NaN 51995.0 Charlotte, North Carolina, United States 2012.0 6685.0 NaN Harley-Davidson Vehicle has an existing warranty Other ... NaN TRUE 8.2 NaN 80 Dealer Clear False FALSE NaN
6345 Used Mike Hailwood was one of motorcycle racings al... 42900.0 Saint Louis, Missouri, United States 1980.0 12083.0 Red/Green Ducati Vehicle does NOT have an existing warranty Other ... NaN TRUE 8.6 NaN 2251 Dealer Clear False FALSE NaN
6394 Used NaN 38995.0 Omaha, Nebraska, United States 1958.0 61.0 Red Harley-Davidson Vehicle does NOT have an existing warranty Touring ... NaN TRUE 3.4 NaN 738 Dealer Clear False FALSE NaN
6483 Used NaN 37000.0 McKinney, Texas, United States 2014.0 15698.0 Black *Harley-Davidson* Martin Bros Unspecified Custom Road Glide ... NaN TRUE >50 NaN 397 Dealer Clear False FALSE NaN
6588 Used NaN 41500.0 Saint Louis, Missouri, United States 1981.0 1.0 Red Ducati Unspecified Other ... NaN TRUE 8.6 NaN 2251 Dealer Clear False FALSE NaN
6806 Used NaN 41500.0 Saint Louis, Missouri, United States 1980.0 12083.0 Green Ducati Unspecified Other ... NaN TRUE 8.6 NaN 2251 Dealer Clear False FALSE NaN
6917 Used NaN 35999.0 Delbarton, West Virginia, United States 2016.0 14327.0 Blue Harley-Davidson Unspecified FLHTKSE - CVO?Limited ... NaN TRUE 100 NaN 28 Dealer Clear False FALSE NaN
6924 Used NaN 33588.0 Scott City, Missouri, United States 2016.0 3960.0 Red Harley-Davidson Vehicle has an existing warranty Cvo Road Glide Ultra ... NaN FALSE 100 NaN 4 Dealer Clear False FALSE NaN
6970 Used NaN 38999.0 Elmhurst, Illinois, United States 2015.0 6970.0 Black Boss Hoss Unspecified BHC-9 Coupe Trike ... NaN TRUE 100 NaN 152 Dealer Clear False FALSE NaN
6988 Used NaN 35450.0 Dallas, Texas, United States 2017.0 2864.0 Black Harley-Davidson Unspecified FLHXSE - Screamin Eagle Street Glide CVO ... NaN TRUE 100 NaN 532 Dealer Clear False FALSE NaN
6999 Used NaN 37500.0 Poland, Maine, United States 2017.0 2571.0 Silver/Black Harley-Davidson NaN Tri Glide ... NaN TRUE 6.4 NaN 992 Private Seller Clear False FALSE NaN
7001 Used NaN 37500.0 Poland, Maine, United States 2017.0 2571.0 Silver/Black Harley-Davidson NaN Tri Glide ... NaN TRUE 6.4 NaN 992 Private Seller Clear False FALSE NaN
7020 New NaN 54900.0 Glen Ellyn, Illinois, United States 2016.0 1.0 Yellow Campagna Unspecified T-Rex ... NaN TRUE 100 NaN 573 Dealer Clear False FALSE NaN
7045 New NaN 52897.0 Glen Ellyn, Illinois, United States 2016.0 0.0 Red Campagna Unspecified T-Rex ... NaN TRUE 100 NaN 573 Dealer Clear False FALSE NaN
7050 New NaN 52897.0 Glen Ellyn, Illinois, United States 2016.0 0.0 Red Campagna Unspecified T-Rex ... NaN TRUE 100 NaN 573 Dealer Clear False FALSE NaN
7064 New NaN 54897.0 Glen Ellyn, Illinois, United States 2016.0 1.0 Gray Campagna Unspecified T-Rex ... NaN TRUE 100 NaN 573 Dealer Clear False FALSE NaN
7138 Used NaN 42500.0 Phoenix, Arizona, United States 1999.0 700.0 Blue ADVANTAGE Vehicle does NOT have an existing warranty 32 Victory ... NaN TRUE 100 NaN 1452 Dealer Clear False FALSE NaN
7221 Used NaN 67447.0 Pompano Beach, Florida, United States 2017.0 0.0 Black Campagna Unspecified T-Rex ... NaN TRUE 100 NaN 1765 Dealer Clear False FALSE NaN
7249 Used NaN 34900.0 Greensboro, North Carolina, United States 2017.0 2341.0 Black Harley-Davidson Unspecified FLHXSE - CVO?Street Glide? ... TRUE NaN NaN 0 Dealer Clear FALSE False NaN NaN
7337 Used NaN 32999.0 Greensboro, North Carolina, United States 2016.0 10.0 Black Indian Unspecified Chief?Vintage Jack Daniel's?Limited Edition ... NaN TRUE 100 NaN 312 Dealer Clear False FALSE NaN
7340 Used NaN 32877.0 Scott City, Missouri, United States 2016.0 5100.0 Black Harley-Davidson Vehicle has an existing warranty FLHXSE - CVO Street Glide ... NaN FALSE 100 NaN 4 Dealer Clear False FALSE NaN
160 rows × 22 columns
# Summary statistics (count, mean, std, min, quartiles, max) of Price.
df.Price.describe()
count 7493.000000
mean 9968.811557
std 8497.326850
min 0.000000
25% 4158.000000
50% 7995.000000
75% 13000.000000
max 100000.000000
Name: Price, dtype: float64
# Quartile-based outlier rule on Price.
Q1 = df['Price'].quantile(q=0.25)  # first quartile
Q3 = df['Price'].quantile(q=0.75)  # third quartile
IQR = Q3 - Q1  # interquartile range
# Upper outliers: is any price above Q3 + 1.5 * IQR?
# Series.any() performs the check vectorised; the builtin any() walks the
# Series one element at a time in Python. bool() keeps the plain True/False.
bool((df['Price'] > Q3 + 1.5 * IQR).any())
True
# Lower outliers: is any price below Q1 - 1.5 * IQR?
# Series.any() performs the check vectorised; the builtin any() walks the
# Series one element at a time in Python. bool() keeps the plain True/False.
bool((df['Price'] < Q1 - 1.5 * IQR).any())
False
import matplotlib.pyplot as plt
%matplotlib inline
#确保图能在jupyter notebook里面运行出来
df['Price'].plot(kind = 'box')#箱图(盒图)
<matplotlib.axes._subplots.AxesSubplot at 0x1f698c8f198>
# Distribution plot: normalised histogram with a kernel density curve on top.
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and the
# old 'seaborn' name was removed in later releases, so use whichever name this
# installation provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
# bins: number of bars; density=True scales the histogram to a probability density.
df.Price.plot(kind='hist', bins=30, density=True)
df.Price.plot(kind='kde')  # kernel density estimate
plt.show()
# Capping: clamp Price to the [1st percentile, 99th percentile] range and
# store the result in a new column so the raw values stay available.
P1, P99 = df['Price'].quantile([0.01, 0.99])
above_cap = df['Price'] > P99
below_floor = df['Price'] < P1
df['Price_new'] = df['Price']
df.loc[above_cap, 'Price_new'] = P99
df.loc[below_floor, 'Price_new'] = P1
# Compare the raw and the capped column side by side.
df[['Price', 'Price_new']].describe()
Price Price_new
count 7493.000000 7493.000000
mean 9968.811557 9821.220873
std 8497.326850 7737.092537
min 0.000000 100.000000
25% 4158.000000 4158.000000
50% 7995.000000 7995.000000
75% 13000.000000 13000.000000
max 100000.000000 39995.320000
# Box plot of the capped price column.
df['Price_new'].plot.box()
<matplotlib.axes._subplots.AxesSubplot at 0x1f69acf1eb8>
#等宽分箱
# Equal-width binning: split the capped price range into five bins of the
# same width and label them 0..4.
n_bins = 5
df['Price_bin'] = pd.cut(df['Price_new'], bins=n_bins, labels=range(n_bins))
df['Price_bin']
0 1
1 2
2 0
3 0
4 1
5 0
6 3
7 0
8 0
9 0
10 0
11 1
12 0
13 0
14 0
15 1
16 1
17 1
18 2
19 1
20 1
21 1
22 0
23 0
24 3
25 1
26 4
27 1
28 1
29 2
30 3
31 2
32 1
33 1
34 2
35 0
36 0
37 1
38 0
39 2
40 0
41 0
42 0
43 1
44 1
45 0
46 0
47 2
48 1
49 0
..
7443 0
7444 0
7445 0
7446 0
7447 2
7448 0
7449 0
7450 0
7451 0
7452 0
7453 1
7454 0
7455 0
7456 0
7457 1
7458 1
7459 0
7460 0
7461 0
7462 0
7463 0
7464 0
7465 0
7466 0
7467 0
7468 1
7469 1
7470 0
7471 0
7472 0
7473 0
7474 0
7475 0
7476 1
7477 0
7478 1
7479 0
7480 0
7481 1
7482 0
7483 0
7484 0
7485 0
7486 1
7487 0
7488 0
7489 1
7490 0
7491 0
7492 1
Name: Price_bin, Length: 7493, dtype: category
Categories (5, int64): [0 < 1 < 2 < 3 < 4]
# Number of rows that fall into each price bin.
df.Price_bin.value_counts()
0 3873
1 2331
2 866
3 277
4 146
Name: Price_bin, dtype: int64
# Bar chart of the per-bin row counts.
bin_counts = df['Price_bin'].value_counts()
bin_counts.plot.bar()
<matplotlib.axes._subplots.AxesSubplot at 0x1f697d6b710>
# Histogram of the bin labels.
df.Price_bin.hist()
<matplotlib.axes._subplots.AxesSubplot at 0x1f69adb5a58>
# Binning with hand-picked bin edges.
w = [100, 1000, 5000, 10000, 20000, 100000]
# pd.cut builds right-closed intervals and by default leaves the first edge
# out, so a price equal to w[0] would get NaN instead of bin 0. Price_new was
# capped from below, so its minimum can sit exactly on that edge;
# include_lowest=True keeps those rows in the first bin.
# One label per interval: len(w) - 1 labels for len(w) edges.
df['Price_bin'] = pd.cut(
    df['Price_new'],
    bins=w,
    labels=range(len(w) - 1),
    include_lowest=True,
)
df[['Price_new', 'Price_bin']]
Price_new Price_bin
0 11412.00 3
1 17200.00 3
2 3872.00 1
3 6575.00 2
4 10000.00 2
5 1500.00 1
6 24900.00 4
7 1400.00 1
8 5100.00 2
9 8000.00 2
10 2125.00 1
11 11100.00 3
12 1125.00 1
13 3550.00 1
14 5500.00 2
15 9000.00 2
16 8100.00 2
17 14000.00 3
18 20000.00 3
19 13000.00 3
20 10900.00 3
21 9999.00 2
22 5700.00 2
23 7400.00 2
24 26500.00 4
25 9850.00 2
26 39995.32 4
27 10600.00 3
28 12000.00 3
29 20000.00 3
30 25000.00 4
31 20000.00 3
32 11000.00 3
33 8500.00 2
34 18000.00 3
35 7700.00 2
36 2750.00 1
37 10895.00 3
38 6700.00 2
39 18000.00 3
40 7300.00 2
41 6800.00 2
42 5000.00 1
43 13200.00 3
44 11400.00 3
45 3250.00 1
46 8000.00 2
47 17800.00 3
48 8500.00 2
49 4050.00 1
... ... ...
7443 7900.00 2
7444 5900.00 2
7445 3900.00 1
7446 7900.00 2
7447 23499.00 4
7448 6500.00 2
7449 6500.00 2
7450 3900.00 1
7451 5200.00 2
7452 3900.00 1
7453 8300.00 2
7454 7400.00 2
7455 4700.00 1
7456 4900.00 1
7457 8500.00 2
7458 9200.00 2
7459 3900.00 1
7460 7500.00 2
7461 4200.00 1
7462 6500.00 2
7463 7800.00 2
7464 4400.00 1
7465 7900.00 2
7466 6500.00 2
7467 7400.00 2
7468 8800.00 2
7469 13570.00 3
7470 7900.00 2
7471 6200.00 2
7472 7500.00 2
7473 5500.00 2
7474 3400.00 1
7475 3900.00 1
7476 8500.00 2
7477 4900.00 1
7478 9900.00 2
7479 2900.00 1
7480 6500.00 2
7481 10500.00 3
7482 7200.00 2
7483 7500.00 2
7484 5000.00 1
7485 5000.00 1
7486 8900.00 2
7487 4900.00 1
7488 3900.00 1
7489 8900.00 2
7490 7800.00 2
7491 7900.00 2
7492 12970.00 3
7493 rows × 2 columns
# Histogram of the bin labels produced by the hand-picked edges.
df.Price_bin.hist()
<matplotlib.axes._subplots.AxesSubplot at 0x1f699bfaac8>
# Equal-width binning without labels: each row gets its interval as category.
df['Price_bin'] = pd.cut(df['Price_new'], bins=5)
df['Price_bin']
0 (8079.064, 16058.128]
1 (16058.128, 24037.192]
2 (60.105, 8079.064]
3 (60.105, 8079.064]
4 (8079.064, 16058.128]
5 (60.105, 8079.064]
6 (24037.192, 32016.256]
7 (60.105, 8079.064]
8 (60.105, 8079.064]
9 (60.105, 8079.064]
10 (60.105, 8079.064]
11 (8079.064, 16058.128]
12 (60.105, 8079.064]
13 (60.105, 8079.064]
14 (60.105, 8079.064]
15 (8079.064, 16058.128]
16 (8079.064, 16058.128]
17 (8079.064, 16058.128]
18 (16058.128, 24037.192]
19 (8079.064, 16058.128]
20 (8079.064, 16058.128]
21 (8079.064, 16058.128]
22 (60.105, 8079.064]
23 (60.105, 8079.064]
24 (24037.192, 32016.256]
25 (8079.064, 16058.128]
26 (32016.256, 39995.32]
27 (8079.064, 16058.128]
28 (8079.064, 16058.128]
29 (16058.128, 24037.192]
30 (24037.192, 32016.256]
31 (16058.128, 24037.192]
32 (8079.064, 16058.128]
33 (8079.064, 16058.128]
34 (16058.128, 24037.192]
35 (60.105, 8079.064]
36 (60.105, 8079.064]
37 (8079.064, 16058.128]
38 (60.105, 8079.064]
39 (16058.128, 24037.192]
40 (60.105, 8079.064]
41 (60.105, 8079.064]
42 (60.105, 8079.064]
43 (8079.064, 16058.128]
44 (8079.064, 16058.128]
45 (60.105, 8079.064]
46 (60.105, 8079.064]
47 (16058.128, 24037.192]
48 (8079.064, 16058.128]
49 (60.105, 8079.064]
...
7443 (60.105, 8079.064]
7444 (60.105, 8079.064]
7445 (60.105, 8079.064]
7446 (60.105, 8079.064]
7447 (16058.128, 24037.192]
7448 (60.105, 8079.064]
7449 (60.105, 8079.064]
7450 (60.105, 8079.064]
7451 (60.105, 8079.064]
7452 (60.105, 8079.064]
7453 (8079.064, 16058.128]
7454 (60.105, 8079.064]
7455 (60.105, 8079.064]
7456 (60.105, 8079.064]
7457 (8079.064, 16058.128]
7458 (8079.064, 16058.128]
7459 (60.105, 8079.064]
7460 (60.105, 8079.064]
7461 (60.105, 8079.064]
7462 (60.105, 8079.064]
7463 (60.105, 8079.064]
7464 (60.105, 8079.064]
7465 (60.105, 8079.064]
7466 (60.105, 8079.064]
7467 (60.105, 8079.064]
7468 (8079.064, 16058.128]
7469 (8079.064, 16058.128]
7470 (60.105, 8079.064]
7471 (60.105, 8079.064]
7472 (60.105, 8079.064]
7473 (60.105, 8079.064]
7474 (60.105, 8079.064]
7475 (60.105, 8079.064]
7476 (8079.064, 16058.128]
7477 (60.105, 8079.064]
7478 (8079.064, 16058.128]
7479 (60.105, 8079.064]
7480 (60.105, 8079.064]
7481 (8079.064, 16058.128]
7482 (60.105, 8079.064]
7483 (60.105, 8079.064]
7484 (60.105, 8079.064]
7485 (60.105, 8079.064]
7486 (8079.064, 16058.128]
7487 (60.105, 8079.064]
7488 (60.105, 8079.064]
7489 (8079.064, 16058.128]
7490 (60.105, 8079.064]
7491 (60.105, 8079.064]
7492 (8079.064, 16058.128]
Name: Price_bin, Length: 7493, dtype: category
Categories (5, interval[float64]): [(60.105, 8079.064] < (8079.064, 16058.128] < (16058.128, 24037.192] < (24037.192, 32016.256] < (32016.256, 39995.32]]
# Number of rows per equal-width interval.
df['Price_bin'].value_counts()
(60.105, 8079.064] 3873
(8079.064, 16058.128] 2331
(16058.128, 24037.192] 866
(24037.192, 32016.256] 277
(32016.256, 39995.32] 146
Name: Price_bin, dtype: int64
# Equal-frequency binning: build the k + 1 quantile probabilities 0, 1/k, ..., 1.
k = 5
w = [step / k for step in range(0, k + 1)]
w
[0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
# Quantile-based binning: pd.qcut places its cut points at the quantiles in w.
quantile_bins = pd.qcut(df['Price_new'], q=w, labels=range(5))
df['Price_bin'] = quantile_bins
df.Price_bin.hist()
<matplotlib.axes._subplots.AxesSubplot at 0x1f699b6a6a0>
# Find the quantile cut points of the capped price.
k = 5
probabilities = [step / k for step in range(k + 1)]
w1 = df['Price_new'].quantile(probabilities)
w1
0.0 100.00
0.2 3500.00
0.4 6491.00
0.6 9777.00
0.8 14999.00
1.0 39995.32
Name: Price_new, dtype: float64
# Make the smallest edge a little smaller and the largest a little larger.
w1.loc[0.0] = 0.95 * w1.loc[0.0]
w1.loc[1.0] = 1.1 * w1.loc[1.0]
w1
0.0 95.000
0.2 3500.000
0.4 6491.000
0.6 9777.000
0.8 14999.000
1.0 43994.852
Name: Price_new, dtype: float64
# Cutting with pd.cut on the quantile edges in w1 is intended to give
# equal-frequency bins.
edge_based_bins = pd.cut(df['Price_new'], bins=w1, labels=range(5))
df['Price_bin'] = edge_based_bins
df.Price_bin.hist()
<matplotlib.axes._subplots.AxesSubplot at 0x1f699c2ee80>
欢迎阅读数据清洗系列文章:python数据清洗工具、方法、过程整理归纳