שימו לב: על מנת להריץ את התאים ב-Live Code, יש לייבא תחילה את ספרית pandas ע”י הרצת השורת הראשונה בתא למטה.
בנוסף, נגביל את מספר השורות והעמודות שתופענה בהדפסת הטבלאות ע”י שורות הקוד השניה והשלישית:
import pandas as pd
pd.options.display.max_rows=5
pd.options.display.max_columns=5
שאלות ממבחני עבר#
E.
def read_in_range(start_month,start_year,end_month,end_year):
cur_month=start_month
cur_year=start_year
dfs=[]
while cur_year < end_year or (cur_year == end_year and cur_month <= end_month):
dfs.append(pd.read_csv("files/orders_"+str(cur_month)+"_"+str(cur_year)+".csv")) # The "files/" prefix in the path is due to technical reasons, and should be ignored.
cur_month+=1
if cur_month>12:
cur_year+=1
cur_month=1
return pd.concat(dfs, ignore_index=True)
Read products and order tables
df_p=pd.read_csv("files/products2.csv") # The "files/" prefix in the path is due to technical reasons, and should be ignored.
df_o=read_in_range(2,2024,4,2024)
F.
display(df_o)
df_o.fillna(df_o.median(),inplace=True)
display(df_o)
| Espresso | Americano | Sandwich | Salad | |
|---|---|---|---|---|
| 0 | 0 | 1.0 | 0 | 1.0 |
| 1 | 1 | 1.0 | 0 | NaN |
| ... | ... | ... | ... | ... |
| 7 | 1 | 5.0 | 0 | 0.0 |
| 8 | 2 | 0.0 | 5 | 0.0 |
9 rows × 4 columns
| Espresso | Americano | Sandwich | Salad | |
|---|---|---|---|---|
| 0 | 0 | 1.0 | 0 | 1.0 |
| 1 | 1 | 1.0 | 0 | 0.5 |
| ... | ... | ... | ... | ... |
| 7 | 1 | 5.0 | 0 | 0.0 |
| 8 | 2 | 0.0 | 5 | 0.0 |
9 rows × 4 columns
G.
df_p.index=df_p['Name'] # First line is missing
df_join=pd.concat((df_p,df_o.T), axis=1)
df_join
| Name | Price | ... | 7 | 8 | |
|---|---|---|---|---|---|
| Sandwich | Sandwich | 30 | ... | 0.0 | 5.0 |
| Salad | Salad | 40 | ... | 0.0 | 0.0 |
| Espresso | Espresso | 8 | ... | 1.0 | 2.0 |
| Americano | Americano | 11 | ... | 5.0 | 0.0 |
4 rows × 13 columns
H.
profit=df_join.iloc[:,4:].sum(axis=1)*( df_join['Price']- df_join['Cost']) # "axis of sum should be 1"
profit
Sandwich 120.0
Salad 110.0
Espresso 42.0
Americano 75.0
dtype: float64
I.
df_p['Gain']=df_o.sum()*(df_p['Price']-df_p['Cost'])
df_p.groupby('Vendor')['Gain'].sum().idxmin()
'LIV'