cs122d-2022-spring: CS122D-Lecture9-10-Mongo-Notebook.ipynb

File CS122D-Lecture9-10-Mongo-Notebook.ipynb, 48.3 KB (added by Mike Carey, 4 years ago)

Jupyter notebook for lectures 9-10

Line 
1{
2 "cells": [
3 {
4 "cell_type": "code",
5 "execution_count": 1,
6 "metadata": {},
7 "outputs": [],
8 "source": [
9 "#------------------------------------------------------------------------------------------------\n",
10 "# CS122D query notebook 1 -- MongoDB find() and aggregate() queries and their SQL equivalents\n",
11 "#------------------------------------------------------------------------------------------------"
12 ]
13 },
14 {
15 "cell_type": "code",
16 "execution_count": 2,
17 "metadata": {},
18 "outputs": [],
19 "source": [
20 "#-----------------------------------------------------------------------------------\n",
21 "# set up the environment\n",
22 "#-----------------------------------------------------------------------------------"
23 ]
24 },
25 {
26 "cell_type": "code",
27 "execution_count": 3,
28 "metadata": {},
29 "outputs": [],
30 "source": [
31 "import pymongo\n",
32 "from bson.json_util import dumps\n",
33 "\n",
34 "#uri = \"your-Atlas-instance-uri-goes-here\"\n",
35 "uri = \"mongodb+srv://dtabass:CS122D-Atlas???@cs122d.6qf6u.mongodb.net/demodb?retryWrites=true&w=majority\"\n",
36 "client = pymongo.MongoClient(uri)\n",
37 "demodb = client.demodb"
38 ]
39 },
40 {
41 "cell_type": "code",
42 "execution_count": 4,
43 "metadata": {},
44 "outputs": [],
45 "source": [
46 "#-----------------------------------------------------------------------------------\n",
47 "# start with a clean slate (via the equivalent of SQL's DROP TABLE)\n",
48 "#-----------------------------------------------------------------------------------"
49 ]
50 },
51 {
52 "cell_type": "code",
53 "execution_count": 5,
54 "metadata": {},
55 "outputs": [],
56 "source": [
57 "demodb.customers.drop()"
58 ]
59 },
60 {
61 "cell_type": "code",
62 "execution_count": 6,
63 "metadata": {},
64 "outputs": [],
65 "source": [
66 "demodb.orders.drop()"
67 ]
68 },
69 {
70 "cell_type": "code",
71 "execution_count": 7,
72 "metadata": {},
73 "outputs": [],
74 "source": [
75 "#-----------------------------------------------------------------------------------\n",
76 "# get all the customer data ready\n",
77 "#-----------------------------------------------------------------------------------"
78 ]
79 },
80 {
81 "cell_type": "code",
82 "execution_count": 8,
83 "metadata": {},
84 "outputs": [],
85 "source": [
86 "customers = [\n",
87 " {\"custid\": \"C13\", \"name\": \"T. Cruise\", \"address\": { \"street\": \"201 Main St.\", \"city\": \"St. Louis, MO\", \"zipcode\": \"63101\" }, \"rating\": 750 },\n",
88 " {\"custid\": \"C25\", \"name\": \"M. Streep\", \"address\": { \"street\": \"690 River St.\", \"city\": \"Hanover, MA\", \"zipcode\": \"02340\" }, \"rating\": 690 },\n",
89 " {\"custid\": \"C31\", \"name\": \"B. Pitt\", \"address\": { \"street\": \"360 Mountain Ave.\", \"city\": \"St. Louis, MO\", \"zipcode\": \"63101\" } },\n",
90 " {\"custid\": \"C35\", \"name\": \"J. Roberts\", \"address\": { \"street\": \"420 Green St.\", \"city\": \"Boston, MA\", \"zipcode\": \"02115\" }, \"rating\": 565 },\n",
91 " {\"custid\": \"C37\", \"name\": \"T. Hanks\", \"address\": { \"street\": \"120 Harbor Blvd.\", \"city\": \"Boston, MA\", \"zipcode\": \"02115\" }, \"rating\": 750 },\n",
92 " {\"custid\": \"C41\", \"name\": \"R. Duvall\", \"address\": { \"street\": \"150 Market St.\", \"city\": \"St. Louis, MO\", \"zipcode\": \"63101\" }, \"rating\": 640 },\n",
93 " {\"custid\": \"C47\", \"name\": \"S. Loren\", \"address\": { \"street\": \"Via del Corso\", \"city\": \"Rome, Italy\" }, \"rating\": 625 }\n",
94 "]"
95 ]
96 },
97 {
98 "cell_type": "code",
99 "execution_count": 9,
100 "metadata": {},
101 "outputs": [
102 {
103 "data": {
104 "text/plain": [
105 "<pymongo.results.InsertManyResult at 0x7fb8e008dcc0>"
106 ]
107 },
108 "execution_count": 9,
109 "metadata": {},
110 "output_type": "execute_result"
111 }
112 ],
113 "source": [
114 "demodb.customers.insert_many(customers)"
115 ]
116 },
117 {
118 "cell_type": "code",
119 "execution_count": 10,
120 "metadata": {},
121 "outputs": [
122 {
123 "data": {
124 "text/plain": [
125 "7"
126 ]
127 },
128 "execution_count": 10,
129 "metadata": {},
130 "output_type": "execute_result"
131 }
132 ],
133 "source": [
134 "# count the number of customers\n",
135 "demodb.customers.count_documents( {} )"
136 ]
137 },
138 {
139 "cell_type": "code",
140 "execution_count": 11,
141 "metadata": {},
142 "outputs": [],
143 "source": [
144 "#-----------------------------------------------------------------------------------\n",
145 "# get all the order data ready\n",
146 "#-----------------------------------------------------------------------------------"
147 ]
148 },
149 {
150 "cell_type": "code",
151 "execution_count": 12,
152 "metadata": {},
153 "outputs": [],
154 "source": [
155 "orders = [\n",
156 " { \"orderno\": 1001, \"custid\": \"C41\", \"order_date\": \"2017-04-29\", \"ship_date\": \"2017-05-03\", \"items\": [ { \"itemno\": 347, \"qty\": 5, \"price\": 19.99 }, { \"itemno\": 193, \"qty\": 2, \"price\": 28.89 } ] },\n",
157 " { \"orderno\": 1002, \"custid\": \"C13\", \"order_date\": \"2017-05-01\", \"ship_date\": \"2017-05-03\", \"items\": [ { \"itemno\": 460, \"qty\": 95, \"price\": 100.99 }, { \"itemno\": 680, \"qty\": 150, \"price\": 8.75 } ] },\n",
158 " { \"orderno\": 1003, \"custid\": \"C31\", \"order_date\": \"2017-06-15\", \"ship_date\": \"2017-06-16\", \"items\": [ { \"itemno\": 120, \"qty\": 2, \"price\": 88.99 }, { \"itemno\": 460, \"qty\": 3, \"price\": 99.99 } ] },\n",
159 " { \"orderno\": 1004, \"custid\": \"C35\", \"order_date\": \"2017-07-10\", \"ship_date\": \"2017-07-15\", \"items\": [ { \"itemno\": 680, \"qty\": 6, \"price\": 9.99 }, { \"itemno\": 195, \"qty\": 4, \"price\": 35.00 } ] },\n",
160 " { \"orderno\": 1005, \"custid\": \"C37\", \"order_date\": \"2017-08-30\", \"items\": [ { \"itemno\": 460, \"qty\": 2, \"price\": 99.98 }, { \"itemno\": 347, \"qty\": 120, \"price\": 22.00 }, { \"itemno\": 780, \"qty\": 1, \"price\": 1500.00 }, { \"itemno\": 375, \"qty\": 2, \"price\": 149.98 } ] },\n",
161 " { \"orderno\": 1006, \"custid\": \"C41\", \"order_date\": \"2017-09-02\", \"ship_date\": \"2017-09-04\", \"items\": [ { \"itemno\": 680, \"qty\": 51, \"price\": 25.98 }, { \"itemno\": 120, \"qty\": 65, \"price\": 85.00 }, { \"itemno\": 460, \"qty\": 120, \"price\": 99.98 } ] },\n",
162 " { \"orderno\": 1007, \"custid\": \"C13\", \"order_date\": \"2017-09-13\", \"ship_date\": \"2017-09-20\", \"items\": [ { \"itemno\": 185, \"qty\": 5, \"price\": 21.99 }, { \"itemno\": 680, \"qty\": 1, \"price\": 20.50 } ] },\n",
163 " { \"orderno\": 1008, \"custid\": \"C13\", \"order_date\": \"2017-10-13\", \"items\": [ { \"itemno\": 460, \"qty\": 20, \"price\": 99.99 } ] }\n",
164 "]"
165 ]
166 },
167 {
168 "cell_type": "code",
169 "execution_count": 13,
170 "metadata": {},
171 "outputs": [
172 {
173 "data": {
174 "text/plain": [
175 "<pymongo.results.InsertManyResult at 0x7fb9100d3140>"
176 ]
177 },
178 "execution_count": 13,
179 "metadata": {},
180 "output_type": "execute_result"
181 }
182 ],
183 "source": [
184 "demodb.orders.insert_many(orders)"
185 ]
186 },
187 {
188 "cell_type": "code",
189 "execution_count": 14,
190 "metadata": {},
191 "outputs": [
192 {
193 "data": {
194 "text/plain": [
195 "8"
196 ]
197 },
198 "execution_count": 14,
199 "metadata": {},
200 "output_type": "execute_result"
201 }
202 ],
203 "source": [
204 "# count the number of orders\n",
205 "demodb.orders.count_documents( {} )"
206 ]
207 },
208 {
209 "cell_type": "code",
210 "execution_count": 15,
211 "metadata": {},
212 "outputs": [],
213 "source": [
214 "#-----------------------------------------------------------------------------------\n",
215 "# now let's explore a series of SQL-inspired use cases!\n",
216 "#-----------------------------------------------------------------------------------"
217 ]
218 },
219 {
220 "cell_type": "code",
221 "execution_count": 16,
222 "metadata": {},
223 "outputs": [],
224 "source": [
225 "#-----------------------------------------------------------------------------------\n",
226 "# we'll start with a variety of single-collection (select-project) queries\n",
227 "#-----------------------------------------------------------------------------------"
228 ]
229 },
230 {
231 "cell_type": "code",
232 "execution_count": 18,
233 "metadata": {},
234 "outputs": [
235 {
236 "name": "stdout",
237 "output_type": "stream",
238 "text": [
239 "[\n",
240 " {\n",
241 " \"_id\": {\n",
242 " \"$oid\": \"626725e44ac934b62d707e68\"\n",
243 " },\n",
244 " \"custid\": \"C13\",\n",
245 " \"name\": \"T. Cruise\",\n",
246 " \"address\": {\n",
247 " \"street\": \"201 Main St.\",\n",
248 " \"city\": \"St. Louis, MO\",\n",
249 " \"zipcode\": \"63101\"\n",
250 " },\n",
251 " \"rating\": 750\n",
252 " },\n",
253 " {\n",
254 " \"_id\": {\n",
255 " \"$oid\": \"626725e44ac934b62d707e69\"\n",
256 " },\n",
257 " \"custid\": \"C25\",\n",
258 " \"name\": \"M. Streep\",\n",
259 " \"address\": {\n",
260 " \"street\": \"690 River St.\",\n",
261 " \"city\": \"Hanover, MA\",\n",
262 " \"zipcode\": \"02340\"\n",
263 " },\n",
264 " \"rating\": 690\n",
265 " },\n",
266 " {\n",
267 " \"_id\": {\n",
268 " \"$oid\": \"626725e44ac934b62d707e6a\"\n",
269 " },\n",
270 " \"custid\": \"C31\",\n",
271 " \"name\": \"B. Pitt\",\n",
272 " \"address\": {\n",
273 " \"street\": \"360 Mountain Ave.\",\n",
274 " \"city\": \"St. Louis, MO\",\n",
275 " \"zipcode\": \"63101\"\n",
276 " }\n",
277 " },\n",
278 " {\n",
279 " \"_id\": {\n",
280 " \"$oid\": \"626725e44ac934b62d707e6b\"\n",
281 " },\n",
282 " \"custid\": \"C35\",\n",
283 " \"name\": \"J. Roberts\",\n",
284 " \"address\": {\n",
285 " \"street\": \"420 Green St.\",\n",
286 " \"city\": \"Boston, MA\",\n",
287 " \"zipcode\": \"02115\"\n",
288 " },\n",
289 " \"rating\": 565\n",
290 " },\n",
291 " {\n",
292 " \"_id\": {\n",
293 " \"$oid\": \"626725e44ac934b62d707e6c\"\n",
294 " },\n",
295 " \"custid\": \"C37\",\n",
296 " \"name\": \"T. Hanks\",\n",
297 " \"address\": {\n",
298 " \"street\": \"120 Harbor Blvd.\",\n",
299 " \"city\": \"Boston, MA\",\n",
300 " \"zipcode\": \"02115\"\n",
301 " },\n",
302 " \"rating\": 750\n",
303 " },\n",
304 " {\n",
305 " \"_id\": {\n",
306 " \"$oid\": \"626725e44ac934b62d707e6d\"\n",
307 " },\n",
308 " \"custid\": \"C41\",\n",
309 " \"name\": \"R. Duvall\",\n",
310 " \"address\": {\n",
311 " \"street\": \"150 Market St.\",\n",
312 " \"city\": \"St. Louis, MO\",\n",
313 " \"zipcode\": \"63101\"\n",
314 " },\n",
315 " \"rating\": 640\n",
316 " },\n",
317 " {\n",
318 " \"_id\": {\n",
319 " \"$oid\": \"626725e44ac934b62d707e6e\"\n",
320 " },\n",
321 " \"custid\": \"C47\",\n",
322 " \"name\": \"S. Loren\",\n",
323 " \"address\": {\n",
324 " \"street\": \"Via del Corso\",\n",
325 " \"city\": \"Rome, Italy\"\n",
326 " },\n",
327 " \"rating\": 625\n",
328 " }\n",
329 "]\n"
330 ]
331 }
332 ],
333 "source": [
334 "# SELECT * FROM customers \n",
335 "\n",
336 "cursor = demodb.customers.find( {} )\n",
337 "print(dumps(cursor, indent=2))"
338 ]
339 },
340 {
341 "cell_type": "code",
342 "execution_count": 19,
343 "metadata": {},
344 "outputs": [
345 {
346 "data": {
347 "text/plain": [
348 "{'_id': ObjectId('626725e44ac934b62d707e68'),\n",
349 " 'custid': 'C13',\n",
350 " 'name': 'T. Cruise',\n",
351 " 'address': {'street': '201 Main St.',\n",
352 " 'city': 'St. Louis, MO',\n",
353 " 'zipcode': '63101'},\n",
354 " 'rating': 750}"
355 ]
356 },
357 "execution_count": 19,
358 "metadata": {},
359 "output_type": "execute_result"
360 }
361 ],
362 "source": [
363 "# SELECT * FROM customers LIMIT 1\n",
364 "\n",
365 "demodb.customers.find_one( {} )"
366 ]
367 },
368 {
369 "cell_type": "code",
370 "execution_count": 20,
371 "metadata": {},
372 "outputs": [
373 {
374 "name": "stdout",
375 "output_type": "stream",
376 "text": [
377 "[\n",
378 " {\n",
379 " \"_id\": {\n",
380 " \"$oid\": \"626725e44ac934b62d707e68\"\n",
381 " },\n",
382 " \"name\": \"T. Cruise\",\n",
383 " \"rating\": 750\n",
384 " },\n",
385 " {\n",
386 " \"_id\": {\n",
387 " \"$oid\": \"626725e44ac934b62d707e69\"\n",
388 " },\n",
389 " \"name\": \"M. Streep\",\n",
390 " \"rating\": 690\n",
391 " },\n",
392 " {\n",
393 " \"_id\": {\n",
394 " \"$oid\": \"626725e44ac934b62d707e6a\"\n",
395 " },\n",
396 " \"name\": \"B. Pitt\"\n",
397 " },\n",
398 " {\n",
399 " \"_id\": {\n",
400 " \"$oid\": \"626725e44ac934b62d707e6b\"\n",
401 " },\n",
402 " \"name\": \"J. Roberts\",\n",
403 " \"rating\": 565\n",
404 " },\n",
405 " {\n",
406 " \"_id\": {\n",
407 " \"$oid\": \"626725e44ac934b62d707e6c\"\n",
408 " },\n",
409 " \"name\": \"T. Hanks\",\n",
410 " \"rating\": 750\n",
411 " },\n",
412 " {\n",
413 " \"_id\": {\n",
414 " \"$oid\": \"626725e44ac934b62d707e6d\"\n",
415 " },\n",
416 " \"name\": \"R. Duvall\",\n",
417 " \"rating\": 640\n",
418 " },\n",
419 " {\n",
420 " \"_id\": {\n",
421 " \"$oid\": \"626725e44ac934b62d707e6e\"\n",
422 " },\n",
423 " \"name\": \"S. Loren\",\n",
424 " \"rating\": 625\n",
425 " }\n",
426 "]\n"
427 ]
428 }
429 ],
430 "source": [
431 "# SELECT _id, name, rating FROM customers -- note that the key is returned by default in MongoDB\n",
432 "\n",
433 "cursor = demodb.customers.find( {}, {\"name\": 1, \"rating\": 1} )\n",
434 "print(dumps(cursor, indent=2))"
435 ]
436 },
437 {
438 "cell_type": "code",
439 "execution_count": 21,
440 "metadata": {},
441 "outputs": [
442 {
443 "name": "stdout",
444 "output_type": "stream",
445 "text": [
446 "[\n",
447 " {\n",
448 " \"name\": \"T. Cruise\",\n",
449 " \"rating\": 750\n",
450 " },\n",
451 " {\n",
452 " \"name\": \"M. Streep\",\n",
453 " \"rating\": 690\n",
454 " },\n",
455 " {\n",
456 " \"name\": \"B. Pitt\"\n",
457 " },\n",
458 " {\n",
459 " \"name\": \"J. Roberts\",\n",
460 " \"rating\": 565\n",
461 " },\n",
462 " {\n",
463 " \"name\": \"T. Hanks\",\n",
464 " \"rating\": 750\n",
465 " },\n",
466 " {\n",
467 " \"name\": \"R. Duvall\",\n",
468 " \"rating\": 640\n",
469 " },\n",
470 " {\n",
471 " \"name\": \"S. Loren\",\n",
472 " \"rating\": 625\n",
473 " }\n",
474 "]\n"
475 ]
476 }
477 ],
478 "source": [
479 "# SELECT name, rating FROM customers -- minus that pesky key\n",
480 "\n",
481 "cursor = demodb.customers.find( {}, {\"_id\": 0, \"name\": 1, \"rating\":1} )\n",
482 "print(dumps(cursor, indent=2))"
483 ]
484 },
485 {
486 "cell_type": "code",
487 "execution_count": 22,
488 "metadata": {},
489 "outputs": [
490 {
491 "name": "stdout",
492 "output_type": "stream",
493 "text": [
494 "[\n",
495 " {\n",
496 " \"custid\": \"C13\",\n",
497 " \"name\": \"T. Cruise\",\n",
498 " \"rating\": 750\n",
499 " },\n",
500 " {\n",
501 " \"custid\": \"C25\",\n",
502 " \"name\": \"M. Streep\",\n",
503 " \"rating\": 690\n",
504 " },\n",
505 " {\n",
506 " \"custid\": \"C31\",\n",
507 " \"name\": \"B. Pitt\"\n",
508 " },\n",
509 " {\n",
510 " \"custid\": \"C35\",\n",
511 " \"name\": \"J. Roberts\",\n",
512 " \"rating\": 565\n",
513 " },\n",
514 " {\n",
515 " \"custid\": \"C37\",\n",
516 " \"name\": \"T. Hanks\",\n",
517 " \"rating\": 750\n",
518 " },\n",
519 " {\n",
520 " \"custid\": \"C41\",\n",
521 " \"name\": \"R. Duvall\",\n",
522 " \"rating\": 640\n",
523 " },\n",
524 " {\n",
525 " \"custid\": \"C47\",\n",
526 " \"name\": \"S. Loren\",\n",
527 " \"rating\": 625\n",
528 " }\n",
529 "]\n"
530 ]
531 }
532 ],
533 "source": [
534 "# SELECT *-{_id, address} FROM customers -- NOTE: you can't really request this in SQL! :-)\n",
535 "\n",
536 "cursor = demodb.customers.find( {}, {\"_id\": 0, \"address\": 0} )\n",
537 "print(dumps(cursor, indent=2))"
538 ]
539 },
540 {
541 "cell_type": "code",
542 "execution_count": 23,
543 "metadata": {},
544 "outputs": [
545 {
546 "name": "stdout",
547 "output_type": "stream",
548 "text": [
549 "[\n",
550 " {\n",
551 " \"name\": \"T. Cruise\",\n",
552 " \"rating\": 750\n",
553 " },\n",
554 " {\n",
555 " \"name\": \"T. Hanks\",\n",
556 " \"rating\": 750\n",
557 " }\n",
558 "]\n"
559 ]
560 }
561 ],
562 "source": [
563 "# SELECT name, rating FROM customers WHERE rating = 750\n",
564 "\n",
565 "cursor = demodb.customers.find( {\"rating\": 750}, {\"_id\": 0, \"name\": 1, \"rating\":1} )\n",
566 "print(dumps(cursor, indent=2))"
567 ]
568 },
569 {
570 "cell_type": "code",
571 "execution_count": 24,
572 "metadata": {},
573 "outputs": [
574 {
575 "name": "stdout",
576 "output_type": "stream",
577 "text": [
578 "[\n",
579 " {\n",
580 " \"name\": \"T. Cruise\",\n",
581 " \"rating\": 750\n",
582 " },\n",
583 " {\n",
584 " \"name\": \"M. Streep\",\n",
585 " \"rating\": 690\n",
586 " },\n",
587 " {\n",
588 " \"name\": \"T. Hanks\",\n",
589 " \"rating\": 750\n",
590 " }\n",
591 "]\n"
592 ]
593 }
594 ],
595 "source": [
596 "# SELECT name, rating FROM customers WHERE rating >= 650\n",
597 "\n",
598 "cursor = demodb.customers.find( {\"rating\": {\"$gte\": 650}}, {\"_id\": 0, \"name\": 1, \"rating\":1} )\n",
599 "print(dumps(cursor, indent=2))"
600 ]
601 },
602 {
603 "cell_type": "code",
604 "execution_count": 25,
605 "metadata": {},
606 "outputs": [
607 {
608 "name": "stdout",
609 "output_type": "stream",
610 "text": [
611 "[\n",
612 " {\n",
613 " \"name\": \"T. Hanks\",\n",
614 " \"rating\": 750\n",
615 " }\n",
616 "]\n"
617 ]
618 }
619 ],
620 "source": [
621 "# SELECT name, rating FROM customers WHERE rating = 750 AND name = 'T. Hanks'\n",
622 "\n",
623 "cursor = demodb.customers.find( {\"$and\": [{\"name\": \"T. Hanks\"}, {\"rating\": 750}]}, {\"name\": 1, \"rating\":1, \"_id\": 0 } )\n",
624 "print(dumps(cursor, indent=2))"
625 ]
626 },
627 {
628 "cell_type": "code",
629 "execution_count": 26,
630 "metadata": {},
631 "outputs": [
632 {
633 "name": "stdout",
634 "output_type": "stream",
635 "text": [
636 "[\n",
637 " {\n",
638 " \"name\": \"T. Hanks\",\n",
639 " \"rating\": 750\n",
640 " }\n",
641 "]\n"
642 ]
643 }
644 ],
645 "source": [
646 "# SELECT name, rating FROM customers WHERE rating = 750 AND name = 'T. Hanks' -- same query, take two :-)\n",
647 "\n",
648 "cursor = demodb.customers.find( {\"name\": \"T. Hanks\", \"rating\": 750}, {\"_id\": 0, \"name\": 1, \"rating\":1} )\n",
649 "print(dumps(cursor, indent=2))"
650 ]
651 },
652 {
653 "cell_type": "code",
654 "execution_count": 27,
655 "metadata": {
656 "scrolled": true
657 },
658 "outputs": [
659 {
660 "name": "stdout",
661 "output_type": "stream",
662 "text": [
663 "[\n",
664 " {\n",
665 " \"name\": \"T. Cruise\",\n",
666 " \"rating\": 750\n",
667 " },\n",
668 " {\n",
669 " \"name\": \"T. Hanks\",\n",
670 " \"rating\": 750\n",
671 " }\n",
672 "]\n"
673 ]
674 }
675 ],
676 "source": [
677 "# SELECT name, rating FROM customers WHERE rating = 750 OR name = 'T. Hanks'\n",
678 "\n",
679 "cursor = demodb.customers.find( {\"$or\": [{\"name\": \"T. Hanks\"}, {\"rating\": 750}]}, {\"_id\": 0, \"name\": 1, \"rating\":1} )\n",
680 "print(dumps(cursor, indent=2))"
681 ]
682 },
683 {
684 "cell_type": "code",
685 "execution_count": null,
686 "metadata": {},
687 "outputs": [],
688 "source": [
689 "#-----------------------------------------------------------------------------------\n",
690 "# now let's try a few SQL \"bells and whistles\" (and beyond SQL) queries\n",
691 "#-----------------------------------------------------------------------------------"
692 ]
693 },
694 {
695 "cell_type": "code",
696 "execution_count": 28,
697 "metadata": {},
698 "outputs": [
699 {
700 "name": "stdout",
701 "output_type": "stream",
702 "text": [
703 "[\n",
704 " {\n",
705 " \"name\": \"T. Cruise\",\n",
706 " \"rating\": 750\n",
707 " },\n",
708 " {\n",
709 " \"name\": \"T. Hanks\",\n",
710 " \"rating\": 750\n",
711 " }\n",
712 "]\n"
713 ]
714 }
715 ],
716 "source": [
717 "# SELECT name, rating FROM customers WHERE name LIKE 'T%'\n",
718 "\n",
719 "cursor = demodb.customers.find( {\"name\": {\"$regex\": \"^T.*\"}}, {\"_id\": 0, \"name\": 1, \"rating\":1} )\n",
720 "print(dumps(cursor, indent=2))"
721 ]
722 },
723 {
724 "cell_type": "code",
725 "execution_count": 29,
726 "metadata": {},
727 "outputs": [
728 {
729 "name": "stdout",
730 "output_type": "stream",
731 "text": [
732 "[\n",
733 " {\n",
734 " \"name\": \"T. Hanks\",\n",
735 " \"rating\": 750\n",
736 " },\n",
737 " {\n",
738 " \"name\": \"T. Cruise\",\n",
739 " \"rating\": 750\n",
740 " },\n",
741 " {\n",
742 " \"name\": \"M. Streep\",\n",
743 " \"rating\": 690\n",
744 " }\n",
745 "]\n"
746 ]
747 }
748 ],
749 "source": [
750 "# SELECT name, rating FROM customers ORDER BY rating DESC LIMIT 3\n",
751 "\n",
752 "cursor = demodb.customers.find( { }, {\"_id\": 0, \"name\": 1, \"rating\":1} ).sort(\"rating\", -1 ).limit(3)\n",
753 "print(dumps(cursor, indent=2))"
754 ]
755 },
756 {
757 "cell_type": "code",
758 "execution_count": 30,
759 "metadata": {},
760 "outputs": [
761 {
762 "name": "stdout",
763 "output_type": "stream",
764 "text": [
765 "[\n",
766 " {\n",
767 " \"name\": \"M. Streep\",\n",
768 " \"rating\": 690\n",
769 " },\n",
770 " {\n",
771 " \"name\": \"R. Duvall\",\n",
772 " \"rating\": 640\n",
773 " },\n",
774 " {\n",
775 " \"name\": \"S. Loren\",\n",
776 " \"rating\": 625\n",
777 " }\n",
778 "]\n"
779 ]
780 }
781 ],
782 "source": [
783 "# SELECT name, rating FROM customers ORDER BY rating DESC LIMIT 3 OFFSET 2\n",
784 "\n",
785 "cursor = demodb.customers.find( { }, {\"_id\": 0, \"name\": 1, \"rating\":1} ).sort(\"rating\", -1 ).skip(2).limit(3)\n",
786 "print(dumps(cursor, indent=2))"
787 ]
788 },
789 {
790 "cell_type": "code",
791 "execution_count": 31,
792 "metadata": {
793 "scrolled": true
794 },
795 "outputs": [
796 {
797 "name": "stdout",
798 "output_type": "stream",
799 "text": [
800 "[\n",
801 " {\n",
802 " \"name\": \"S. Loren\",\n",
803 " \"rating\": 625\n",
804 " },\n",
805 " {\n",
806 " \"name\": \"M. Streep\",\n",
807 " \"rating\": 690\n",
808 " }\n",
809 "]\n"
810 ]
811 }
812 ],
813 "source": [
814 "# SELECT name, rating FROM customers WHERE name LIKE '%S%' ORDER BY rating\n",
815 "\n",
816 "cursor = demodb.customers.find( {\"name\": {\"$regex\": \".*S.*\"}}, {\"_id\": 0, \"name\": 1, \"rating\":1} ).sort(\"rating\")\n",
817 "print(dumps(cursor, indent=2))"
818 ]
819 },
820 {
821 "cell_type": "code",
822 "execution_count": 32,
823 "metadata": {},
824 "outputs": [
825 {
826 "name": "stdout",
827 "output_type": "stream",
828 "text": [
829 "[\n",
830 " {\n",
831 " \"name\": \"S. Loren\",\n",
832 " \"rating\": 625\n",
833 " },\n",
834 " {\n",
835 " \"name\": \"M. Streep\",\n",
836 " \"rating\": 690\n",
837 " }\n",
838 "]\n"
839 ]
840 }
841 ],
842 "source": [
843 "# SELECT name, rating FROM customers WHERE name LIKE '%S%' ORDER BY rating -- same query, take two :-)\n",
844 "\n",
845 "cursor = demodb.customers.find( {\"name\": {\"$regex\": \"S\"}}, {\"_id\": 0, \"name\": 1, \"rating\":1} ).sort(\"rating\")\n",
846 "print(dumps(cursor, indent=2))"
847 ]
848 },
849 {
850 "cell_type": "code",
851 "execution_count": 33,
852 "metadata": {},
853 "outputs": [
854 {
855 "name": "stdout",
856 "output_type": "stream",
857 "text": [
858 "[\n",
859 " {\n",
860 " \"custid\": \"C41\",\n",
861 " \"name\": \"R. Duvall\",\n",
862 " \"address\": {\n",
863 " \"street\": \"150 Market St.\",\n",
864 " \"city\": \"St. Louis, MO\",\n",
865 " \"zipcode\": \"63101\"\n",
866 " },\n",
867 " \"rating\": 640\n",
868 " }\n",
869 "]\n"
870 ]
871 }
872 ],
873 "source": [
874 "# SELECT * FROM customers\n",
875 "# WHERE address_street = '150 Market St.' AND address_city = 'St. Louis, MO' AND address_zipcode = '63101'\n",
876 "\n",
877 "cursor = demodb.customers.find( {\"address\": { \"street\": \"150 Market St.\",\n",
878 " \"city\": \"St. Louis, MO\",\n",
879 " \"zipcode\": \"63101\" } }, {\"_id\": 0,} )\n",
880 "print(dumps(cursor, indent=2))"
881 ]
882 },
883 {
884 "cell_type": "code",
885 "execution_count": 34,
886 "metadata": {},
887 "outputs": [
888 {
889 "name": "stdout",
890 "output_type": "stream",
891 "text": [
892 "[\n",
893 " {\n",
894 " \"custid\": \"C13\",\n",
895 " \"name\": \"T. Cruise\",\n",
896 " \"address\": {\n",
897 " \"street\": \"201 Main St.\",\n",
898 " \"city\": \"St. Louis, MO\",\n",
899 " \"zipcode\": \"63101\"\n",
900 " },\n",
901 " \"rating\": 750\n",
902 " },\n",
903 " {\n",
904 " \"custid\": \"C31\",\n",
905 " \"name\": \"B. Pitt\",\n",
906 " \"address\": {\n",
907 " \"street\": \"360 Mountain Ave.\",\n",
908 " \"city\": \"St. Louis, MO\",\n",
909 " \"zipcode\": \"63101\"\n",
910 " }\n",
911 " },\n",
912 " {\n",
913 " \"custid\": \"C41\",\n",
914 " \"name\": \"R. Duvall\",\n",
915 " \"address\": {\n",
916 " \"street\": \"150 Market St.\",\n",
917 " \"city\": \"St. Louis, MO\",\n",
918 " \"zipcode\": \"63101\"\n",
919 " },\n",
920 " \"rating\": 640\n",
921 " }\n",
922 "]\n"
923 ]
924 }
925 ],
926 "source": [
927 "# SELECT * FROM customers WHERE address_zipcode = '63101' -- zipode is a nested field in this case\n",
928 "\n",
929 "cursor = demodb.customers.find( {\"address.zipcode\": \"63101\" }, {\"_id\": 0,} )\n",
930 "print(dumps(cursor, indent=2))"
931 ]
932 },
933 {
934 "cell_type": "code",
935 "execution_count": 35,
936 "metadata": {},
937 "outputs": [
938 {
939 "name": "stdout",
940 "output_type": "stream",
941 "text": [
942 "[]\n"
943 ]
944 }
945 ],
946 "source": [
947 "# SELECT * FROM customers WHERE address_zipcode = '63101' -- same query, take two? NOPE! :-)\n",
948 "\n",
949 "cursor = demodb.customers.find( {\"address\": { \"zipcode\": \"63101\" } }, {\"_id\": 0,} )\n",
950 "print(dumps(cursor, indent=2))"
951 ]
952 },
953 {
954 "cell_type": "code",
955 "execution_count": 36,
956 "metadata": {},
957 "outputs": [
958 {
959 "name": "stdout",
960 "output_type": "stream",
961 "text": [
962 "[\n",
963 " {\n",
964 " \"orderno\": 1002,\n",
965 " \"custid\": \"C13\",\n",
966 " \"order_date\": \"2017-05-01\",\n",
967 " \"ship_date\": \"2017-05-03\",\n",
968 " \"items\": [\n",
969 " {\n",
970 " \"itemno\": 460,\n",
971 " \"qty\": 95,\n",
972 " \"price\": 100.99\n",
973 " },\n",
974 " {\n",
975 " \"itemno\": 680,\n",
976 " \"qty\": 150,\n",
977 " \"price\": 8.75\n",
978 " }\n",
979 " ]\n",
980 " },\n",
981 " {\n",
982 " \"orderno\": 1007,\n",
983 " \"custid\": \"C13\",\n",
984 " \"order_date\": \"2017-09-13\",\n",
985 " \"ship_date\": \"2017-09-20\",\n",
986 " \"items\": [\n",
987 " {\n",
988 " \"itemno\": 185,\n",
989 " \"qty\": 5,\n",
990 " \"price\": 21.99\n",
991 " },\n",
992 " {\n",
993 " \"itemno\": 680,\n",
994 " \"qty\": 1,\n",
995 " \"price\": 20.5\n",
996 " }\n",
997 " ]\n",
998 " },\n",
999 " {\n",
1000 " \"orderno\": 1008,\n",
1001 " \"custid\": \"C13\",\n",
1002 " \"order_date\": \"2017-10-13\",\n",
1003 " \"items\": [\n",
1004 " {\n",
1005 " \"itemno\": 460,\n",
1006 " \"qty\": 20,\n",
1007 " \"price\": 99.99\n",
1008 " }\n",
1009 " ]\n",
1010 " }\n",
1011 "]\n"
1012 ]
1013 }
1014 ],
1015 "source": [
1016 "# SELECT * FROM orders WHERE custid = 'C13' -- now let's look at some orders!\n",
1017 "\n",
1018 "cursor = demodb.orders.find( {\"custid\": \"C13\"}, {\"_id\": 0,} )\n",
1019 "print(dumps(cursor, indent=2))"
1020 ]
1021 },
1022 {
1023 "cell_type": "code",
1024 "execution_count": 37,
1025 "metadata": {},
1026 "outputs": [
1027 {
1028 "name": "stdout",
1029 "output_type": "stream",
1030 "text": [
1031 "[\n",
1032 " {\n",
1033 " \"orderno\": 1002,\n",
1034 " \"custid\": \"C13\",\n",
1035 " \"order_date\": \"2017-05-01\",\n",
1036 " \"ship_date\": \"2017-05-03\",\n",
1037 " \"items\": [\n",
1038 " {\n",
1039 " \"itemno\": 460,\n",
1040 " \"qty\": 95,\n",
1041 " \"price\": 100.99\n",
1042 " },\n",
1043 " {\n",
1044 " \"itemno\": 680,\n",
1045 " \"qty\": 150,\n",
1046 " \"price\": 8.75\n",
1047 " }\n",
1048 " ]\n",
1049 " },\n",
1050 " {\n",
1051 " \"orderno\": 1008,\n",
1052 " \"custid\": \"C13\",\n",
1053 " \"order_date\": \"2017-10-13\",\n",
1054 " \"items\": [\n",
1055 " {\n",
1056 " \"itemno\": 460,\n",
1057 " \"qty\": 20,\n",
1058 " \"price\": 99.99\n",
1059 " }\n",
1060 " ]\n",
1061 " }\n",
1062 "]\n"
1063 ]
1064 }
1065 ],
1066 "source": [
1067 "# SELECT * FROM orders o WHERE o.custid = 'C13' AND\n",
1068 "# EXISTS (SELECT * FROM items i WHERE i.custid = o.custid AND i.itemno = 460)\n",
1069 "\n",
1070 "cursor = demodb.orders.find( {\"custid\": \"C13\", \"items.itemno\": 460}, {\"_id\": 0,} )\n",
1071 "print(dumps(cursor, indent=2))"
1072 ]
1073 },
1074 {
1075 "cell_type": "code",
1076 "execution_count": null,
1077 "metadata": {},
1078 "outputs": [],
1079 "source": [
1080 "#-----------------------------------------------------------------------------------\n",
1081 "# now let's do some single-collection SQL-like CRUD operations\n",
1082 "#-----------------------------------------------------------------------------------"
1083 ]
1084 },
1085 {
1086 "cell_type": "code",
1087 "execution_count": 38,
1088 "metadata": {},
1089 "outputs": [
1090 {
1091 "data": {
1092 "text/plain": [
1093 "<pymongo.results.InsertOneResult at 0x7fb8e00b65c0>"
1094 ]
1095 },
1096 "execution_count": 38,
1097 "metadata": {},
1098 "output_type": "execute_result"
1099 }
1100 ],
1101 "source": [
1102 "# INSERT INTO customers (custid, name, address_street, address_city, address_zipcode, rating, catcount)\n",
1103 "# VALUES (\"C01\", \"M. Carey\", \"111 State St.\", \"Ithaca, MY\", \"14850\", 775, 4)\n",
1104 "\n",
1105 "new_customer = {\"custid\": \"C01\", \"name\": \"M. Carey\", \"address\": { \"street\": \"111 State St.\", \"city\": \"Ithaca, MY\", \"zipcode\": \"14850\" }, \"rating\": 775, \"catcount\": 4}\n",
1106 "\n",
1107 "demodb.customers.insert_one(new_customer)\n"
1108 ]
1109 },
1110 {
1111 "cell_type": "code",
1112 "execution_count": 39,
1113 "metadata": {},
1114 "outputs": [
1115 {
1116 "name": "stdout",
1117 "output_type": "stream",
1118 "text": [
1119 "[\n",
1120 " {\n",
1121 " \"custid\": \"C01\",\n",
1122 " \"name\": \"M. Carey\",\n",
1123 " \"address\": {\n",
1124 " \"street\": \"111 State St.\",\n",
1125 " \"city\": \"Ithaca, MY\",\n",
1126 " \"zipcode\": \"14850\"\n",
1127 " },\n",
1128 " \"rating\": 775,\n",
1129 " \"catcount\": 4\n",
1130 " },\n",
1131 " {\n",
1132 " \"custid\": \"C37\",\n",
1133 " \"name\": \"T. Hanks\",\n",
1134 " \"address\": {\n",
1135 " \"street\": \"120 Harbor Blvd.\",\n",
1136 " \"city\": \"Boston, MA\",\n",
1137 " \"zipcode\": \"02115\"\n",
1138 " },\n",
1139 " \"rating\": 750\n",
1140 " }\n",
1141 "]\n"
1142 ]
1143 }
1144 ],
1145 "source": [
1146 "# verify the result of the INSERT operation\n",
1147 "\n",
1148 "cursor = demodb.customers.find( { }, {\"_id\": 0} ).sort(\"rating\", -1 ).limit(2)\n",
1149 "print(dumps(cursor, indent=2))"
1150 ]
1151 },
1152 {
1153 "cell_type": "code",
1154 "execution_count": 40,
1155 "metadata": {},
1156 "outputs": [
1157 {
1158 "data": {
1159 "text/plain": [
1160 "<pymongo.results.UpdateResult at 0x7fb9100d46c0>"
1161 ]
1162 },
1163 "execution_count": 40,
1164 "metadata": {},
1165 "output_type": "execute_result"
1166 }
1167 ],
1168 "source": [
1169 "# UPDATE customers SET rating = rating + 25 WHERE custid = 'C01'\n",
1170 "\n",
1171 "demodb.customers.update_one( {\"custid\": \"C01\"}, {\"$inc\": {\"rating\": 25}} )"
1172 ]
1173 },
1174 {
1175 "cell_type": "code",
1176 "execution_count": 41,
1177 "metadata": {},
1178 "outputs": [
1179 {
1180 "name": "stdout",
1181 "output_type": "stream",
1182 "text": [
1183 "[\n",
1184 " {\n",
1185 " \"_id\": {\n",
1186 " \"$oid\": \"62672ecb4ac934b62d707e77\"\n",
1187 " },\n",
1188 " \"custid\": \"C01\",\n",
1189 " \"name\": \"M. Carey\",\n",
1190 " \"address\": {\n",
1191 " \"street\": \"111 State St.\",\n",
1192 " \"city\": \"Ithaca, MY\",\n",
1193 " \"zipcode\": \"14850\"\n",
1194 " },\n",
1195 " \"rating\": 800,\n",
1196 " \"catcount\": 4\n",
1197 " }\n",
1198 "]\n"
1199 ]
1200 }
1201 ],
1202 "source": [
1203 "# verify the result of the rating UPDATE operation\n",
1204 "\n",
1205 "cursor = demodb.customers.find( {\"custid\": \"C01\"} )\n",
1206 "print(dumps(cursor, indent=2))"
1207 ]
1208 },
1209 {
1210 "cell_type": "code",
1211 "execution_count": 42,
1212 "metadata": {},
1213 "outputs": [
1214 {
1215 "data": {
1216 "text/plain": [
1217 "<pymongo.results.UpdateResult at 0x7fb8e00adf00>"
1218 ]
1219 },
1220 "execution_count": 42,
1221 "metadata": {},
1222 "output_type": "execute_result"
1223 }
1224 ],
1225 "source": [
1226 "# UPDATE customers SET address_city = 'Ithaca, NY' WHERE custid = 'C01'\n",
1227 "\n",
1228 "demodb.customers.update_many( {\"custid\": \"C01\"}, {\"$set\": {\"address.city\": \"Ithaca, NY\"}} )"
1229 ]
1230 },
1231 {
1232 "cell_type": "code",
1233 "execution_count": 43,
1234 "metadata": {},
1235 "outputs": [
1236 {
1237 "name": "stdout",
1238 "output_type": "stream",
1239 "text": [
1240 "[\n",
1241 " {\n",
1242 " \"_id\": {\n",
1243 " \"$oid\": \"62672ecb4ac934b62d707e77\"\n",
1244 " },\n",
1245 " \"custid\": \"C01\",\n",
1246 " \"name\": \"M. Carey\",\n",
1247 " \"address\": {\n",
1248 " \"street\": \"111 State St.\",\n",
1249 " \"city\": \"Ithaca, NY\",\n",
1250 " \"zipcode\": \"14850\"\n",
1251 " },\n",
1252 " \"rating\": 800,\n",
1253 " \"catcount\": 4\n",
1254 " }\n",
1255 "]\n"
1256 ]
1257 }
1258 ],
1259 "source": [
1260 "# verify the result of the address UPDATE operation\n",
1261 "\n",
1262 "cursor = demodb.customers.find( {\"custid\": \"C01\"} )\n",
1263 "print(dumps(cursor, indent=2))"
1264 ]
1265 },
1266 {
1267 "cell_type": "code",
1268 "execution_count": 44,
1269 "metadata": {},
1270 "outputs": [
1271 {
1272 "data": {
1273 "text/plain": [
1274 "<pymongo.results.DeleteResult at 0x7fb9100cdf80>"
1275 ]
1276 },
1277 "execution_count": 44,
1278 "metadata": {},
1279 "output_type": "execute_result"
1280 }
1281 ],
1282 "source": [
1283 "# DELETE customers WHERE custid = 'C01'\n",
1284 "\n",
1285 "demodb.customers.delete_one( {\"custid\": \"C01\"} )"
1286 ]
1287 },
1288 {
1289 "cell_type": "code",
1290 "execution_count": 45,
1291 "metadata": {},
1292 "outputs": [
1293 {
1294 "name": "stdout",
1295 "output_type": "stream",
1296 "text": [
1297 "[]\n"
1298 ]
1299 }
1300 ],
1301 "source": [
1302 "# verify the result of the DELETE operation\n",
1303 "\n",
1304 "cursor = demodb.customers.find( {\"custid\": \"C01\"} )\n",
1305 "print(dumps(cursor, indent=2))"
1306 ]
1307 },
1308 {
1309 "cell_type": "code",
1310 "execution_count": null,
1311 "metadata": {},
1312 "outputs": [],
1313 "source": [
1314 "# REMINDER: Return to the slide deck now to introduce MongoDB's aggregation pipeline API and concepts!"
1315 ]
1316 },
1317 {
1318 "cell_type": "code",
1319 "execution_count": 46,
1320 "metadata": {},
1321 "outputs": [],
1322 "source": [
1323 "#-----------------------------------------------------------------------------------\n",
1324 "# now let's transition from find() queries to aggregate() queries...!\n",
1325 "#-----------------------------------------------------------------------------------"
1326 ]
1327 },
1328 {
1329 "cell_type": "code",
1330 "execution_count": 47,
1331 "metadata": {},
1332 "outputs": [],
1333 "source": [
1334 "#-----------------------------------------------------------------------------------\n",
1335 "# we'll start by gently exploring the pipeline concept!\n",
1336 "#-----------------------------------------------------------------------------------"
1337 ]
1338 },
1339 {
1340 "cell_type": "code",
1341 "execution_count": 48,
1342 "metadata": {},
1343 "outputs": [
1344 {
1345 "name": "stdout",
1346 "output_type": "stream",
1347 "text": [
1348 "[\n",
1349 " {\n",
1350 " \"_id\": {\n",
1351 " \"$oid\": \"626725e44ac934b62d707e68\"\n",
1352 " },\n",
1353 " \"custid\": \"C13\",\n",
1354 " \"name\": \"T. Cruise\",\n",
1355 " \"address\": {\n",
1356 " \"street\": \"201 Main St.\",\n",
1357 " \"city\": \"St. Louis, MO\",\n",
1358 " \"zipcode\": \"63101\"\n",
1359 " },\n",
1360 " \"rating\": 750\n",
1361 " },\n",
1362 " {\n",
1363 " \"_id\": {\n",
1364 " \"$oid\": \"626725e44ac934b62d707e6c\"\n",
1365 " },\n",
1366 " \"custid\": \"C37\",\n",
1367 " \"name\": \"T. Hanks\",\n",
1368 " \"address\": {\n",
1369 " \"street\": \"120 Harbor Blvd.\",\n",
1370 " \"city\": \"Boston, MA\",\n",
1371 " \"zipcode\": \"02115\"\n",
1372 " },\n",
1373 " \"rating\": 750\n",
1374 " }\n",
1375 "]\n"
1376 ]
1377 }
1378 ],
1379 "source": [
1380 "# SELECT * FROM customers WHERE name LIKE 'T%'\n",
1381 "\n",
1382 "match_stage = {\"$match\": {\"name\": {\"$regex\": \"^T.*\"}} }\n",
1383 "pipeline = [ match_stage ]\n",
1384 "cursor = demodb.customers.aggregate( pipeline )\n",
1385 "print(dumps(cursor, indent=2))"
1386 ]
1387 },
1388 {
1389 "cell_type": "code",
1390 "execution_count": 49,
1391 "metadata": {},
1392 "outputs": [
1393 {
1394 "name": "stdout",
1395 "output_type": "stream",
1396 "text": [
1397 "[\n",
1398 " {\n",
1399 " \"name\": \"T. Cruise\",\n",
1400 " \"rating\": 750\n",
1401 " },\n",
1402 " {\n",
1403 " \"name\": \"T. Hanks\",\n",
1404 " \"rating\": 750\n",
1405 " }\n",
1406 "]\n"
1407 ]
1408 }
1409 ],
1410 "source": [
1411 "# SELECT name, rating FROM customers WHERE name LIKE 'T%'\n",
1412 "\n",
1413 "match_stage = {\"$match\": {\"name\": {\"$regex\": \"^T.*\"}} }\n",
1414 "project_stage = {\"$project\": {\"_id\": 0, \"name\": 1, \"rating\":1} }\n",
1415 "pipeline = [ match_stage, project_stage ]\n",
1416 "cursor = demodb.customers.aggregate( pipeline )\n",
1417 "print(dumps(cursor, indent=2))"
1418 ]
1419 },
1420 {
1421 "cell_type": "code",
1422 "execution_count": 51,
1423 "metadata": {},
1424 "outputs": [
1425 {
1426 "name": "stdout",
1427 "output_type": "stream",
1428 "text": [
1429 "[\n",
1430 " {\n",
1431 " \"name\": \"T. Cruise\",\n",
1432 " \"rating\": 750\n",
1433 " }\n",
1434 "]\n"
1435 ]
1436 }
1437 ],
1438 "source": [
1439 "# SELECT name, rating FROM customers WHERE name LIKE 'T%' LIMIT 1\n",
1440 "\n",
1441 "match_stage = {\"$match\": {\"name\": {\"$regex\": \"^T.*\"}} }\n",
1442 "project_stage = {\"$project\": {\"_id\": 0, \"name\": 1, \"rating\":1} }\n",
1443 "limit_stage = {\"$limit\": 1}\n",
1444 "cursor = demodb.customers.aggregate( [ match_stage, project_stage, limit_stage ] )\n",
1445 "print(dumps(cursor, indent=2))"
1446 ]
1447 },
1448 {
1449 "cell_type": "code",
1450 "execution_count": null,
1451 "metadata": {},
1452 "outputs": [],
1453 "source": [
1454 "# QUESTION: What does this remind you of...? (Stop the video and reminisce about CS122A... :-))"
1455 ]
1456 },
1457 {
1458 "cell_type": "code",
1459 "execution_count": null,
1460 "metadata": {},
1461 "outputs": [],
1462 "source": [
1463 "# --------- STOPPED HERE ON MONDAY :-) ---------"
1464 ]
1465 },
1466 {
1467 "cell_type": "code",
1468 "execution_count": null,
1469 "metadata": {},
1470 "outputs": [],
1471 "source": [
1472 "#-----------------------------------------------------------------------------------\n",
1473 "# the purpose of a MongoDB pipeline comes from the name of its function (duh... :-))\n",
1474 "#-----------------------------------------------------------------------------------"
1475 ]
1476 },
1477 {
1478 "cell_type": "code",
1479 "execution_count": null,
1480 "metadata": {},
1481 "outputs": [],
1482 "source": [
1483 "# SELECT custid, COUNT(*) as ordercnt GROUP BY custid FROM orders\n",
1484 "\n",
1485 "cursor = demodb.orders.aggregate( [ {\"$group\": {\"_id\": \"$custid\", \"ordercnt\": {\"$sum\": 1 }}} ] )\n",
1486 "print(dumps(cursor, indent=2))"
1487 ]
1488 },
1489 {
1490 "cell_type": "code",
1491 "execution_count": null,
1492 "metadata": {},
1493 "outputs": [],
1494 "source": [
1495 "# SELECT 'custid', COUNT(*) as ordercnt FROM orders -- beware of easily made typos ('custid')!!!\n",
1496 "\n",
1497 "cursor = demodb.orders.aggregate( [ {\"$group\": {\"_id\": \"custid\", \"ordercnt\": {\"$sum\": 1 }}} ] )\n",
1498 "print(dumps(cursor, indent=2))"
1499 ]
1500 },
1501 {
1502 "cell_type": "code",
1503 "execution_count": null,
1504 "metadata": {},
1505 "outputs": [],
1506 "source": [
1507 "# SELECT COUNT(*) as ordercnt FROM orders\n",
1508 "\n",
1509 "group_stage = {\"$group\": {\"_id\": None, \"ordercnt\": {\"$sum\": 1 }}}\n",
1510 "project_stage = {\"$project\": {\"_id\": 0}}\n",
1511 "cursor = demodb.orders.aggregate( [ group_stage, project_stage ] )\n",
1512 "print(dumps(cursor, indent=2))"
1513 ]
1514 },
1515 {
1516 "cell_type": "code",
1517 "execution_count": null,
1518 "metadata": {},
1519 "outputs": [],
1520 "source": [
1521 "# SELECT custid AS custid, MIN(orderno) as first_order, MAX(orderno) as last_order FROM orders\n",
1522 "# GROUP BY custid\n",
1523 "\n",
1524 "group_stage = {\"$group\": {\"_id\": \"$custid\", \"first_order\": {\"$min\": \"$orderno\" }, \"last_order\": {\"$max\": \"$orderno\" }}} \n",
1525 "cursor = demodb.orders.aggregate( [ group_stage ] )\n",
1526 "print(dumps(cursor, indent=2))"
1527 ]
1528 },
1529 {
1530 "cell_type": "code",
1531 "execution_count": null,
1532 "metadata": {},
1533 "outputs": [],
1534 "source": [
1535 "# SELECT custid AS custid, MIN(orderno) as first_order, MAX(orderno) as last_order FROM orders -- with renaming!\n",
1536 "# GROUP BY custid\n",
1537 "\n",
1538 "group_stage = {\"$group\": {\"_id\": \"$custid\", \"first_order\": {\"$min\": \"$orderno\" }, \"last_order\": {\"$max\": \"$orderno\" }}} \n",
1539 "rename_stage = {\"$project\": {\"custid\": \"$_id\", \"firstno\": \"$first_order\", \"lastno\": \"$last_order\", \"_id\": 0}}\n",
1540 "cursor = demodb.orders.aggregate( [ group_stage, rename_stage ] )\n",
1541 "print(dumps(cursor, indent=2))"
1542 ]
1543 },
1544 {
1545 "cell_type": "code",
1546 "execution_count": null,
1547 "metadata": {},
1548 "outputs": [],
1549 "source": [
1550 "#-----------------------------------------------------------------------------------\n",
1551 "# now let's look at how we can perform aggregations over *nested* data\n",
1552 "#-----------------------------------------------------------------------------------"
1553 ]
1554 },
1555 {
1556 "cell_type": "code",
1557 "execution_count": null,
1558 "metadata": {},
1559 "outputs": [],
1560 "source": [
1561 "# SELECT * FROM orders JOIN o.items -- loosely speaking (due to nesting/naming issues)\n",
1562 "\n",
1563 "cursor = demodb.orders.aggregate( [ {\"$unwind\": \"$items\"} ] )\n",
1564 "print(dumps(cursor, indent=2))"
1565 ]
1566 },
1567 {
1568 "cell_type": "code",
1569 "execution_count": null,
1570 "metadata": {},
1571 "outputs": [],
1572 "source": [
1573 "# SELECT i.itemno, SUM(i.qty) AS item_cnt -- a classic grouped-aggregate query, but over nested data\n",
1574 "# FROM orders o JOIN o.items i\n",
1575 "# GROUP BY i.itemno\n",
1576 "# HAVING item_cnt > 100\n",
1577 "# ORDER BY item_cnt DESC\n",
1578 "\n",
1579 "unwind_stage = {\"$unwind\": \"$items\"}\n",
1580 "group_stage = {\"$group\": {\"_id\": \"$items.itemno\", \"item_cnt\": {\"$sum\": \"$items.qty\" }}}\n",
1581 "renaming_stage = {\"$addFields\": {\"itemno\": \"$_id\"}}\n",
1582 "project_stage = {\"$project\": {\"_id\": 0}}\n",
1583 "sort_stage = {\"$sort\": {\"item_cnt\": -1}}\n",
1584 "match_stage = {\"$match\": {\"item_cnt\": {\"$gt\": 100}}}\n",
1585 "pipeline = [ unwind_stage, group_stage, renaming_stage, project_stage, sort_stage, match_stage ]\n",
1586 "cursor = demodb.orders.aggregate( pipeline )\n",
1587 "print(dumps(cursor, indent=2))"
1588 ]
1589 },
1590 {
1591 "cell_type": "code",
1592 "execution_count": null,
1593 "metadata": {},
1594 "outputs": [],
1595 "source": [
1596 "# SELECT i.itemno, SUM(i.qty) AS item_cnt -- take two, changing the ordering of the last two pipeline operations\n",
1597 "# FROM orders o JOIN o.items i\n",
1598 "# GROUP BY i.itemno\n",
1599 "# HAVING item_cnt > 100\n",
1600 "# ORDER BY item_cnt DESC\n",
1601 "\n",
1602 "pipeline = [ unwind_stage, group_stage, renaming_stage, project_stage, match_stage, sort_stage ]\n",
1603 "cursor = demodb.orders.aggregate( pipeline )\n",
1604 "print(dumps(cursor, indent=2))"
1605 ]
1606 },
1607 {
1608 "cell_type": "code",
1609 "execution_count": null,
1610 "metadata": {},
1611 "outputs": [],
1612 "source": [
1613 "# SELECT i.itemno, SUM(i.qty * i.price) AS item_spend -- similar, but now we want to compute something\n",
1614 "# FROM orders o JOIN o.items i\n",
1615 "# GROUP BY i.itemno\n",
1616 "# ORDER BY item_spend DESC\n",
1617 "# LIMIT 3\n",
1618 "\n",
1619 "unwind_stage = {\"$unwind\": \"$items\"}\n",
1620 "math_stage = {\"$addFields\": {\"spend\": {\"$multiply\": [\"$items.qty\", \"$items.price\"]}}}\n",
1621 "group_stage = {\"$group\": {\"_id\": \"$items.itemno\", \"item_spend\": {\"$sum\": \"$spend\"}}}\n",
1622 "renaming_stage = {\"$addFields\": {\"itemno\": \"$_id\"}}\n",
1623 "project_stage = {\"$project\": {\"_id\": 0}}\n",
1624 "sort_stage = {\"$sort\": {\"item_spend\": -1}}\n",
1625 "limit_stage = {\"$limit\": 3}\n",
1626 "pipeline = [ unwind_stage, math_stage, group_stage, renaming_stage, project_stage, sort_stage, limit_stage ]\n",
1627 "cursor = demodb.orders.aggregate( pipeline )\n",
1628 "print(dumps(cursor, indent=2))"
1629 ]
1630 },
1631 {
1632 "cell_type": "code",
1633 "execution_count": null,
1634 "metadata": {},
1635 "outputs": [],
1636 "source": [
1637 "#-----------------------------------------------------------------------------------\n",
1638 "# what about actual (i.e., cross-collection, SQL style) joins?\n",
1639 "#-----------------------------------------------------------------------------------"
1640 ]
1641 },
1642 {
1643 "cell_type": "code",
1644 "execution_count": null,
1645 "metadata": {},
1646 "outputs": [],
1647 "source": [
1648 "demodb.customers.find_one( {} )"
1649 ]
1650 },
1651 {
1652 "cell_type": "code",
1653 "execution_count": null,
1654 "metadata": {},
1655 "outputs": [],
1656 "source": [
1657 "demodb.orders.find_one( {} )"
1658 ]
1659 },
1660 {
1661 "cell_type": "code",
1662 "execution_count": null,
1663 "metadata": {},
1664 "outputs": [],
1665 "source": [
1666 "# SELECT * FROM customers c LEFT OUTER JOIN orders o WHERE c.name LIKE 'T%' AND c._id = o.custid\n",
1667 "\n",
1668 "match_stage = {\"$match\": {\"name\": {\"$regex\": \"^T.*\"}} }\n",
1669 "lookup_stage = {\"$lookup\": {\"from\": \"orders\", \"localField\": \"custid\", \"foreignField\": \"custid\", \"as\": \"orders\"}}\n",
1670 "project_stage = {\"$project\": {\"_id\": 0}}\n",
1671 "cursor = demodb.customers.aggregate( [ match_stage, lookup_stage, project_stage ] )\n",
1672 "print(dumps(cursor, indent=2))"
1673 ]
1674 },
1675 {
1676 "cell_type": "code",
1677 "execution_count": null,
1678 "metadata": {},
1679 "outputs": [],
1680 "source": [
1681 "# ---> now we've turned linking into embedding above and can proceed as previously shown (using $unwind, etc.) <---"
1682 ]
1683 },
1684 {
1685 "cell_type": "code",
1686 "execution_count": null,
1687 "metadata": {},
1688 "outputs": [],
1689 "source": [
1690 "#-----------------------------------------------------------------------------------\n",
1691 "# what about cross-collection joins involving more than one large collection?\n",
1692 "#-----------------------------------------------------------------------------------"
1693 ]
1694 },
1695 {
1696 "cell_type": "code",
1697 "execution_count": null,
1698 "metadata": {},
1699 "outputs": [],
1700 "source": [
1701 "# SELECT * FROM customers c LEFT OUTER JOIN orders o WHERE c.name LIKE 'T%' AND c._id = o.custid\n",
1702 "# (client-side application program version, formerly required for \"scaling out\" to multiple MongoDB shards)\n",
1703 "\n",
1704 "custs = list(demodb.customers.find( {\"name\": {\"$regex\": \"^T.*\"}}, {\"_id\": 0} ))\n",
1705 "for i, customer in enumerate(custs):\n",
1706 " cust_orders = demodb.orders.find( {\"custid\": customer[\"custid\"]} )\n",
1707 " custs[i][\"orders\"] = cust_orders\n",
1708 "print(dumps(custs, indent=2))"
1709 ]
1710 },
1711 {
1712 "cell_type": "code",
1713 "execution_count": null,
1714 "metadata": {},
1715 "outputs": [],
1716 "source": [
1717 "# REMINDER: Return to the slide deck once more to briefly cover MongoDB's approach to replication and consistency"
1718 ]
1719 }
1720 ],
1721 "metadata": {
1722 "kernelspec": {
1723 "display_name": "Python 3",
1724 "language": "python",
1725 "name": "python3"
1726 },
1727 "language_info": {
1728 "codemirror_mode": {
1729 "name": "ipython",
1730 "version": 3
1731 },
1732 "file_extension": ".py",
1733 "mimetype": "text/x-python",
1734 "name": "python",
1735 "nbconvert_exporter": "python",
1736 "pygments_lexer": "ipython3",
1737 "version": "3.8.3"
1738 }
1739 },
1740 "nbformat": 4,
1741 "nbformat_minor": 2
1742}