diff --git a/_site.yml b/_site.yml index 039f4b2..0c97af1 100644 --- a/_site.yml +++ b/_site.yml @@ -40,10 +40,8 @@ navbar: href: slides/12-Generalized_Linear_Mixed_Models.html - text: "13-Generalized Additive Models" href: slides/13-Generalized_Additive_Models.html - - text: "14-Cluster Analysis" - href: slides/14-Cluster_Analysis.html - - text: "15-Ordination" - href: slides/15-Ordination.html + - text: "14-Classification" + href: slides/14-Classification.html - text: "Labs" menu: - text: "01-Introduction" @@ -72,8 +70,6 @@ navbar: href: exercises/12-Generalized_Linear_Mixed_Models.html - text: "13-Generalized Additive Models" href: exercises/13-Generalized_Additive_Models.html - - text: "14-Cluster Analysis" - href: exercises/14-Cluster_Analysis.html - - text: "15-Ordination" - href: exercises/15-Ordination.html + - text: "14-Classification" + href: exercises/14-Classification.html output: distill::distill_article diff --git a/datasets/darts_and_arrows.csv b/datasets/darts_and_arrows.csv new file mode 100644 index 0000000..375e5a2 --- /dev/null +++ b/datasets/darts_and_arrows.csv @@ -0,0 +1,173 @@ +id,length,width,thickness,neck,provenience,type +349188,41.1,17.4,3.4,8.3,"White Dog Cave, Arizona",dart +1955815,35.8,24.2,7.2,15.3,"Allred Shelter, Missouri",dart +D2429,81.7,20.1,6.4,21.9,"Kimberley, NW Australia",dart +97179,21.8,14,2.9,9.8,"Steamboat Cave, New Mexico",dart +97179,39.5,20.5,4.3,11.2,"Steamboat Cave, New Mexico",dart +A4515-1/6,55.8,22.6,4.2,13.6,"Cave 2, San Juan County, Utah",dart +A4515-1/7,70.4,20,4.2,14.9,"Cave 2, San Juan County, Utah",dart +A4515-1/8,60.4,20.2,4,13.9,"Cave 2, San Juan County, Utah",dart +A4515-1/9,54.1,20.3,4.9,15.3,"Cave 2, San Juan County, Utah",dart +A4515-1/10,60.4,20.5,4.6,10.4,"Cave 2, San Juan County, Utah",dart +A4515-1/11,60.7,24.9,4.5,18.8,"Cave 2, San Juan County, Utah",dart +A4515.30-47,34.3,20.4,4.8,13.4,"Cave 2, San Juan County, Utah",dart +A4515.30-48,26.5,23.7,5.2,16.7,"Cave 2, San Juan County, Utah",dart 
+NA-7031,42.3,16.2,5.4,11.4,"Point Barrow, Alaska",dart +SA-3758,38,18,5,16.5,"Nazca vicinity, Peru",dart +P29-30.4G1,44.1,24,5.2,17.5,"Sand Dune Cave, Utah",dart +P29-30.4G2,54.6,27.5,4.8,15.6,"Sand Dune Cave, Utah",dart +P29-30.4G3,49.2,24.6,5.7,19.1,"Sand Dune Cave, Utah",dart +P29-30.4G4,48.5,17.4,4.6,13.3,"Sand Dune Cave, Utah",dart +P29-30.4G5,57.4,23.4,5.4,17.7,"Sand Dune Cave, Utah",dart +P29-30.4G6,63,23.1,5.1,19.1,"Sand Dune Cave, Utah",dart +66.56.3.3,57.2,27.4,4,17.9,"San Juan County, Utah",dart +66.55.3.3,58.8,28,5.5,18.2,"San Juan County, Utah",dart +66.56.5.1,60.4,30,5.8,19.2,"San Juan County, Utah",dart +66.55.3.1,52.8,27.5,5.1,17.4,"San Juan County, Utah",dart +66.56.3.1,85.3,32,5.8,19.3,"San Juan County, Utah",dart +66.55.3.2,66.5,28.4,5,13.8,"San Juan County, Utah",dart +66.56.5.2,67.1,29.7,5.5,19.9,"San Juan County, Utah",dart +95.2.147.1,48.9,22.5,5.5,12.5,"San Juan County, Utah",dart +95.2.115.1,61.3,24.6,6,16.8,"San Juan County, Utah",dart +A3048,38.3,19.7,3.3,11.2,"White Dog Cave, Arizona",dart +A3117,56.4,25,4.8,16.1,"White Dog Cave, Arizona",dart +A2814,39.1,19.4,4.4,10.9,"White Dog Cave, Arizona",dart +A3048,39.9,15.4,3.1,10.3,"White Dog Cave, Arizona",dart +96746,50.6,26.2,7.2,15.1,"Ceremonial Cave, Texas",dart +96745,39,19,6.7,11.4,"Ceremonial Cave, Texas",dart +A5582,57.3,29.2,4.7,16.9,"Broken Roof Cave, Arizona",dart +A5528,65.4,29.6,4.1,16.3,"Broken Roof Cave, Arizona",dart +97179,42.4,25,4.2,13.4,"Steamboat Cave, New Mexico",dart +NA,34,20.3,6,15,"Potter Creek Cave, California",dart +50.2/5213E,41.1,16.5,3.6,9.7,California (?),arrow +50.2/5213F,38.2,16,4.1,8.8,California (?),arrow +50.2/5213A,44.7,15.4,3.6,7.4,California (?),arrow +50.1/6135D,33.7,12.6,3.8,8.9,"Hupa (Klamath River, California)",arrow +50.1/6135G,32.6,17.6,4.2,12.2,"Hupa (Klamath River, California)",arrow +50.1/6135B,35,19.6,5.2,14.6,"Hupa (Klamath River, California)",arrow +50.1/2206D,30.1,17.5,4.5,12.4,Blackfoot,arrow +50.2/2488,36,17.9,4.2,14.5,Unlocalized 
Plains,arrow +50/9144B,37.2,13.3,4.2,9.2,White Mt. Apache (Arizona),arrow +Jan-37,31,14.6,3,8.5,Mt. Indians (Coast Range California),arrow +50/1650B,27.8,14.9,4.4,11.6,Shasta (California),arrow +50.1/5902,75.9,28.5,9.8,20.9,Menomini (Wisconsin),arrow +50/284C,24.8,12.8,3.1,7.8,"Noko (Mooretown, California)",arrow +50/881A,22.5,12.4,2.2,6.5,Towallamings,arrow +50/881G,19.3,12.2,2.3,6,Towallamings,arrow +50/881H,21.6,12.5,2.9,7,Towallamings,arrow +50/881C,20.5,11.8,2.7,6.5,Towallamings,arrow +50/881E,23.3,11.6,2.7,5.2,Towallamings,arrow +50/881L,21,12.5,2.8,6.7,Towallamings,arrow +50.2/1129,37.7,29.3,8.2,13,Unlocalized North America,arrow +50/9163A,29,16.2,4.2,12,White Mt. Apache (Arizona),arrow +50/9163B,30.8,18.1,3.7,13.7,White Mt. Apache (Arizona),arrow +50.1/2625D,15,11.2,2.5,6.3,Nambe (?) New Mexico,arrow +50.2/1223,32.7,16.4,4.7,12.8,Zuni (Arizona),arrow +50.1/2625A,17,10.8,1.8,5.1,Nambe (?) New Mexico,arrow +50.2/2486,27.9,14.2,4.2,7.3,Unlocalized Plains,arrow +50/9144B,35.1,13.8,3.3,11.3,White Mt. Apache (Arizona),arrow +50/5757F,17.2,9.6,2.3,7.3,Apache,arrow +50.1/4868,21.2,15.4,4,10.7,Narez (Sonora),arrow +H/4594,32.5,10.6,3.2,7.6,Unlocalized North America,arrow +16/8642B,22.9,11.6,4.1,9.3,Thompson River,arrow +50/9144B,32.5,12.2,4.4,10.4,White Mt. Apache (Arizona),arrow +50/9144B,35.1,12.8,3.5,10.8,White Mt. Apache (Arizona),arrow +50.2/2483,18,9.9,2.7,5.9,Unlocalized Plains,arrow +50.1/2720A,19.7,11,2.5,9.2,San Idlefonso (New Mexico),arrow +50.1/2720B,18.8,11.7,3.5,8.7,San Idlefonso (New Mexico),arrow +50.1/2720C,14.9,9.2,2.6,7,San Idlefonso (New Mexico),arrow +50/9834,52.1,32.4,9,19.6,Menomini (Wisconsin),arrow +50/9144B,29,12.5,4,8.7,White Mt. Apache (Arizona),arrow +50/9144B,37.4,13.5,3.7,10.7,White Mt. Apache (Arizona),arrow +50/9144B,40.4,13.5,5,10.4,White Mt. Apache (Arizona),arrow +50/9144B,38.3,12.6,5,10.5,White Mt. Apache (Arizona),arrow +50/9144B,37.2,12.3,4.1,10.4,White Mt. Apache (Arizona),arrow +50/9144B,41.2,13.1,4.4,10.7,White Mt. 
Apache (Arizona),arrow +50/9144B,36.4,13.4,5,11.2,White Mt. Apache (Arizona),arrow +50/9144B,30.9,14.3,4.3,10.4,White Mt. Apache (Arizona),arrow +50/9144B,33.7,13.2,5.5,9.4,White Mt. Apache (Arizona),arrow +50/9144B,34.6,13.2,4.7,10.3,White Mt. Apache (Arizona),arrow +50/9144B,35,14.2,5.8,10.1,White Mt. Apache (Arizona),arrow +50/9144B,31,12,4.4,9.6,White Mt. Apache (Arizona),arrow +50/9144B,40.8,12.8,4.8,10.5,White Mt. Apache (Arizona),arrow +50/9144B,32.5,12.6,4.4,9.9,White Mt. Apache (Arizona),arrow +50/9116C,43.6,24.1,4.8,17.7,White Mt. Apache (Arizona),arrow +50.2/3777,21.8,19.3,3.1,8.4,"Paviotso (Pyramid Lake, Nevada)",arrow +50.2/3776,26.9,20.4,7,12.5,"Paviotso (Pyramid Lake, Nevada)",arrow +50/9144B,37.8,13.4,5.4,10.5,White Mt. Apache (Arizona),arrow +50/9144B,37.8,13,4.6,9.5,White Mt. Apache (Arizona),arrow +50/9144C,40,12.6,5.7,9.8,White Mt. Apache (Arizona),arrow +50/9144B,29,14.5,5.1,10,White Mt. Apache (Arizona),arrow +50/284B,26,12.2,3.6,6.9,"Noko (Mooretown, California)",arrow +50.1/6525A,21,10.3,5.3,9.1,Papago,arrow +A5_,43,20.9,3.7,15.3,Unlocalized North America,arrow +A7_,51.8,22.4,4.1,15.3,Unlocalized North America,arrow +A9_,43.2,18.4,4.3,11.6,Unlocalized North America,arrow +A8_,35.5,18,6.3,15,Unlocalized North America,arrow +A2_,45,18.9,2.8,12.7,Unlocalized North America,arrow +A6_,55.4,24,3.4,15.5,Unlocalized North America,arrow +A1_,42.6,20,3,15.3,Unlocalized North America,arrow +A3_,40,17.9,4.6,12.3,Unlocalized North America,arrow +50/1650A,30,16.6,4.8,9.5,Shasta (California),arrow +50/1650C,33.9,14.4,4.6,11.8,Shasta (California),arrow +50/4148A,24.9,11.7,3.2,7.8,Hot Creek? (California),arrow +50/4148C,31.4,12.8,4.2,7.6,Hot Creek? (California),arrow +50/4148B,38,12.7,4.7,9.1,Hot Creek? (California),arrow +50/4148,29.5,12.1,3,7.4,Hot Creek? 
(California),arrow +50/4107B,27.7,15.1,6,10,Metawhiya,arrow +50/4107E,19.8,13.3,3.8,9.4,Metawhiya,arrow +50/4109D,23.3,11.7,3.6,10.5,Yuma,arrow +50/4109E,25.6,11,3.8,8.5,Yuma,arrow +50/4109F,28.1,12.1,3.7,10.7,Yuma,arrow +50/4109B,26.2,12.9,4.2,9.7,Yuma,arrow +50/9163C,32.7,18.5,4.2,14.6,White Mt. Apache (Arizona),arrow +50/9163C,27.1,14.6,5.3,10.7,White Mt. Apache (Arizona),arrow +50/9163C,31,19.2,5.4,12.8,White Mt. Apache (Arizona),arrow +50/9163C,30,17.6,3.2,12.2,White Mt. Apache (Arizona),arrow +50/9163C,35,16.8,5.6,12,White Mt. Apache (Arizona),arrow +50/9163C,33.9,17.1,4.5,11.9,White Mt. Apache (Arizona),arrow +50/9163C,36.3,16.9,6.1,12,White Mt. Apache (Arizona),arrow +A4,40.1,21,3,14.1,Unlocalized North America,arrow +50.2/5213(H),42.4,18.9,4.3,9.5,California (?),arrow +16/1277,45.8,17.5,4.3,7,Thompson River,arrow +16/1278,39.2,17,4.6,8.2,Thompson River,arrow +50/284A,48.6,18.8,3.5,10.3,"Noko (Mooretown, California)",arrow +50.1/2625B,15.2,12,2.1,7.8,Nambe (?) New Mexico,arrow +50.1/2625C,13.4,9.4,1.7,5.1,Nambe (?) New Mexico,arrow +1/2708C,34.4,18.2,3.5,11.2,Nez Perce (Idaho),arrow +5.02/5215A,25.3,15.9,4.9,11.8,California (?),arrow +5.02/5215B,31.8,14.6,4.3,11.5,California (?),arrow +5.02/5215D,31.5,15,4.8,9.2,California (?),arrow +5.02/5215C,30.9,15.5,3.4,12.7,California (?),arrow +50.1/4875,23.2,14.2,4.1,10.5,Narez (Sonora),arrow +50/4148J,37.7,13.5,4.6,9.5,Hat Creek (?) (California?),arrow +50/4148E,29,11.2,3.5,9,Hat Creek (?) (California?),arrow +50/4148C,24.4,12.5,3.1,8.1,Hat Creek (?) (California?),arrow +50/4148H,28.2,13.7,3.7,9.1,Hat Creek (?) (California?),arrow +50/4148I,25.3,13.4,3.8,8.2,Hat Creek (?) (California?),arrow +50/4148D,44.2,13.2,6.1,9,Hat Creek (?) (California?),arrow +50/9163C,29,16.9,3.9,12.8,White Mt. Apache (Arizona),arrow +50/9163C,45,17.6,3.2,12.8,White Mt. Apache (Arizona),arrow +50/9163C,30.7,16.8,3.3,11.7,White Mt. Apache (Arizona),arrow +50/9163C,32.7,16.6,5.4,12.5,White Mt. 
Apache (Arizona),arrow +16/9062C,29.6,13.8,2.8,6.3,Unlocalized North America,arrow +50.1/3027,18.4,15.2,2.9,10.7,Taos (New Mexico),arrow +50.1/2857A,19.2,10.7,4.4,7.4,San Idlefonso (New Mexico),arrow +50.1/2857B,32.5,17.7,5,12.6,San Idlefonso (New Mexico),arrow +50.1/2857C,26.5,13.6,3.2,8.9,San Idlefonso (New Mexico),arrow +50.1/2857D,20.7,12.2,2.4,7.4,San Idlefonso (New Mexico),arrow +50.1/2857E,23.7,16.4,3.7,10.6,San Idlefonso (New Mexico),arrow +H-4569,22.3,10.8,2.7,6.4,Pueblo Bonito (New Mexico),arrow +H-4582,23.2,11.1,2.8,8.3,Pueblo Bonito (New Mexico),arrow +H-4575,31.3,11.8,2.7,6.8,Pueblo Bonito (New Mexico),arrow +H-4576,21.3,9.7,2.7,6.7,Pueblo Bonito (New Mexico),arrow +H-4584,21,11.3,2.3,7.3,Pueblo Bonito (New Mexico),arrow +H-4595,22.5,12.3,2.8,6.6,Pueblo Bonito (New Mexico),arrow +H-4585,24.9,10.2,2.4,6.7,Pueblo Bonito (New Mexico),arrow +H-4583,24.3,13,2.9,7.5,Pueblo Bonito (New Mexico),arrow +H-4581,27.3,12.6,2.7,8,Pueblo Bonito (New Mexico),arrow +H-4577,24.8,11.5,3.4,7.6,Pueblo Bonito (New Mexico),arrow +H-4591,25.1,10.4,2.7,7.3,Pueblo Bonito (New Mexico),arrow +H-4572,30.8,11.2,2.8,6.5,Pueblo Bonito (New Mexico),arrow +H-4586,27.7,10.8,3,6.4,Pueblo Bonito (New Mexico),arrow +H-4571,29.2,11.7,3.2,7.3,Pueblo Bonito (New Mexico),arrow diff --git a/docs/LICENSE.html b/docs/LICENSE.html index 9e90afe..b5036f6 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -2074,8 +2074,7 @@ 11-Generalized Linear Models 3 12-Generalized Linear Mixed Models 13-Generalized Additive Models -14-Cluster Analysis -15-Ordination +14-Classification diff --git a/docs/datasets/darts_and_arrows.csv b/docs/datasets/darts_and_arrows.csv new file mode 100644 index 0000000..375e5a2 --- /dev/null +++ b/docs/datasets/darts_and_arrows.csv @@ -0,0 +1,173 @@ +id,length,width,thickness,neck,provenience,type +349188,41.1,17.4,3.4,8.3,"White Dog Cave, Arizona",dart +1955815,35.8,24.2,7.2,15.3,"Allred Shelter, Missouri",dart +D2429,81.7,20.1,6.4,21.9,"Kimberley, NW Australia",dart 
+97179,21.8,14,2.9,9.8,"Steamboat Cave, New Mexico",dart +97179,39.5,20.5,4.3,11.2,"Steamboat Cave, New Mexico",dart +A4515-1/6,55.8,22.6,4.2,13.6,"Cave 2, San Juan County, Utah",dart +A4515-1/7,70.4,20,4.2,14.9,"Cave 2, San Juan County, Utah",dart +A4515-1/8,60.4,20.2,4,13.9,"Cave 2, San Juan County, Utah",dart +A4515-1/9,54.1,20.3,4.9,15.3,"Cave 2, San Juan County, Utah",dart +A4515-1/10,60.4,20.5,4.6,10.4,"Cave 2, San Juan County, Utah",dart +A4515-1/11,60.7,24.9,4.5,18.8,"Cave 2, San Juan County, Utah",dart +A4515.30-47,34.3,20.4,4.8,13.4,"Cave 2, San Juan County, Utah",dart +A4515.30-48,26.5,23.7,5.2,16.7,"Cave 2, San Juan County, Utah",dart +NA-7031,42.3,16.2,5.4,11.4,"Point Barrow, Alaska",dart +SA-3758,38,18,5,16.5,"Nazca vicinity, Peru",dart +P29-30.4G1,44.1,24,5.2,17.5,"Sand Dune Cave, Utah",dart +P29-30.4G2,54.6,27.5,4.8,15.6,"Sand Dune Cave, Utah",dart +P29-30.4G3,49.2,24.6,5.7,19.1,"Sand Dune Cave, Utah",dart +P29-30.4G4,48.5,17.4,4.6,13.3,"Sand Dune Cave, Utah",dart +P29-30.4G5,57.4,23.4,5.4,17.7,"Sand Dune Cave, Utah",dart +P29-30.4G6,63,23.1,5.1,19.1,"Sand Dune Cave, Utah",dart +66.56.3.3,57.2,27.4,4,17.9,"San Juan County, Utah",dart +66.55.3.3,58.8,28,5.5,18.2,"San Juan County, Utah",dart +66.56.5.1,60.4,30,5.8,19.2,"San Juan County, Utah",dart +66.55.3.1,52.8,27.5,5.1,17.4,"San Juan County, Utah",dart +66.56.3.1,85.3,32,5.8,19.3,"San Juan County, Utah",dart +66.55.3.2,66.5,28.4,5,13.8,"San Juan County, Utah",dart +66.56.5.2,67.1,29.7,5.5,19.9,"San Juan County, Utah",dart +95.2.147.1,48.9,22.5,5.5,12.5,"San Juan County, Utah",dart +95.2.115.1,61.3,24.6,6,16.8,"San Juan County, Utah",dart +A3048,38.3,19.7,3.3,11.2,"White Dog Cave, Arizona",dart +A3117,56.4,25,4.8,16.1,"White Dog Cave, Arizona",dart +A2814,39.1,19.4,4.4,10.9,"White Dog Cave, Arizona",dart +A3048,39.9,15.4,3.1,10.3,"White Dog Cave, Arizona",dart +96746,50.6,26.2,7.2,15.1,"Ceremonial Cave, Texas",dart +96745,39,19,6.7,11.4,"Ceremonial Cave, Texas",dart +A5582,57.3,29.2,4.7,16.9,"Broken 
Roof Cave, Arizona",dart +A5528,65.4,29.6,4.1,16.3,"Broken Roof Cave, Arizona",dart +97179,42.4,25,4.2,13.4,"Steamboat Cave, New Mexico",dart +NA,34,20.3,6,15,"Potter Creek Cave, California",dart +50.2/5213E,41.1,16.5,3.6,9.7,California (?),arrow +50.2/5213F,38.2,16,4.1,8.8,California (?),arrow +50.2/5213A,44.7,15.4,3.6,7.4,California (?),arrow +50.1/6135D,33.7,12.6,3.8,8.9,"Hupa (Klamath River, California)",arrow +50.1/6135G,32.6,17.6,4.2,12.2,"Hupa (Klamath River, California)",arrow +50.1/6135B,35,19.6,5.2,14.6,"Hupa (Klamath River, California)",arrow +50.1/2206D,30.1,17.5,4.5,12.4,Blackfoot,arrow +50.2/2488,36,17.9,4.2,14.5,Unlocalized Plains,arrow +50/9144B,37.2,13.3,4.2,9.2,White Mt. Apache (Arizona),arrow +Jan-37,31,14.6,3,8.5,Mt. Indians (Coast Range California),arrow +50/1650B,27.8,14.9,4.4,11.6,Shasta (California),arrow +50.1/5902,75.9,28.5,9.8,20.9,Menomini (Wisconsin),arrow +50/284C,24.8,12.8,3.1,7.8,"Noko (Mooretown, California)",arrow +50/881A,22.5,12.4,2.2,6.5,Towallamings,arrow +50/881G,19.3,12.2,2.3,6,Towallamings,arrow +50/881H,21.6,12.5,2.9,7,Towallamings,arrow +50/881C,20.5,11.8,2.7,6.5,Towallamings,arrow +50/881E,23.3,11.6,2.7,5.2,Towallamings,arrow +50/881L,21,12.5,2.8,6.7,Towallamings,arrow +50.2/1129,37.7,29.3,8.2,13,Unlocalized North America,arrow +50/9163A,29,16.2,4.2,12,White Mt. Apache (Arizona),arrow +50/9163B,30.8,18.1,3.7,13.7,White Mt. Apache (Arizona),arrow +50.1/2625D,15,11.2,2.5,6.3,Nambe (?) New Mexico,arrow +50.2/1223,32.7,16.4,4.7,12.8,Zuni (Arizona),arrow +50.1/2625A,17,10.8,1.8,5.1,Nambe (?) New Mexico,arrow +50.2/2486,27.9,14.2,4.2,7.3,Unlocalized Plains,arrow +50/9144B,35.1,13.8,3.3,11.3,White Mt. Apache (Arizona),arrow +50/5757F,17.2,9.6,2.3,7.3,Apache,arrow +50.1/4868,21.2,15.4,4,10.7,Narez (Sonora),arrow +H/4594,32.5,10.6,3.2,7.6,Unlocalized North America,arrow +16/8642B,22.9,11.6,4.1,9.3,Thompson River,arrow +50/9144B,32.5,12.2,4.4,10.4,White Mt. Apache (Arizona),arrow +50/9144B,35.1,12.8,3.5,10.8,White Mt. 
Apache (Arizona),arrow +50.2/2483,18,9.9,2.7,5.9,Unlocalized Plains,arrow +50.1/2720A,19.7,11,2.5,9.2,San Idlefonso (New Mexico),arrow +50.1/2720B,18.8,11.7,3.5,8.7,San Idlefonso (New Mexico),arrow +50.1/2720C,14.9,9.2,2.6,7,San Idlefonso (New Mexico),arrow +50/9834,52.1,32.4,9,19.6,Menomini (Wisconsin),arrow +50/9144B,29,12.5,4,8.7,White Mt. Apache (Arizona),arrow +50/9144B,37.4,13.5,3.7,10.7,White Mt. Apache (Arizona),arrow +50/9144B,40.4,13.5,5,10.4,White Mt. Apache (Arizona),arrow +50/9144B,38.3,12.6,5,10.5,White Mt. Apache (Arizona),arrow +50/9144B,37.2,12.3,4.1,10.4,White Mt. Apache (Arizona),arrow +50/9144B,41.2,13.1,4.4,10.7,White Mt. Apache (Arizona),arrow +50/9144B,36.4,13.4,5,11.2,White Mt. Apache (Arizona),arrow +50/9144B,30.9,14.3,4.3,10.4,White Mt. Apache (Arizona),arrow +50/9144B,33.7,13.2,5.5,9.4,White Mt. Apache (Arizona),arrow +50/9144B,34.6,13.2,4.7,10.3,White Mt. Apache (Arizona),arrow +50/9144B,35,14.2,5.8,10.1,White Mt. Apache (Arizona),arrow +50/9144B,31,12,4.4,9.6,White Mt. Apache (Arizona),arrow +50/9144B,40.8,12.8,4.8,10.5,White Mt. Apache (Arizona),arrow +50/9144B,32.5,12.6,4.4,9.9,White Mt. Apache (Arizona),arrow +50/9116C,43.6,24.1,4.8,17.7,White Mt. Apache (Arizona),arrow +50.2/3777,21.8,19.3,3.1,8.4,"Paviotso (Pyramid Lake, Nevada)",arrow +50.2/3776,26.9,20.4,7,12.5,"Paviotso (Pyramid Lake, Nevada)",arrow +50/9144B,37.8,13.4,5.4,10.5,White Mt. Apache (Arizona),arrow +50/9144B,37.8,13,4.6,9.5,White Mt. Apache (Arizona),arrow +50/9144C,40,12.6,5.7,9.8,White Mt. Apache (Arizona),arrow +50/9144B,29,14.5,5.1,10,White Mt. 
Apache (Arizona),arrow +50/284B,26,12.2,3.6,6.9,"Noko (Mooretown, California)",arrow +50.1/6525A,21,10.3,5.3,9.1,Papago,arrow +A5_,43,20.9,3.7,15.3,Unlocalized North America,arrow +A7_,51.8,22.4,4.1,15.3,Unlocalized North America,arrow +A9_,43.2,18.4,4.3,11.6,Unlocalized North America,arrow +A8_,35.5,18,6.3,15,Unlocalized North America,arrow +A2_,45,18.9,2.8,12.7,Unlocalized North America,arrow +A6_,55.4,24,3.4,15.5,Unlocalized North America,arrow +A1_,42.6,20,3,15.3,Unlocalized North America,arrow +A3_,40,17.9,4.6,12.3,Unlocalized North America,arrow +50/1650A,30,16.6,4.8,9.5,Shasta (California),arrow +50/1650C,33.9,14.4,4.6,11.8,Shasta (California),arrow +50/4148A,24.9,11.7,3.2,7.8,Hot Creek? (California),arrow +50/4148C,31.4,12.8,4.2,7.6,Hot Creek? (California),arrow +50/4148B,38,12.7,4.7,9.1,Hot Creek? (California),arrow +50/4148,29.5,12.1,3,7.4,Hot Creek? (California),arrow +50/4107B,27.7,15.1,6,10,Metawhiya,arrow +50/4107E,19.8,13.3,3.8,9.4,Metawhiya,arrow +50/4109D,23.3,11.7,3.6,10.5,Yuma,arrow +50/4109E,25.6,11,3.8,8.5,Yuma,arrow +50/4109F,28.1,12.1,3.7,10.7,Yuma,arrow +50/4109B,26.2,12.9,4.2,9.7,Yuma,arrow +50/9163C,32.7,18.5,4.2,14.6,White Mt. Apache (Arizona),arrow +50/9163C,27.1,14.6,5.3,10.7,White Mt. Apache (Arizona),arrow +50/9163C,31,19.2,5.4,12.8,White Mt. Apache (Arizona),arrow +50/9163C,30,17.6,3.2,12.2,White Mt. Apache (Arizona),arrow +50/9163C,35,16.8,5.6,12,White Mt. Apache (Arizona),arrow +50/9163C,33.9,17.1,4.5,11.9,White Mt. Apache (Arizona),arrow +50/9163C,36.3,16.9,6.1,12,White Mt. Apache (Arizona),arrow +A4,40.1,21,3,14.1,Unlocalized North America,arrow +50.2/5213(H),42.4,18.9,4.3,9.5,California (?),arrow +16/1277,45.8,17.5,4.3,7,Thompson River,arrow +16/1278,39.2,17,4.6,8.2,Thompson River,arrow +50/284A,48.6,18.8,3.5,10.3,"Noko (Mooretown, California)",arrow +50.1/2625B,15.2,12,2.1,7.8,Nambe (?) New Mexico,arrow +50.1/2625C,13.4,9.4,1.7,5.1,Nambe (?) 
New Mexico,arrow +1/2708C,34.4,18.2,3.5,11.2,Nez Perce (Idaho),arrow +5.02/5215A,25.3,15.9,4.9,11.8,California (?),arrow +5.02/5215B,31.8,14.6,4.3,11.5,California (?),arrow +5.02/5215D,31.5,15,4.8,9.2,California (?),arrow +5.02/5215C,30.9,15.5,3.4,12.7,California (?),arrow +50.1/4875,23.2,14.2,4.1,10.5,Narez (Sonora),arrow +50/4148J,37.7,13.5,4.6,9.5,Hat Creek (?) (California?),arrow +50/4148E,29,11.2,3.5,9,Hat Creek (?) (California?),arrow +50/4148C,24.4,12.5,3.1,8.1,Hat Creek (?) (California?),arrow +50/4148H,28.2,13.7,3.7,9.1,Hat Creek (?) (California?),arrow +50/4148I,25.3,13.4,3.8,8.2,Hat Creek (?) (California?),arrow +50/4148D,44.2,13.2,6.1,9,Hat Creek (?) (California?),arrow +50/9163C,29,16.9,3.9,12.8,White Mt. Apache (Arizona),arrow +50/9163C,45,17.6,3.2,12.8,White Mt. Apache (Arizona),arrow +50/9163C,30.7,16.8,3.3,11.7,White Mt. Apache (Arizona),arrow +50/9163C,32.7,16.6,5.4,12.5,White Mt. Apache (Arizona),arrow +16/9062C,29.6,13.8,2.8,6.3,Unlocalized North America,arrow +50.1/3027,18.4,15.2,2.9,10.7,Taos (New Mexico),arrow +50.1/2857A,19.2,10.7,4.4,7.4,San Idlefonso (New Mexico),arrow +50.1/2857B,32.5,17.7,5,12.6,San Idlefonso (New Mexico),arrow +50.1/2857C,26.5,13.6,3.2,8.9,San Idlefonso (New Mexico),arrow +50.1/2857D,20.7,12.2,2.4,7.4,San Idlefonso (New Mexico),arrow +50.1/2857E,23.7,16.4,3.7,10.6,San Idlefonso (New Mexico),arrow +H-4569,22.3,10.8,2.7,6.4,Pueblo Bonito (New Mexico),arrow +H-4582,23.2,11.1,2.8,8.3,Pueblo Bonito (New Mexico),arrow +H-4575,31.3,11.8,2.7,6.8,Pueblo Bonito (New Mexico),arrow +H-4576,21.3,9.7,2.7,6.7,Pueblo Bonito (New Mexico),arrow +H-4584,21,11.3,2.3,7.3,Pueblo Bonito (New Mexico),arrow +H-4595,22.5,12.3,2.8,6.6,Pueblo Bonito (New Mexico),arrow +H-4585,24.9,10.2,2.4,6.7,Pueblo Bonito (New Mexico),arrow +H-4583,24.3,13,2.9,7.5,Pueblo Bonito (New Mexico),arrow +H-4581,27.3,12.6,2.7,8,Pueblo Bonito (New Mexico),arrow +H-4577,24.8,11.5,3.4,7.6,Pueblo Bonito (New Mexico),arrow +H-4591,25.1,10.4,2.7,7.3,Pueblo Bonito (New 
Mexico),arrow +H-4572,30.8,11.2,2.8,6.5,Pueblo Bonito (New Mexico),arrow +H-4586,27.7,10.8,3,6.4,Pueblo Bonito (New Mexico),arrow +H-4571,29.2,11.7,3.2,7.3,Pueblo Bonito (New Mexico),arrow diff --git a/docs/help.html b/docs/help.html index 4851acf..36e72a3 100644 --- a/docs/help.html +++ b/docs/help.html @@ -2151,8 +2151,7 @@ 11-Generalized Linear Models 3 12-Generalized Linear Mixed Models 13-Generalized Additive Models -14-Cluster Analysis -15-Ordination +14-Classification diff --git a/docs/index.html b/docs/index.html index 849632e..a3bbdd2 100644 --- a/docs/index.html +++ b/docs/index.html @@ -2148,8 +2148,7 @@ 11-Generalized Linear Models 3 12-Generalized Linear Mixed Models 13-Generalized Additive Models -14-Cluster Analysis -15-Ordination +14-Classification diff --git a/docs/search.json b/docs/search.json index 8b1b492..e3456ff 100644 --- a/docs/search.json +++ b/docs/search.json @@ -6,20 +6,20 @@ "description": "", "author": [], "contents": "\r\n\r\nContents\r\nThe Google Search\r\nParadox\r\nR helpers\r\nRStudio helpers\r\nR Community helpers\r\nOther resources\r\nReproducible examples\r\n\r\n\r\n\r\n\r\n\r\n\r\nFigure 1: Wisdom of the Ancients (xkcd 979).\r\n\r\n\r\n\r\n\r\nThe Google Search Paradox\r\n\r\nAs you make your first tentative forays into the R programming\r\nenvironment, you will on occasion experience the jarring dislocation of\r\nan R error, a typically bright red eruption of your R console, perhaps\r\nsymbolic of your code exploding before your eyes. 
Here is one of the\r\nmore infamous errors you are likely to encounter:\r\nobject of type 'closure' is not subsettable\r\nNever mind what this particular error means.* The point is that it\r\ncan be terribly frustrating when you encounter it or one of its kin.\r\n\r\n\r\n* Though check out Jenny Bryan’s talk at the 2020 RStudio Conference: https://rstudio.com/resources/rstudioconf-2020/object-of-type-closure-is-not-subsettable/\r\nOften troubleshooting these errors can be an additional source of\r\nanxiety and frustration, especially early on, as you simply lack the\r\nwords required to describe your problem accurately and, thus, to\r\nformulate the question whose answer you so desperately need. I like to\r\nrefer to this unhappy circumstance as the Google Search Paradox\r\nbecause you will inevitably find yourself staring at an empty search\r\nbar, wondering what words to give to Google. It’s also a bit like Meno’s\r\nParadox, or the Paradox of Inquiry. For if you could properly frame\r\nyour question, it’s probably the case that you already know the answer.\r\nSo, you either know and thus don’t need to ask, or need to ask, but\r\ndon’t know how.\r\nOf course, the situation is not nearly so dire as this. In truth, you\r\nalways know at least a little about your problem - you do have the error\r\nitself after all! - and can thus Google your way through to an answer -\r\neventually, anyway. But life is fleeting, as they say, and time is\r\nshort, so you should probably avoid the brute force approach, relying\r\ninstead on searching efficiently. To help you with that (and to\r\nhelp you get better with that), this page provides a brief\r\nannotated list of where to look for answers, starting from within R\r\nitself!\r\nR helpers\r\nTypically, though not always, R code will have lots of supporting\r\ndocumentation. These come in two varieties: function help pages and\r\nvignettes. 
If you are having trouble a single function to work properly,\r\nyou may find its help page more useful. If you are having trouble\r\ngetting through some analysis and you cannot pinpoint the exact reason\r\nfor your trouble, the vignettes are probably where you should look.\r\nThere are a couple of ways to access this documentation.\r\nFrom within R, you can use the help() and\r\n?... functions to access short-form documentation. Examples\r\ninclude help(\"plot\") and ?plot. This will call\r\nup the function’s documentation page and display it in your computer’s\r\ngraphical device.\r\nhelp.search(...) and ??... both provide\r\nmeans of searching through help pages to find multiple functions with\r\nthe same name (and potentially the same or similar uses). Simply replace\r\nthe ellipses (three dots) with a character string and these functions\r\nwill return all help pages with that string. So if you want to carry out\r\na cluster analysis, typing ??cluster will search for any\r\nfunctions in your installed packages that use the word cluster.\r\nThe rdrr.io website provides\r\naccess to all function help pages online. If you Google an R function, a\r\nlink to its documentation on this website is typically the first that\r\nyou will see. For the best search results, I recommend Googling “R\r\n .”\r\nFrom within R, you can also access the vignettes using some\r\ncombination of vignette(), browseVignettes(),\r\nand RShowDoc().\r\nThe function vignette() with no argument specified will\r\nbring up a list of all available vignettes, organized by package. If you\r\nwant the vignettes for a particular R package, you can also type\r\nvignette(package = ...), for example,\r\nvignette(package = \"grid\") will bring up the vignettes for\r\nthe grid package.\r\nbrowseVignettes() will open a locally hosted HTML page\r\nin your browser with links to all available R vignettes. This is\r\nactually quite helpful, and you should give it a try when you get a\r\nchance. 
Just browsing through these vignettes will give you a great feel\r\nfor all that you can do in R.\r\nRShowDoc() is mostly for opening a single vignette.\r\nThis is usefully paired with vignette(), which will give\r\nyou the name of the vignette and package, so that you can, for example,\r\ncall RShowDoc(what = \"plotExample\", package = \"grid\"). This\r\nwill bring up the “plotExample” vignette from the grid\r\npackage.\r\n\r\nPackage authors have lots of resources for sharing their\r\ndocumentation now, including websites designed specifically to present\r\nboth function help pages and vignettes. Here is an example of the website for the\r\ncolorspace package.\r\nFinally, you can access all available documentation for official\r\nR packages by navigating the Comprehensive R Archive Network (CRAN)\r\nwebsite, here: https://cran.r-project.org/.\r\nRStudio helpers\r\nWhile RStudio provides loads of support to R users, here we mention\r\nsome of the more important ones.\r\nRStudio\r\nHow To Articles provide loads of how-to guides for working with R\r\nand RStudio. This is a very comprehensive suite of useful\r\ndocumentation.\r\nRStudio\r\nCheatsheets strive to communicate package information in a single,\r\nconcise poster format with lots of visual queues and simple definitions.\r\nThese can be really helpful when you need a quick refresher on the use\r\nof some bit of code.\r\nRStudio Community is\r\nan online forum where individuals ask and answer questions about R and\r\nRStudio. They have a very strict code of conduct for their members that\r\nemphasizes mutual respect and inclusivity, so you will generally find\r\nthe discussions here much more friendly and supportive. Use of this\r\nforum is highly recommended.\r\nRStudio Education is\r\na very, very recent development by RStudio (it came online in 2020), and\r\nit is simply amazing as a resource for not only learning R itself, but\r\nalso learning how to teach R. 
Please note that, with the\r\nexception of number 4, these RStudio help tools can be accessed within\r\nthe RStudio IDE under the Help tab.\r\nR Community helpers\r\nThe R community refers to R users who are actively communicating with\r\nand supporting other R users. As there are lots and lots of engaged R\r\nusers these days, and more and more every day, the community is\r\ndefinitely thriving. There is also an expanding ethos within this\r\ncommunity driven largely by RStudio and its code of conduct, so you will\r\ngenerally find R users to be a friendly bunch (if a little hoity-toity).\r\nSo, let’s talk about where you can engage with this community. We have\r\nalready mentioned one, RStudio Community, but here we will list some\r\nmore.\r\nStack Overflow is a\r\nforum for programmers in all programming languages to ask and answer\r\nquestions, much like RStudio Community. It’s just been around longer\r\n(2008 to be exact), which means its code of conduct has evolved over\r\ntime to address a number of unanticipated issues. The consequence is\r\nthat answers to questions will run the gamut from being respectful and\r\nclear to downright insulting. Still, it is a rich resource for\r\naddressing your R coding issues. And it has gotten a lot\r\nbetter.\r\nROpenSci is an R programming\r\ncommunity focused on promoting open and reproducible research in\r\nscience. They have a forum\r\nmuch like RStudio Community, a blog with helpful news and\r\noverviews of the packages in their ecosystem, and a rich suite of webpages for their\r\nsupported R packages, which you can explore here.\r\nR-bloggers is a\r\nclearinghouse for R related content, basically an aggregator of content\r\nfrom individual blogs. It is worth perusing every now and then to pick\r\nup the occasional gem of R understanding.\r\nThe #rstats\r\nTwitter community is something. 
Use this if you use Twitter, I\r\nguess…\r\nThe rstats\r\nsubreddit is a helpful community of Redditors that are pretty good\r\nabout answering questions you might have.\r\nOther resources\r\nThe UCLA Institute for Digital Research & Education offers Statistical Consulting geared\r\ntoward R. This is a tremendous resource for both R and statistics and is\r\nhighly recommended.\r\nReproducible examples\r\nOthers have likely asked the same question you want to ask, so you\r\nwill not always need to make a post yourself. But, in the off chance\r\nthat you do find yourself confronted with a question never asked before,\r\nyou need to make sure you provide R users with all the information and\r\nresources they need to help troubleshoot your code and to do so with the\r\nleast effort possible. This involves providing a “reproducible\r\nexample” or reprex. There are two essential\r\ningredients to a reprex:\r\nIt needs to be reproducible, obviously. That means\r\nyou need to make sure you provide everything needed to reproduce your\r\nerror as is, for instance, all library() calls in your\r\ncode.\r\nIt needs to be minimal. In other words, do not\r\ninclude anything extraneous or burdensome, like a 400 MB data object. A\r\nmuch smaller R object should suffice.\r\nA lot has been written about how to put together a reprex, so rather\r\nthan belabor the point here, it is perhaps best to direct you to Jenny\r\nBryan’s reprex package,\r\nwhich will walk you through the process of submitting a help request on\r\nthe various forums mentioned above.\r\n\r\n\r\n\r\n", - "last_modified": "2022-04-12T13:02:34-06:00" + "last_modified": "2022-04-19T12:57:12-06:00" }, { "path": "index.html", "title": "Quantitative Analysis of Archaeological Data", "author": [], "contents": "\r\nWelcome!\r\nThis is a Github page setup to host lectures and other content for\r\nthe University of Utah course ANTH 5850: Quantitative Analysis\r\nof Archaeological Data. 
Mostly, you’ll find the lecture slides\r\nand labs for this course. You can see a link to them in the navbar. The\r\nsite was built using the R package distill. The source code\r\nfor this website, along with the lecture slides and lab exercises, can\r\nbe found at the associated Github repository.\r\nInspiration?\r\nI can’t take credit for all of the content in this course. The\r\nlecture slides, in particular, are adapted from the lectures of Dr. Simon\r\nBrewer in the Department of Geography at the University of Utah. The\r\nR labs, at least the parts of them concerned with data science rather\r\nthan statistics, draw heavily on the very popular book R for Data Science by\r\nHadley Wickham and Garrett Grolemund.\r\nIt probably goes without saying, of course, but those folks are way\r\nsmarter than I could ever hope to be, so any errors or confusions that\r\noccur here are definitely, one-hundred percent, without a doubt my\r\nown.\r\nReuse\r\nText and figures are licensed under Creative Commons Attribution CC BY 4.0. Any\r\ncomputer code (R, HTML, CSS, etc.) in slides and worksheets, including\r\nin slide and worksheet sources, is also licensed under MIT. Note that\r\nfigures in slides may be pulled in from external sources and may be\r\nlicensed under different terms. 
For such images, image credits are\r\navailable in the slide notes, accessible via pressing the letter\r\n‘p’.\r\n\r\n\r\n\r\n", - "last_modified": "2022-04-12T13:02:36-06:00" + "last_modified": "2022-04-19T12:57:14-06:00" }, { "path": "LICENSE.html", "author": [], "contents": "\r\nMIT License\r\nCopyright (c) 2021 Kenneth Blake Vernon\r\nPermission is hereby granted, free of charge, to any person obtaining\r\na copy of this software and associated documentation files (the\r\n“Software”), to deal in the Software without restriction, including\r\nwithout limitation the rights to use, copy, modify, merge, publish,\r\ndistribute, sublicense, and/or sell copies of the Software, and to\r\npermit persons to whom the Software is furnished to do so, subject to\r\nthe following conditions:\r\nThe above copyright notice and this permission notice shall be\r\nincluded in all copies or substantial portions of the Software.\r\nTHE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,\r\nEXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r\nMERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.\r\nIN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY\r\nCLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,\r\nTORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\r\nSOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\r\n\r\n\r\n", - "last_modified": "2022-04-12T13:02:37-06:00" + "last_modified": "2022-04-19T12:57:15-06:00" } ], "collections": [] diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 2787b93..740ba09 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -10,6 +10,6 @@ ./LICENSE.html - 2022-04-12T13:02:37-06:00 + 2022-04-19T12:57:15-06:00 diff --git a/docs/slides/14-Classification.html b/docs/slides/14-Classification.html new file mode 100644 index 0000000..dcf738a --- /dev/null +++ b/docs/slides/14-Classification.html @@ -0,0 +1,567 @@ + + + + Quantitative Analysis of 
Archaeological Data + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/slides/14-Classification_files/figure-html/unnamed-chunk-10-1.png b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-10-1.png new file mode 100644 index 0000000..4050aa5 Binary files /dev/null and b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-10-1.png differ diff --git a/docs/slides/14-Classification_files/figure-html/unnamed-chunk-11-1.png b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-11-1.png new file mode 100644 index 0000000..06309d5 Binary files /dev/null and b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-11-1.png differ diff --git a/docs/slides/14-Classification_files/figure-html/unnamed-chunk-13-1.png b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-13-1.png new file mode 100644 index 0000000..45f0ffb Binary files /dev/null and b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-13-1.png differ diff --git a/docs/slides/14-Classification_files/figure-html/unnamed-chunk-15-1.png b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-15-1.png new file mode 100644 index 0000000..cf66a05 Binary files /dev/null and b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-15-1.png differ diff --git a/docs/slides/14-Classification_files/figure-html/unnamed-chunk-16-1.png b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-16-1.png new file mode 100644 index 0000000..32d1e15 Binary files /dev/null and b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-16-1.png differ diff --git a/docs/slides/14-Classification_files/figure-html/unnamed-chunk-17-1.png b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-17-1.png new file mode 100644 index 0000000..366cf83 Binary files /dev/null and b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-17-1.png differ diff --git 
a/docs/slides/14-Classification_files/figure-html/unnamed-chunk-18-1.png b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-18-1.png new file mode 100644 index 0000000..06309d5 Binary files /dev/null and b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-18-1.png differ diff --git a/docs/slides/14-Classification_files/figure-html/unnamed-chunk-3-1.png b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-3-1.png new file mode 100644 index 0000000..3ea4574 Binary files /dev/null and b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-3-1.png differ diff --git a/docs/slides/14-Classification_files/figure-html/unnamed-chunk-5-1.png b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-5-1.png new file mode 100644 index 0000000..08c7dfb Binary files /dev/null and b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/slides/14-Classification_files/figure-html/unnamed-chunk-7-1.png b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-7-1.png new file mode 100644 index 0000000..ce21f6a Binary files /dev/null and b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-7-1.png differ diff --git a/docs/slides/14-Classification_files/figure-html/unnamed-chunk-9-1.png b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-9-1.png new file mode 100644 index 0000000..850e772 Binary files /dev/null and b/docs/slides/14-Classification_files/figure-html/unnamed-chunk-9-1.png differ diff --git a/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-10-1.png b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-10-1.png new file mode 100644 index 0000000..4050aa5 Binary files /dev/null and b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-10-1.png differ diff --git a/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-10-2.png b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-10-2.png new 
file mode 100644 index 0000000..29bd122 Binary files /dev/null and b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-10-2.png differ diff --git a/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-11-1.png b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-11-1.png new file mode 100644 index 0000000..06309d5 Binary files /dev/null and b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-11-1.png differ diff --git a/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-13-1.png b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-13-1.png new file mode 100644 index 0000000..45f0ffb Binary files /dev/null and b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-13-1.png differ diff --git a/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-15-1.png b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-15-1.png new file mode 100644 index 0000000..cf66a05 Binary files /dev/null and b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-15-1.png differ diff --git a/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-16-1.png b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-16-1.png new file mode 100644 index 0000000..32d1e15 Binary files /dev/null and b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-16-1.png differ diff --git a/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-17-1.png b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-17-1.png new file mode 100644 index 0000000..b881b37 Binary files /dev/null and b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-17-1.png differ diff --git a/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-3-1.png b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-3-1.png new file mode 100644 index 0000000..3ea4574 Binary files /dev/null and 
b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-3-1.png differ diff --git a/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-5-1.png b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-5-1.png new file mode 100644 index 0000000..08c7dfb Binary files /dev/null and b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-7-1.png b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-7-1.png new file mode 100644 index 0000000..ce21f6a Binary files /dev/null and b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-7-1.png differ diff --git a/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-8-1.png b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-8-1.png new file mode 100644 index 0000000..ef5d15f Binary files /dev/null and b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-8-1.png differ diff --git a/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-9-1.png b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-9-1.png new file mode 100644 index 0000000..850e772 Binary files /dev/null and b/docs/slides/14-Cluster_Analysis_files/figure-html/unnamed-chunk-9-1.png differ diff --git a/docs/slides/images/12_10.png b/docs/slides/images/12_10.png new file mode 100644 index 0000000..078227c Binary files /dev/null and b/docs/slides/images/12_10.png differ diff --git a/docs/slides/images/12_11.png b/docs/slides/images/12_11.png new file mode 100644 index 0000000..07bf26c Binary files /dev/null and b/docs/slides/images/12_11.png differ diff --git a/docs/slides/images/12_13.png b/docs/slides/images/12_13.png new file mode 100644 index 0000000..917042d Binary files /dev/null and b/docs/slides/images/12_13.png differ diff --git a/docs/slides/images/12_8.png b/docs/slides/images/12_8.png new file mode 100644 index 0000000..2a0f45f Binary files 
/dev/null and b/docs/slides/images/12_8.png differ diff --git a/docs/slides/images/yenn.jpg b/docs/slides/images/yenn.jpg new file mode 100644 index 0000000..726b63d Binary files /dev/null and b/docs/slides/images/yenn.jpg differ diff --git a/slides/14-Classification.Rmd b/slides/14-Classification.Rmd new file mode 100644 index 0000000..f2dd6dd --- /dev/null +++ b/slides/14-Classification.Rmd @@ -0,0 +1,986 @@ +--- +title: "Quantitative Analysis of Archaeological Data" +subtitle: "Lecture 14: Classification" +date: 'Last updated: `r Sys.Date()`' +output: + xaringan::moon_reader: + lib_dir: libs + css: [xaringan-themer.css, custom_style.css] + seal: TRUE + nature: + highlightStyle: magula + highlightLines: TRUE + countIncrementalSlides: TRUE + ratio: '16:9' +--- + +```{r} +#| include = FALSE, +#| code = xfun::read_utf8(here::here("slides", "before_chunk.R")) +``` + +```{r} + +darts <- here("datasets", "darts_and_arrows.csv") %>% + read_csv() %>% + mutate( + dart = ifelse(type == "dart", 1, 0) + ) + +fakes <- iris %>% + as_tibble() %>% + rename( + "length" = Sepal.Length, + "width" = Sepal.Width, + "neck" = Petal.Length, + "thickness" = Petal.Width, + "type" = Species + ) %>% + mutate( + type = case_when( + type == "setosa" ~ "derp", + type == "versicolor" ~ "flerp", + TRUE ~ "merp" + ), + length = case_when( + type == "derp" ~ length + 0.3, + type == "flerp" ~ length - 0.3, + TRUE ~ length + 0.1 + ), + width = case_when( + type == "derp" ~ width + 1.3, + type == "flerp" ~ width + 0.4, + TRUE ~ width + 1.2 + ) + ) + +``` + + +## Outline + +1. Supervised classifiers (with response) + - Logistic regression + - Linear Discriminant Analysis (LDA) +2. Unsupervised classifiers (without response) + - Principal Component Analysis (PCA) + - k-means + - hierarchical clustering + + +--- +class: center middle + +## Is it a dart? 
+ +```{r} +#| fig.asp = 0.75, +#| out.width = "60%" + +darts_long <- darts %>% + pivot_longer( + cols = c(length:neck), + names_to = "measure", + values_to = "value" + ) + +ggplot(darts_long, aes(type, value)) + + geom_boxplot() + + facet_wrap(~measure, scale = "free_y") + + labs( + x = NULL, + y = "Millimeters" + ) + +``` + +--- + +## Logistic Regression + +```{r} + +darts_glm <- glm( + dart ~ length + width + thickness + neck, + family = binomial, + data = darts +) + +responses <- bind_rows( + ggpredict(darts_glm, "length [10:90, by=0.5]") %>% mutate(measure = "length"), + ggpredict(darts_glm, "width [5:35, by=0.5]") %>% mutate(measure = "width") +) %>% + as_tibble() + +``` + +.pull-left[ + +```{r} +#| fig.asp = 1 + +ggplot() + + geom_ribbon( + data = responses, + aes(x, ymin = conf.low, ymax = conf.high), + fill = "gray90" + ) + + geom_point( + data = darts_long %>% filter(measure %in% c("length", "width")), + aes(value, dart), + size = 3, + alpha = 0.5 + ) + + geom_line( + data = responses, + aes(x, predicted) + ) + + facet_wrap( + ~measure, + scale = "free_x", + nrow = 2 + ) + + scale_y_continuous( + breaks = c(0, 1), + labels = c("arrow", "dart") + ) + + labs( + x = "Millimeters", + y = NULL + ) + +``` + +] + +.pull-right[ + +```{r} + +broom::tidy(darts_glm) %>% + rename("z.value" = statistic) %>% + kbl(table.attr = "class='table-model table-fullwidth'") %>% + kable_paper(c("striped", "hover")) + +remove(darts_glm, darts_long, responses) + +``` + +Note that estimates are on the logit scale! 
+ +] + +--- + +## LDA with one predictor + +.pull-left[ + +```{r} +#| fig.asp = 0.5 + +length_hist <- ggplot( + darts, + aes(length, color = type, fill = type) +) + + geom_histogram( + bins = 15, + center = 0, + position = "identity" + ) + + scale_color_viridis( + name = NULL, + direction = -1, + discrete = TRUE + ) + + scale_fill_viridis( + name = NULL, + direction = -1, + alpha = 0.5, + discrete = TRUE + ) + + scale_y_continuous( + labels = scales::number_format(accuracy = 0.1) + ) + + theme( + legend.position = c(0.97, 0.98), + legend.justification = c("right", "top") + ) + + labs( + x = "Length", + y = "Count" + ) + +length_hist + +``` + +] + +.pull-right[ + +Uses the distribution of each X to define a **discriminant function** $f_k$ for each group $k$. + +For each observation $i$, $f_k$ determines the probability that $i$ is in $k$. + +Linear assumptions for Xs! Homoscedasticity, normality, independence, no multi-collinearity. + +] + + + +--- +count: false + +## LDA with one predictor + +```{r} + +darts_lda <- lda( + type ~ length, + data = darts +) + +new_data <- tibble(length = with(darts, seq(min(length), max(length), length = 100))) + +predictions <- predict(darts_lda, newdata = new_data) %>% lapply(as_tibble) + +decision <- bind_cols(new_data, predictions$posterior) %>% + pivot_longer( + cols = c("dart", "arrow"), + names_to = "type", + values_to = "posterior" + ) + +boundary_length <- predictions$class %>% + mutate(length = new_data$length) %>% + group_by(value) %>% + summarize( + max_length = max(length), + min_length = min(length) + ) + +boundary_length <- with(boundary_length, (max_length[[1]] + min_length[[2]])/2) + +``` + +.pull-left[ + +```{r} +#| fig.asp = 0.925 + +length_hist <- length_hist + + geom_vline( + xintercept = boundary_length, + linetype = "dashed", + color = "gray20", + size = 1 + ) + + theme( + axis.text.x = element_blank(), + axis.ticks.x = element_blank(), + axis.title.x = element_blank() + ) + +length_prob <- ggplot( + decision, + 
aes(length, color = type, fill = type) +) + + geom_histogram( + bins = 15, + center = 0, + position = "identity", + color = "transparent", + fill = "transparent" + ) + + geom_ribbon( + aes(ymin = 0, ymax = posterior), + color = "transparent" + ) + + geom_line( + aes(y = posterior), + size = 1.3 + ) + + scale_color_viridis( + name = NULL, + direction = -1, + discrete = TRUE + ) + + scale_fill_viridis( + name = NULL, + direction = -1, + alpha = 0.5, + discrete = TRUE + ) + + theme( + legend.position = "none" + ) + + labs( + x = "length", + y = "Probability" + ) + + scale_y_continuous( + limits = c(0, 1), + breaks = c(0, 0.5, 1) + ) + + geom_vline( + xintercept = boundary_length, + linetype = "dashed", + color = "gray20", + size = 1 + ) + +length_hist / length_prob + +remove(train, darts_lda, predictions, decision, boundary_length) + +``` + +] + +.pull-right[ + +Uses the distribution of each X to define a **discriminant function** $f_k$ for each group $k$. + +For each observation $i$, $f_k$ determines the probability that $i$ is in $k$. + +Linear assumptions for Xs! Homoscedasticity, normality, independence, no multi-collinearity. + +Decision boundary (dashed line) between group 1 and 2 is the point where $f_1 = f_2$. + +] + +--- +class: center middle + +## What type is it? 
+ +```{r} +#| fig.asp = 0.75, +#| out.width = "60%" + +ggplot(fakes %>% pivot_longer(cols = -type), aes(type, value)) + + geom_boxplot() + + facet_wrap(~name, nrow = 2, scale = "free_x") + + coord_flip() + + theme( + plot.margin = margin(5, 9, 5, 5) + ) + + labs( + y = "Millimeters", + x = NULL + ) + +``` + +--- + +## LDA with multiple predictors + +.pull-left[ + +```{r} + +n <- 200 + +exp <- 0.33 + +new_data <- with( + fakes, + expand_grid( + length = seq(min(length)-exp, max(length)+exp, length = n), + width = seq(min(width)-exp, max(width)+exp, length = n), + thickness = mean(thickness), + neck = mean(neck) + ) +) + +ggplot(fakes, aes(length, width, color = type)) + + geom_point( + size = 4, + alpha = 0.8 + ) + + scale_color_viridis( + name = NULL, + discrete = TRUE + ) + + labs( + x = "Length", + y = "Width" + ) + + coord_cartesian( + xlim = with(new_data, c(min(length), max(length))), + ylim = with(new_data, c(min(width), max(width))), + expand = FALSE + ) + + theme( + legend.background = element_blank(), + legend.position = c(0.01, 0.99), + legend.justification = c("left", "top"), + plot.margin = margin(5,5,5,5) + ) + +``` + +] + +.pull-right[ + +Works the same way with multiple predictors, but uses a multivariate probability distribution. + +Note that it still assumes linearity! In fact, LDA is equivalent to simple linear regression! 
+ +] + +--- + +## LDA with multiple predictors + +```{r} + +darts_lda <- lda( + type ~ length + width, + data = fakes +) + +``` + +.pull-left[ + +```{r} + +predictions <- predict(darts_lda, newdata = new_data) + +decision <- new_data %>% + bind_cols(class = predictions$class) %>% + mutate(class = class) + +ggplot() + + geom_point( + data = decision, + aes(length, width, color = class), + size = 0.2, + alpha = 0.1 + ) + + geom_contour( + data = decision, + aes(length, width, z = as.integer(class)), + breaks = c(1.9, 2.1), + color = qaad_colors("rufous_red"), + size = 1 + ) + + geom_point( + data = fakes, + aes(length, width, fill = type), + shape = 21, + color = "black", + size = 4, + stroke = 0.8 + ) + + stat_ellipse( + data = fakes, + aes(length, width, group = type), + color = "gray20", + size = 1.65 + ) + + stat_ellipse( + data = fakes, + aes(length, width, group = type), + color = "white", + size = 1.1 + ) + + scale_color_viridis( + name = NULL, + discrete = TRUE + ) + + scale_fill_viridis( + name = NULL, + alpha = 0.8, + discrete = TRUE + ) + + labs( + x = "Length", + y = "Width" + ) + + coord_cartesian( + xlim = with(new_data, c(min(length), max(length))), + ylim = with(new_data, c(min(width), max(width))), + expand = FALSE + ) + + theme( + legend.background = element_blank(), + legend.key = element_rect(fill = "transparent"), + legend.position = c(0.01, 0.99), + legend.justification = c("left", "top"), + plot.margin = margin(5,5,5,5) + ) + +``` + +] + +.pull-right[ + +Works the same way with multiple predictors, but uses a multivariate probability distribution. + +Note that it still assumes linearity! In fact, LDA is equivalent to simple linear regression! 
+ +Decision boundary (dark red line): +- between derp and flerp is the point where $f_{derp} = f_{flerp}$ +- between derp and merp is the point where $f_{derp} = f_{merp}$ +- between merp and flerp is the point where $f_{merp} = f_{flerp}$ + +] + +--- + +## Principal Component Analysis + +```{r} + +fakes_pca <- fakes %>% + select(-type, -neck) %>% + prcomp(scale = TRUE) + +evals <- fakes_pca$sdev^2 +evctr <- fakes_pca$rotation +score <- fakes_pca$x + +orthogonal <- function(x, y, a = 0, b = 1){ + + # finds endpoint for a perpendicular segment + # from the point (x0,y0) + # to the line y=a+b*x + + xend <- (x+b*y-a*b)/(1+b^2) + yend <- a + b*xend + + tibble( + x = x, + y = y, + xend = xend, + yend = yend + ) + +} + +b <- evctr[2,1]/evctr[1,1] +a <- with(fakes, mean(width) - b*mean(length)) + +projection <- orthogonal( + x = fakes$length, + y = fakes$width, + a = a, + b = b +) %>% + mutate(method = "PCA") + +``` + + +.pull-left[ + +```{r} +#| fig.asp = 0.9, +#| out.width = "100%" + +ggplot(fakes, aes(length, width)) + + geom_segment( + data = projection, + aes(x, y, xend = xend, yend = yend), + color = qaad_colors("flame_orange"), + alpha = 0.3, + size = 0.4 + ) + + geom_abline( + intercept = a, + slope = b, + color = qaad_colors("flame_orange"), + size = 1 + ) + + geom_point( + size = 3.5, + alpha = 0.7 + ) + + annotate( + "segment", + x = mean(fakes$length) - 0.5, + y = mean(fakes$width) + (1/b*0.5), + xend = mean(fakes$length) + 0.5, + yend = mean(fakes$width) - (1/b*0.5), + size = 0.5, + color = "gray50" + ) + + annotate( + "point", + x = mean(fakes$length), + y = mean(fakes$width), + size = 5, + shape = 21, + color = qaad_colors("rufous_red"), + fill = "white", + stroke = 2 + ) + + coord_equal() + + labs( + x = "Length", + y = "Width" + ) + + theme( + legend.background = element_blank(), + legend.position = c(0.01, 0.99), + legend.justification = c("left", "top"), + plot.margin = margin(2, 2, 2, 2) + ) + +``` + +] + +.pull-right[ + +**What is it?** An 
ordination method for reducing the number of dimensions (variables) in a dataset. + +**Why is it?** PCA can (1) visualize complex datasets, (2) summarize redundant variables, (3) impute missing data, and (4) remove collinearity. + +**How is it?** Think of it like a set of nested OLS models, only the variance isn't parallel to the y-axis, but orthogonal to the principal component. + +The **goal** is to maximize the projected variance (spread of points on the PC line), or equivalently, to minimize the orthogonal distance from each point (orange lines connecting points to PC line). + +] + +--- + +## Principal Component Analysis + +.pull-left[ + +```{r} +#| fig.asp = 0.9, +#| out.width = "100%" + +w <- sqrt(nrow(fakes)-1)/5 +s <- 0.95 # shrinkage for labels + +arrow_data <- lapply( + names(fakes)[c(1,2,4)], + function(j){ + + xx <- fakes[, j, drop = TRUE] + pc1 <- score[, 1, drop = TRUE] + pc2 <- score[, 2, drop = TRUE] + + tibble( + type = j, + x = 0, + y = 0, + xend = w * cor(xx, pc1) * s, + yend = w * cor(xx, pc2) * s, + xlab = w * cor(xx, pc1), + ylab = w * cor(xx, pc2) + ) + + } +) %>% + bind_rows() + +score <- score %>% as_tibble() %>% mutate(class = fakes$type) + +ggplot() + + geom_hline( + yintercept = 0, + color = "gray75", + size = 0.5 + ) + + geom_vline( + xintercept = 0, + color = "gray75", + size = 0.5 + ) + + geom_point( + data = score, + aes(PC1, PC2, color = class), + size = 4, + alpha = 0.8 + ) + + scale_color_viridis( + name = NULL, + discrete = TRUE + ) + + geom_segment( + data = arrow_data, + aes(x, y, xend = xend, yend = yend, group = type), + arrow = arrow(length = unit(0.18, "in"), type = "closed") + ) + + geom_text( + data = arrow_data, + aes(xlab, ylab, label = type), + size = 8, + hjust = c(0, 0.5, 0), + vjust = c(0.5, 1, 0) + ) + + scale_x_continuous(expand = expansion(add = 1.2)) + + scale_y_continuous(expand = expansion(add = 0.5)) + + coord_equal() + + theme( + legend.background = element_blank(), + legend.position = c(0.01, 0.99), + 
legend.justification = c("left", "top") + ) + +``` + +] + +.pull-right[ + +**Why is it?** PCA can + +(1) **visualize complex datasets** - PC1 and PC2 represent three dimensions (variables) and their correlation (positive is < 90°, negative is > 90°) + +(2) **summarize redundant variables** - PC1 can be interpreted as a general measure of *shape*. + +(3) **impute missing data** - if missing one measure (like thickness), can estimate this based on measures of other variables (width and length). + +(4) **remove collinearity** - the PCs are orthogonal to each other and are uncorrelated by definition. + +] + +--- +class: center middle + +## Principal Component Analysis + +.w-70.ml-auto.mr-auto.tl[ + +```{r} +#| fig.asp = 0.5 + +vimp <- summary(fakes_pca)$importance %>% + as_tibble(rownames = "measure") %>% + mutate(measure = factor( + measure, + levels = c("Proportion of Variance", "Cumulative Proportion", "Standard deviation") + ) + ) %>% + pivot_longer(-measure) + +ggplot( + vimp %>% filter(measure != "Standard deviation"), + aes(name, value, group = 1) +) + + geom_line( + color = qaad_colors("rufous_red"), + size = 1 + ) + + geom_point( + color = qaad_colors("rufous_red"), + size = 4 + ) + + facet_wrap(~measure) + + labs( + x = NULL, + y = "Proportion Variance" + ) + + theme( + strip.background = element_blank(), + strip.text = element_blank() + ) + +``` + +Figure on left is a "scree" plot showing the proportion of variance explained, $R^2$, for each individual PC. Figure on right is the cumulative variance explained, the total $R^2$ when including each additional PC. + +] + +--- + +## K-means clustering + +.pull-left[ + +```{r} +#| out.width = "90%" + +figure("12_8.png") + +``` + +.right[Figure from [James *et al* (2021) ISLR](https://www.statlearning.com/)] + +] + +.pull-right[ + +**What is it?** A clustering algorithm that minimizes differences within groups (equivalently, maximizes similarity). + +Requires a **measure of difference** or similarity. 
Most common is squared Euclidean distance. + +**Assumes** that each observation belongs to one and only one group. + +] + +--- + +## K-means clustering + +.pull-left[ + +```{r} +#| out.width = "90%" + +figure("12_8.png") + +``` + +.right[Figure from [James *et al* (2021) ISLR](https://www.statlearning.com/)] + +] + +.pull-right[ + +**How is it?** + +- Must choose the number of groups $K$ prior to sorting. +- Algorithm randomly assigns points to each group. +- Calculates the centroids of each group (the means of each variable). +- Re-assigns points to groups based on their distance from centroids. +- Repeats until the within-group differences are minimized. + +] + +--- + +## Hierarchical clustering + +.pull-left[ + +```{r} + +figure("12_10.png") + +``` + +.right[Figure from [James *et al* (2021) ISLR](https://www.statlearning.com/)] + +] + +.pull-right[ + +**What is it?** A clustering algorithm that minimizes differences within groups (equivalently, maximizes similarity). + +Requires a **measure of difference** or similarity. Most common is squared Euclidean distance. + +Does not require choosing the number of groups $K$ in advance. + +A **linkage** rule must be chosen. + +Results in dendrograms ("trees") representing groups and degrees of similarity/difference between observations. + +] + +--- + +## Dendrograms + +.pull-left[ + +```{r} + +figure("12_11.png") + +``` + +.right[Figure from [James *et al* (2021) ISLR](https://www.statlearning.com/)] + +] + +.pull-right[ + +A **leaf** is the lowest terminal point and represents a unique observation. + +A **fusion** represents the point where two observations or groups of observations are most similar. +- Fusions lower in the tree represent greater similarity. +- Fusions higher in the tree represent greater difference. +- Cannot infer similarity based on horizontal distance in the tree! + +**Hierarchy** refers to nested clusters in the tree. + +**Cuts** (dashed-lines) in the tree determine the number of groups. 
Thus, the position of the cut acts like $K$ in K-means. + +] + +--- + +## Hierarchical clustering + +.pull-left[ + +```{r} + +figure("12_13.png") + +``` + +.right[Figure from [James *et al* (2021) ISLR](https://www.statlearning.com/)] +] + +.pull-right[ + +**How is it?** + +- Starts by measuring all pairwise differences between individual observations (classified as clusters of one). +- The two clusters that are the least dissimilar are fused, with the height of the fusion determined by the degree of difference. +- Compute differences for the new set of clusters. +- Fuse. +- Rinse and repeat until all clusters are fused into one. + +] + +--- +class: middle center + +## There's a lot more to consider here, naturally, but... + + +--- +class: middle center + +```{r} + +figure("yenn.jpg") + +``` + + + +--- + +## 🔭 Looking Ahead + +```{r} + +schedule <- here("_misc", "course_outline.xlsx") %>% + readxl::read_excel(1) %>% + mutate( + date = lubridate::ymd(date), + description = glue::glue( + "{topic} ({date})
+
{details}
" + )) %>% + select(description) + +``` + +.pull-left[ + +```{r} + +schedule %>% + slice(15:16) %>% + kbl(escape = FALSE, col.names = NULL) %>% + kable_paper(c("striped", "hover")) + +``` + +] \ No newline at end of file diff --git a/slides/14-Cluster_Analysis.Rmd b/slides/14-Cluster_Analysis.Rmd deleted file mode 100644 index e9bf7a4..0000000 --- a/slides/14-Cluster_Analysis.Rmd +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: "Quantitative Analysis of Archaeological Data" -subtitle: "Lecture 13: Cluster Analysis" -date: 'Last updated: `r Sys.Date()`' -output: - xaringan::moon_reader: - lib_dir: libs - css: [xaringan-themer.css, custom_style.css] - seal: TRUE - nature: - highlightStyle: magula - highlightLines: TRUE - countIncrementalSlides: TRUE - ratio: '16:9' ---- - -```{r, echo = FALSE, child = here::here("slides", "before_chunk.Rmd")} -``` - -```{r setup, include = FALSE} - -knitr::opts_chunk$set( - echo = FALSE, - warning = FALSE, - message = FALSE, - error = TRUE, - collapse = TRUE, - fig.align = "center", - dpi = 300 -) - -options( - htmltools.dir.version = FALSE, - str = strOptions(vec.len = 3) -) - -``` \ No newline at end of file diff --git a/slides/15-Ordination.Rmd b/slides/15-Ordination.Rmd deleted file mode 100644 index 7fbe01e..0000000 --- a/slides/15-Ordination.Rmd +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: "Quantitative Analysis of Archaeological Data" -subtitle: "Lecture 14: Ordination" -date: 'Last updated: `r Sys.Date()`' -output: - xaringan::moon_reader: - lib_dir: libs - css: [xaringan-themer.css, custom_style.css] - seal: TRUE - nature: - highlightStyle: magula - highlightLines: TRUE - countIncrementalSlides: TRUE - ratio: '16:9' ---- - -```{r, echo = FALSE, child = here::here("slides", "before_chunk.Rmd")} -``` - -```{r setup, include = FALSE} - -knitr::opts_chunk$set( - echo = FALSE, - warning = FALSE, - message = FALSE, - error = TRUE, - collapse = TRUE, - fig.align = "center", - dpi = 300 -) - -options( - htmltools.dir.version = FALSE, - 
str = strOptions(vec.len = 3) -) - -``` \ No newline at end of file diff --git a/slides/before_chunk.R b/slides/before_chunk.R index df7c230..7b699d6 100644 --- a/slides/before_chunk.R +++ b/slides/before_chunk.R @@ -9,13 +9,13 @@ library(here) library(knitr) library(kableExtra) library(lubridate) +library(MASS) library(mgcv) library(patchwork) library(readxl) library(sf) library(showtext) library(splines) -library(terra) library(tidyverse) library(viridis) library(xaringanExtra) diff --git a/slides/images/12_10.png b/slides/images/12_10.png new file mode 100644 index 0000000..078227c Binary files /dev/null and b/slides/images/12_10.png differ diff --git a/slides/images/12_11.png b/slides/images/12_11.png new file mode 100644 index 0000000..07bf26c Binary files /dev/null and b/slides/images/12_11.png differ diff --git a/slides/images/12_13.png b/slides/images/12_13.png new file mode 100644 index 0000000..917042d Binary files /dev/null and b/slides/images/12_13.png differ diff --git a/slides/images/12_8.png b/slides/images/12_8.png new file mode 100644 index 0000000..2a0f45f Binary files /dev/null and b/slides/images/12_8.png differ diff --git a/slides/images/yenn.jpg b/slides/images/yenn.jpg new file mode 100644 index 0000000..726b63d Binary files /dev/null and b/slides/images/yenn.jpg differ