Jump to content

Module:Lang/data/is latn data

From Wiki Knights Errant Life
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

--[[--------------------------< S I N G L E S _ T >-----------------------------------------------------------

List of individual Latn and Zyyy (common) codepoints that are not covered by any range in <ranges_t>. The data are taken from Module:Unicode data/scripts and a local copy of https://www.unicode.org/Public/16.0.0/ucd/ScriptExtensions.txt

]]

-- Set of individual codepoints: each key is a decimal codepoint and each value
-- is <true>.  The trailing comment on every line is the same codepoint in hex.
-- One entry per line matters: a `--` comment runs to the end of the physical
-- line, so entries that share a line with a comment are silently commented out.
local singles_t = {
	[170] = true,		-- 00AA
	[186] = true,		-- 00BA
	[215] = true,		-- 00D7
	[247] = true,		-- 00F7
	[787] = true,		-- 0313
	[800] = true,		-- 0320
	[856] = true,		-- 0358
	[862] = true,		-- 035E
	[884] = true,		-- 0374
	[894] = true,		-- 037E
	[901] = true,		-- 0385
	[903] = true,		-- 0387
	[1541] = true,		-- 0605
	[1548] = true,		-- 060C
	[1563] = true,		-- 061B
	[1567] = true,		-- 061F
	[1600] = true,		-- 0640
	[1757] = true,		-- 06DD
	[2274] = true,		-- 08E2
	[3647] = true,		-- 0E3F
	[4347] = true,		-- 10FB
	[6149] = true,		-- 1805
	[7379] = true,		-- 1CD3
	[7393] = true,		-- 1CE1
	[7418] = true,		-- 1CFA
	[7672] = true,		-- 1DF8
	[8305] = true,		-- 2071
	[8319] = true,		-- 207F
	[8432] = true,		-- 20F0
	[8498] = true,		-- 2132
	[8526] = true,		-- 214E
	[12294] = true,		-- 3006
	[12448] = true,		-- 30A0
	[12783] = true,		-- 31EF
	[13055] = true,		-- 32FF
	[42963] = true,		-- A7D3
	[43310] = true,		-- A92E
	[43471] = true,		-- A9CF
	[43867] = true,		-- AB5B
	[65279] = true,		-- FEFF
	[65392] = true,		-- FF70
	[119970] = true,	-- 1D4A2
	[119995] = true,	-- 1D4BB
	[120134] = true,	-- 1D546
	[129008] = true,	-- 1F7F0
	[917505] = true,	-- E0001
}


--[[--------------------------< R A N G E S _ T >-------------------------------------------------------------

List of inclusive {first, last} ranges of Latn and Zyyy (common) codepoints, in ascending order. The data are taken from Module:Unicode data/scripts and a local copy of https://www.unicode.org/Public/16.0.0/ucd/ScriptExtensions.txt

]]

-- Sequence of {first, last} codepoint pairs in decimal; the trailing comment on
-- every line gives the same inclusive range in hex.  Entries are listed in
-- ascending order and do not overlap.
-- One entry per line matters: a `--` comment runs to the end of the physical
-- line, so entries that share a line with a comment are silently commented out.
local ranges_t = {
	{0, 169},				-- 0000..00A9
	{171, 185},				-- 00AB..00B9
	{187, 214},				-- 00BB..00D6
	{216, 246},				-- 00D8..00F6
	{248, 745},				-- 00F8..02E9
	{748, 782},				-- 02EC..030E
	{784, 785},				-- 0310..0311
	{803, 805},				-- 0323..0325
	{813, 814},				-- 032D..032E
	{816, 817},				-- 0330..0331
	{867, 879},				-- 0363..036F
	{1157, 1158},			-- 0485..0486
	{2385, 2386},			-- 0951..0952
	{2404, 2405},			-- 0964..0965
	{4053, 4056},			-- 0FD5..0FD8
	{5867, 5869},			-- 16EB..16ED
	{5941, 5942},			-- 1735..1736
	{6146, 6147},			-- 1802..1803
	{7401, 7404},			-- 1CE9..1CEC
	{7406, 7411},			-- 1CEE..1CF3
	{7413, 7415},			-- 1CF5..1CF7
	{7424, 7461},			-- 1D00..1D25
	{7468, 7516},			-- 1D2C..1D5C
	{7522, 7525},			-- 1D62..1D65
	{7531, 7543},			-- 1D6B..1D77
	{7545, 7614},			-- 1D79..1DBE
	{7680, 7935},			-- 1E00..1EFF
	{8192, 8203},			-- 2000..200B
	{8206, 8292},			-- 200E..2064
	{8294, 8304},			-- 2066..2070
	{8308, 8318},			-- 2074..207E
	{8320, 8334},			-- 2080..208E
	{8336, 8348},			-- 2090..209C
	{8352, 8384},			-- 20A0..20C0
	{8448, 8485},			-- 2100..2125
	{8487, 8497},			-- 2127..2131
	{8499, 8525},			-- 2133..214D
	{8527, 8587},			-- 214F..218B
	{8592, 9257},			-- 2190..2429
	{9280, 9290},			-- 2440..244A
	{9312, 10239},			-- 2460..27FF
	{10496, 11123},			-- 2900..2B73
	{11126, 11157},			-- 2B76..2B95
	{11159, 11263},			-- 2B97..2BFF
	{11360, 11391},			-- 2C60..2C7F
	{11776, 11869},			-- 2E00..2E5D
	{12272, 12292},			-- 2FF0..3004
	{12296, 12320},			-- 3008..3020
	{12336, 12343},			-- 3030..3037
	{12348, 12351},			-- 303C..303F
	{12443, 12444},			-- 309B..309C
	{12539, 12540},			-- 30FB..30FC
	{12688, 12703},			-- 3190..319F
	{12736, 12773},			-- 31C0..31E5
	{12832, 12895},			-- 3220..325F
	{12927, 13007},			-- 327F..32CF
	{13144, 13311},			-- 3358..33FF
	{19904, 19967},			-- 4DC0..4DFF
	{42752, 42957},			-- A700..A7CD
	{42960, 42961},			-- A7D0..A7D1
	{42965, 42972},			-- A7D5..A7DC
	{42994, 43007},			-- A7F2..A7FF
	{43056, 43065},			-- A830..A839
	{43824, 43866},			-- AB30..AB5A
	{43868, 43876},			-- AB5C..AB64
	{43878, 43883},			-- AB66..AB6B
	{64256, 64262},			-- FB00..FB06
	{64830, 64831},			-- FD3E..FD3F
	{65040, 65049},			-- FE10..FE19
	{65072, 65106},			-- FE30..FE52
	{65108, 65126},			-- FE54..FE66
	{65128, 65131},			-- FE68..FE6B
	{65281, 65381},			-- FF01..FF65
	{65438, 65439},			-- FF9E..FF9F
	{65504, 65510},			-- FFE0..FFE6
	{65512, 65518},			-- FFE8..FFEE
	{65529, 65533},			-- FFF9..FFFD
	{65792, 65794},			-- 10100..10102
	{65799, 65843},			-- 10107..10133
	{65847, 65855},			-- 10137..1013F
	{65936, 65948},			-- 10190..1019C
	{66000, 66044},			-- 101D0..101FC
	{66273, 66299},			-- 102E1..102FB
	{67456, 67461},			-- 10780..10785
	{67463, 67504},			-- 10787..107B0
	{67506, 67514},			-- 107B2..107BA
	{113824, 113827},		-- 1BCA0..1BCA3
	{117760, 118009},		-- 1CC00..1CCF9
	{118016, 118451},		-- 1CD00..1CEB3
	{118608, 118723},		-- 1CF50..1CFC3
	{118784, 119029},		-- 1D000..1D0F5
	{119040, 119078},		-- 1D100..1D126
	{119081, 119142},		-- 1D129..1D166
	{119146, 119162},		-- 1D16A..1D17A
	{119171, 119172},		-- 1D183..1D184
	{119180, 119209},		-- 1D18C..1D1A9
	{119214, 119274},		-- 1D1AE..1D1EA
	{119488, 119507},		-- 1D2C0..1D2D3
	{119520, 119539},		-- 1D2E0..1D2F3
	{119552, 119638},		-- 1D300..1D356
	{119648, 119672},		-- 1D360..1D378
	{119808, 119892},		-- 1D400..1D454
	{119894, 119964},		-- 1D456..1D49C
	{119966, 119967},		-- 1D49E..1D49F
	{119973, 119974},		-- 1D4A5..1D4A6
	{119977, 119980},		-- 1D4A9..1D4AC
	{119982, 119993},		-- 1D4AE..1D4B9
	{119997, 120003},		-- 1D4BD..1D4C3
	{120005, 120069},		-- 1D4C5..1D505
	{120071, 120074},		-- 1D507..1D50A
	{120077, 120084},		-- 1D50D..1D514
	{120086, 120092},		-- 1D516..1D51C
	{120094, 120121},		-- 1D51E..1D539
	{120123, 120126},		-- 1D53B..1D53E
	{120128, 120132},		-- 1D540..1D544
	{120138, 120144},		-- 1D54A..1D550
	{120146, 120485},		-- 1D552..1D6A5
	{120488, 120779},		-- 1D6A8..1D7CB
	{120782, 120831},		-- 1D7CE..1D7FF
	{122624, 122654},		-- 1DF00..1DF1E
	{122661, 122666},		-- 1DF25..1DF2A
	{126065, 126132},		-- 1EC71..1ECB4
	{126209, 126269},		-- 1ED01..1ED3D
	{126976, 127019},		-- 1F000..1F02B
	{127024, 127123},		-- 1F030..1F093
	{127136, 127150},		-- 1F0A0..1F0AE
	{127153, 127167},		-- 1F0B1..1F0BF
	{127169, 127183},		-- 1F0C1..1F0CF
	{127185, 127221},		-- 1F0D1..1F0F5
	{127232, 127405},		-- 1F100..1F1AD
	{127462, 127487},		-- 1F1E6..1F1FF
	{127489, 127490},		-- 1F201..1F202
	{127504, 127547},		-- 1F210..1F23B
	{127552, 127560},		-- 1F240..1F248
	{127568, 127569},		-- 1F250..1F251
	{127584, 127589},		-- 1F260..1F265
	{127744, 128727},		-- 1F300..1F6D7
	{128732, 128748},		-- 1F6DC..1F6EC
	{128752, 128764},		-- 1F6F0..1F6FC
	{128768, 128886},		-- 1F700..1F776
	{128891, 128985},		-- 1F77B..1F7D9
	{128992, 129003},		-- 1F7E0..1F7EB
	{129024, 129035},		-- 1F800..1F80B
	{129040, 129095},		-- 1F810..1F847
	{129104, 129113},		-- 1F850..1F859
	{129120, 129159},		-- 1F860..1F887
	{129168, 129197},		-- 1F890..1F8AD
	{129200, 129211},		-- 1F8B0..1F8BB
	{129216, 129217},		-- 1F8C0..1F8C1
	{129280, 129619},		-- 1F900..1FA53
	{129632, 129645},		-- 1FA60..1FA6D
	{129648, 129660},		-- 1FA70..1FA7C
	{129664, 129673},		-- 1FA80..1FA89
	{129679, 129734},		-- 1FA8F..1FAC6
	{129742, 129756},		-- 1FACE..1FADC
	{129759, 129769},		-- 1FADF..1FAE9
	{129776, 129784},		-- 1FAF0..1FAF8
	{129792, 129938},		-- 1FB00..1FB92
	{129940, 130041},		-- 1FB94..1FBF9
	{917536, 917631},		-- E0020..E007F
}


--[[--------------------------< S P E C I A L S _ T >---------------------------------------------------------

list of individual language-specific non-Latn and non-Zyyy codepoints; these codepoints are commonly used in transliterations.  This list is manually curated, so it is most likely incomplete.

Top-level keys of <specials_t> are decimal codepoints; the keys of each nested table are the tags (always lowercase) of the languages whose transliterations use that non-Latn codepoint.

]]

-- Map from decimal codepoint to a set of lowercase tags: specials_t[cp][tag] is
-- <true> when <tag> is listed for codepoint <cp>.  The comment on each opening
-- line names the codepoint; the comment on each tag line names the language.
-- One entry per line matters: a `--` comment runs to the end of the physical
-- line, so entries that share a line with a comment are silently commented out.
local specials_t = {
	[788] = {												-- U+0314: COMBINING REVERSED COMMA ABOVE
		["hy"] = true,										-- Armenian
		},
	[794] = {												-- U+031A: COMBINING LEFT ANGLE ABOVE
		["ltc"] = true,										-- Middle Chinese; is this really IPA?
		},
	[795] = {												-- U+031B: COMBINING HORN
		["th"] = true,										-- Thai
		},
	[806] = {												-- U+0326: COMBINING COMMA BELOW
		["ab"] = true,										-- Abkhaz
		["kca"] = true,										-- Khanty
		["xal"] = true,										-- Kalmyk or Oirat
		},
	[807] = {												-- U+0327: COMBINING CEDILLA
		["fa"] = true,										-- Persian
		},
	[809] = {												-- U+0329: COMBINING VERTICAL LINE BELOW
		["ab"] = true,										-- Abkhaz
		["sa"] = true,										-- Sanskrit
		},
	[815] = {												-- U+032F: COMBINING INVERTED BREVE BELOW
		["mong"] = true,									-- Mongolian
		["xsc"] = true,										-- Scythian
		},
	[818] = {												-- U+0332: COMBINING LOW LINE
		["ar"] = true,										-- Arabic
		["hbo"] = true,										-- Ancient Hebrew
		["he"] = true,										-- Hebrew
		["jpa"] = true,										-- Jewish Palestinian Aramaic
		["mdh"] = true,										-- Maguindanaon
		["otk"] = true,										-- Old Turkish
		},
	[831] = {												-- U+033F: COMBINING DOUBLE OVERLINE
		["mnp"] = true,										-- Northern Min Chinese, Jian'ou dialect
		},
	[855] = {												-- U+0357: COMBINING RIGHT HALF RING ABOVE
		["egy"] = true,										-- Ancient Egyptian
		},
	[863] = {												-- U+035F: COMBINING DOUBLE MACRON BELOW
		["am"] = true,										-- Amharic
		["ar"] = true,										-- Arabic
		["dv"] = true,										-- Dhivehi, Divehi, or Maldivian
		["fa"] = true,										-- Persian
		["hi"] = true,										-- Hindi
		["inc"] = true,										-- Indic languages
		["ur"] = true,										-- Urdu
		},
	[864] = {												-- U+0360: COMBINING DOUBLE TILDE
		["hi"] = true,										-- Hindi
		},
	[865] = {												-- U+0361: COMBINING DOUBLE INVERTED BREVE
		["be"] = true,										-- Belarusian
		["ltc"] = true,										-- Middle Chinese; is this really IPA?
		["ru"] = true,										-- Russian
		["rue"] = true,										-- Rusyn
		["sem"] = true,										-- Semitic languages
		["sit"] = true,										-- Sino-Tibetan languages
		["tt"] = true,										-- Tatar
		},
	[916] = {												-- U+0394: GREEK CAPITAL LETTER DELTA
		["xsc"] = true,										-- Scythian
		},
	[920] = {												-- U+0398: GREEK CAPITAL LETTER THETA
		["ae"] = true,										-- Avestan
		},
	[934] = {												-- U+03A6: GREEK CAPITAL LETTER PHI
		["xle"] = true,										-- Lemnian
		},
	[945] = {												-- U+03B1: GREEK SMALL LETTER ALPHA
		["apc"] = true,										-- Levantine Arabic
		},
	[946] = {												-- U+03B2: GREEK SMALL LETTER BETA
		["ae"] = true,										-- Avestan
		["gha"] = true,										-- Ghadamès
		["ougr"] = true,									-- Old Uyghur
		["sem"] = true,										-- Semitic languages
		["syc"] = true,										-- Classical Syriac
		["wuu"] = true,										-- Shanghainese variety of Wu Chinese
		},
	[947] = {												-- U+03B3: GREEK SMALL LETTER GAMMA
		["ae"] = true,										-- Avestan
		["ltc"] = true,										-- Late Middle Chinese
		["mn"] = true,										-- Mongolian
		["och"] = true,										-- Old Chinese
		["ougr"] = true,									-- Old Uyghur
		["pal"] = true,										-- Middle Persian
		["syc"] = true,										-- Classical Syriac
		["syr"] = true,										-- Syriac
		["xal"] = true,										-- Kalmyk or Oirat
		["xng"] = true,										-- Middle Mongolian
		["xsc"] = true,										-- Scythian
		["mong"] = true,									-- Mongolian
		},
	[948] = {												-- U+03B4: GREEK SMALL LETTER DELTA
		["ae"] = true,										-- Avestan
		["ougr"] = true,									-- Old Uyghur
		["sog"] = true,										-- Sogdian
		["sogd"] = true,									-- Sogdian
		["syc"] = true,										-- Classical Syriac
		["xpr"] = true,										-- Parthian
		["xsc"] = true,										-- Scythian
		["xsc-x-pontic"] = true,							-- Pontic Scythian
		},
	[952] = {												-- U+03B8: GREEK SMALL LETTER THETA
		["ae"] = true,										-- Avestan
		["ba"] = true,										-- Bashkir
		["cms"] = true,										-- Messapic
		["ett"] = true,										-- Etruscan
		["hur"] = true,										-- Halkomelem
		["ira"] = true,										-- Iranian languages
		["my"] = true,										-- Burmese
		["pal"] = true,										-- Middle Persian (Pahlavi)
		["peo"] = true,										-- Old Persian
		["sa"] = true,										-- Sanskrit
		["sem"] = true,										-- Semitic languages
		["syc"] = true,										-- Classical Syriac
		["syr"] = true,										-- Syriac
		["xpg"] = true,										-- Phrygian
		["xpr"] = true,										-- Parthian
		["xsc"] = true,										-- Scythian
		},
	[955] = {												-- U+03BB: GREEK SMALL LETTER LAMDA
		["xcr"] = true,										-- Carian
		["xld"] = true,										-- Lydian
		},
	[963] = {												-- U+03C3: GREEK SMALL LETTER SIGMA
		["ett"] = true,										-- Etruscan
		},
	[964] = {												-- U+03C4: GREEK SMALL LETTER TAU
		["xld"] = true,										-- Lydian
		},
	[966] = {												-- U+03C6: GREEK SMALL LETTER PHI
		["ett"] = true,										-- Etruscan
		},
	[967] = {												-- U+03C7: GREEK SMALL LETTER CHI
		["ett"] = true,										-- Etruscan
		["gem"] = true,										-- Germanic languages
		["kbd"] = true,										-- Kabardian
		["ltc"] = true,										-- Late Middle Chinese
		["och"] = true,										-- Old Chinese
		["xlc"] = true,										-- Lycian
		["xle"] = true,										-- Lemnian
		},
	[968] = {												-- U+03C8: GREEK SMALL LETTER PSI
		["ett"] = true,										-- Etruscan
		},
	[977] = {												-- U+03D1: GREEK THETA SYMBOL
		["ae"] = true,										-- Avestan
		["xme"] = true,										-- Median
		["xsc"] = true,										-- Scythian
		["xsc-x-pontic"] = true,							-- Pontic Scythian
		},
	[1098] = {												-- U+044A: CYRILLIC SMALL LETTER HARD SIGN
		["ady"] = true,										-- Adyghe
		["cu"] = true,										-- Church Slavic
		["zls"] = true,										-- South Slavic languages
		},
	[1100] = {												-- U+044C: CYRILLIC SMALL LETTER SOFT SIGN
		["az"] = true,										-- Azerbaijani
		["cu"] = true,										-- Church Slavonic
		["dng"] = true,										-- Dungan
		["ru"] = true,										-- Russian
		["tt"] = true,										-- Tatar
		["tyv"] = true,										-- Tuvinian
		},
	[1278] = {												-- U+04FE: CYRILLIC CAPITAL LETTER HA WITH STROKE
		["av"] = true,										-- Avar
		},
	[1279] = {												-- U+04FF: CYRILLIC SMALL LETTER HA WITH STROKE
		["av"] = true,										-- Avar
		},
	[8113] = {												-- U+1FB1: GREEK SMALL LETTER ALPHA WITH MACRON
		["apc"] = true,										-- Levantine Arabic
		},
	[8190] = {												-- U+1FFE: GREEK DASIA
		["ar"] = true,										-- Arabic (Ayin)
		["xcl"] = true,										-- Classical Armenian
		},
	[19978] = {												-- U+4E0A: [CJK Unified Ideographs]
		["wuu"] = true,										-- Wu Chinese tone marker
		},
	[20837] = {												-- U+5165: [CJK Unified Ideographs]
		["wuu"] = true,										-- Wu Chinese tone marker
		},
	[21435] = {												-- U+53BB: [CJK Unified Ideographs]
		["wuu"] = true,										-- Wu Chinese tone marker
		},
	[24179] = {												-- U+5E73: [CJK Unified Ideographs]
		["wuu"] = true,										-- Wu Chinese tone marker
		},
	[38451] = {												-- U+9633: [CJK Unified Ideographs] (Yang)
		["wuu"] = true,										-- Suzhou dialect of Wu Chinese tone marker --Suzhou dialect#Tones
		},
	[38452] = {												-- U+9634: [CJK Unified Ideographs] (Yin)
		["wuu"] = true,										-- Suzhou dialect of Wu Chinese tone marker --Suzhou dialect#Tones
		},
	[65056] = {												-- U+FE20: COMBINING LIGATURE LEFT HALF
		["ru"] = true,										-- Russian
		},
	[65057] = {												-- U+FE21: COMBINING LIGATURE RIGHT HALF
		["ru"] = true,										-- Russian
		},
}


--[[--------------------------< E X P O R T S >--------------------------------------------------------------- ]]

-- Collect the three data tables under their own names as the module's return value.
local exports_t = {
	singles_t = singles_t,
	ranges_t = ranges_t,
	specials_t = specials_t,
}

-- Entry count of <ranges_t>, taken once at load time.
-- NOTE(review): presumably exported so consumers need not apply `#` to the
-- loaded table themselves; confirm against the calling module.
exports_t.sizeof_ranges_t = #ranges_t

return exports_t