言語処理100本ノック 第1章: 準備運動 06
06. 集合
"paraparaparadise"と"paragraph"に含まれる文字bi-gramの集合を,それぞれ, XとYとして求め,XとYの和集合,積集合,差集合を求めよ.さらに,'se'というbi-gramがXおよびYに含まれるかどうかを調べよ.
java
package net.vg4; import java.util.*; class NGram { public List<List<String>> make(List<String> list, int n) { ArrayList<List<String>> alistlist = new ArrayList<>(); for (int from = 0; from < list.size(); from++) { int to = from + n; if (to > list.size()) { to = list.size(); } List<String> listr = list.subList(from, to); alistlist.add(listr); } return alistlist; } public List<List<String>> make(String str, int n) { return this.make(Arrays.asList(str.split("")), n); } } public class Main { public static void main(String[] args) { String sample1 = "paraparaparadise"; String sample2 = "paragraph"; NGram ngram = new NGram(); List<List<String>> res1 = ngram.make(sample1, 2); Set<List<String>> xOrg = new HashSet<>(res1); List<List<String>> res2 = ngram.make(sample2, 2); Set<List<String>> y = new HashSet<>(res2); // 和集合 { List<List<String>> x = new ArrayList<List<String>>(xOrg); x.addAll(y); System.out.println(x); } // 積集合 { List<List<String>> x = new ArrayList<List<String>>(xOrg); x.retainAll(y); System.out.println(x); } // 差集合 { List<List<String>> x = new ArrayList<List<String>>(xOrg); x.removeAll(y); System.out.println(x); } List<String> seed = Arrays.asList("s", "e"); if (xOrg.contains(seed)) { System.out.println("including in x"); } if (y.contains(seed)) { System.out.println("including in y"); } } }